#include "page.h"

#include <string>

// NOTE(review): this file was recovered from a copy in which everything was
// collapsed onto one line and HTML-looking text inside string literals had been
// stripped (every tag comparison read `== ""`). The literals below are
// reconstructed from the substring lengths and index offsets that survived
// (7, 4, 3 and 6 characters) and from the remaining comment fragments.
// Confirm them against the original repository before relying on them.
namespace
{
    const std::string kTableOpen  = "<table>";   // 7 chars: substr(index, 7) / index + 7
    const std::string kTableClose = "</table>";  // presumably the old `endofTable` value
    const std::string kRowTag     = "<tr>";      // 4 chars: substr(i, 4) / i + 4
    const std::string kCellTag    = "<td>";
    const std::string kHeaderTag  = "<th>";
    const std::string kBoldTag    = "<b>";       // 3 chars: substr(index, 3) / index + 3
    const std::string kNbspEntity = "&nbsp;";    // 6 chars: substr(index, 6)

    // Returns true when `tag` occurs in `text` starting exactly at `pos`.
    // Any `pos` outside [0, text.length()] is reported as "no match" instead of
    // being dereferenced.
    bool tagAt(const std::string& text, int pos, const std::string& tag)
    {
        if (pos < 0 || pos > static_cast<int>(text.length()))
            return false;
        return text.compare(static_cast<std::string::size_type>(pos), tag.length(), tag) == 0;
    }
}

Page::Page()
{
}

// Parses `htmlToPhrased` and returns the extracted table text.
// The result is also kept in this->text.
std::string Page::getString(std::string& htmlToPhrased)
{
    makeText(htmlToPhrased);
    return this->text;
}

// Locates the start of the table data in `html` and converts the table that
// follows into plain text stored in this->text.
void Page::makeText(std::string& html)
{
    // Index of the first character after the opening table tag, or -1.
    const int dataStart = runToActualText(html, 0);
    manageTableContent(html, dataStart);
}

// Searches `from`, beginning at `index`, for the opening table tag.
// Returns the index of the first character after that tag, or -1 when the tag
// is not found or `index` is not a valid start position.
int Page::runToActualText(std::string& from, int index)
{
    if (index < 0)
        return -1;

    const std::string::size_type found =
        from.find(kTableOpen, static_cast<std::string::size_type>(index));
    if (found == std::string::npos)
        return -1;

    return static_cast<int>(found + kTableOpen.length());
}

// Walks `html` from `index` and builds the text form of the table:
// every row tag starts a new line and every cell/header tag inserts a tab,
// followed by the text found directly after the tag. The result replaces
// this->text. A negative `index` (no table found) yields an empty text
// instead of reading before the start of the string.
//
// NOTE(review): in the recovered source the end of the <td>/<th> branch was
// destroyed; it contained an extra `html.substr(i, 6) == ...` check whose
// literal and body are unknown and are NOT reproduced here. The branch below
// mirrors the intact <tr> branch. Stopping at the closing table tag is likewise
// an assumption based on the otherwise unused `endofTable` variable.
void Page::manageTableContent(std::string& html, int index)
{
    std::string collected;
    const int length = static_cast<int>(html.length());

    for (int i = index; i >= 0 && i < length; i++)
    {
        if (html[i] != '<')
            continue;

        if (tagAt(html, i, kRowTag))
        {
            collected += "\n"; // new row -> new line
            i = stitchText(html, collected, i + static_cast<int>(kRowTag.length()));
            if (i == -1) // nothing left to read
                break;
        }
        else if (tagAt(html, i, kCellTag) || tagAt(html, i, kHeaderTag))
        {
            collected += "\t"; // new cell -> tab between data
            i = stitchText(html, collected, i + static_cast<int>(kCellTag.length()));
            if (i == -1) // nothing left to read
                break;
        }
        else if (tagAt(html, i, kTableClose))
        {
            break; // end of the table that is being converted
        }
    }

    this->text = collected;
}

// Appends to `to` the text that starts at `index` in `from`, up to (not
// including) the next '<' or the end of `from`.
//  - A bold tag directly at `index` is skipped; any other tag at `index` means
//    there is no text to copy.
//  - A non-breaking-space entity is appended as a single ' '.
//  - Newline characters are dropped.
// Returns the index of the last character consumed, so that the caller's loop
// increment lands on the next unread character. Returns -1 when `index` lies
// outside `from`. (With index == 0 and a non-bold tag at position 0 the
// "last consumed" value is also -1; the caller in this file never passes 0.)
//
// Unlike the previous version this no longer overwrites characters in `from`
// while decoding the entity; the produced text is the same.
int Page::stitchText(std::string& from, std::string& to, int index)
{
    const int length = static_cast<int>(from.length());
    if (index < 0 || index > length)
        return -1;

    if (index < length && from[index] == '<')
    {
        if (!tagAt(from, index, kBoldTag))
            return index - 1; // go back one step - the caller's loop increments i
        index += static_cast<int>(kBoldTag.length());
    }

    // Bounds are tested before the character is read.
    while (index < length && from[index] != '<')
    {
        if (tagAt(from, index, kNbspEntity))
        {
            to += ' ';
            index += static_cast<int>(kNbspEntity.length());
            continue;
        }

        if (from[index] != '\n')
            to += from[index];
        index++;
    }

    return index - 1; // go back one step - the caller's loop increments i
}

// Returns true when `index` is at or past `length`.
bool Page::endOfString(int index, int length)
{
    return index >= length;
}