2014-09-08 15:54:52 +00:00
|
|
|
#include "page.h"
|
|
|
|
|
2014-10-05 11:16:01 +00:00
|
|
|
/** @brief Page::Page start with an empty dateHeader */
Page::Page()
{
    this->dateHeader = "";
}
|
2014-09-17 01:08:38 +00:00
|
|
|
/**
|
|
|
|
* @brief Page::getString
|
|
|
|
* @param htmlToPhrased
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
QString Page::getString(QString &htmlToParse)
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
makeText(htmlToParse);
|
|
|
|
return this->text;
|
2014-09-08 15:54:52 +00:00
|
|
|
}
|
2014-09-17 01:08:38 +00:00
|
|
|
/**
 * @brief Page::makeText locate the "<tbody>" tag and pass the html on to manageTableContent()
 * @param html html to parse
 */
void Page::makeText(QString &html)
{
    // position where the table data starts; handed to manageTableContent() as is
    const int tableStart = html.indexOf("<tbody>", 0);
    this->manageTableContent(html, tableStart);
}
|
2014-09-17 01:08:38 +00:00
|
|
|
/**
|
|
|
|
* @brief Page::manageTableContent strip html, make it string
|
|
|
|
* @param html html to parse
|
|
|
|
* @param index index to start looking for data
|
|
|
|
*/
|
|
|
|
void Page::manageTableContent(QString &html, int index)
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
if (index == -1)
|
|
|
|
return;
|
|
|
|
QString temp;
|
|
|
|
for (int i = index; i < html.length(); i++)
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
if (html.at(i) == '<')
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
//<tr> / <td> / <th>
|
|
|
|
QString endofTable = "</tbody>";
|
|
|
|
QString tableTag = html.mid(i, 4); //legth of "tr/td"
|
|
|
|
if (tableTag == "<tr>")
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
if (!dateHeader.isEmpty())
|
|
|
|
temp += dateHeader;
|
|
|
|
i = stitchText(html, temp, i+4);
|
|
|
|
if (i == -1) //EOF
|
|
|
|
break;
|
2014-10-05 11:16:01 +00:00
|
|
|
|
|
|
|
}
|
2014-10-06 16:15:24 +00:00
|
|
|
else if (tableTag == "</tr")
|
2014-10-05 11:16:01 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
temp += "\n"; //end row -> new line
|
|
|
|
i+=5;
|
2014-09-08 15:54:52 +00:00
|
|
|
}
|
2014-10-06 16:15:24 +00:00
|
|
|
else if (tableTag == "<td>" || tableTag == "<th>")
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
if (!dateHeader.isEmpty())
|
|
|
|
temp += "\t"; // new cell -> tab between data
|
|
|
|
if (html.mid(i, 6) == "<td><a") //link to lecturer portal, need to be deleted
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
i += 6;
|
|
|
|
while (html.at(++i) != '>');
|
|
|
|
i = stitchText(html, temp, i+1);
|
2014-09-08 15:54:52 +00:00
|
|
|
}
|
2014-10-06 16:15:24 +00:00
|
|
|
else
|
|
|
|
i = stitchText(html, temp, i+4);
|
2014-10-05 12:33:57 +00:00
|
|
|
|
2014-10-06 16:15:24 +00:00
|
|
|
if (dateHeader.isEmpty())
|
|
|
|
temp += "\t";
|
2014-10-05 12:33:57 +00:00
|
|
|
|
2014-10-06 16:15:24 +00:00
|
|
|
if (i == -1) //EOF
|
|
|
|
break;
|
2014-09-08 15:54:52 +00:00
|
|
|
}
|
2014-10-06 16:15:24 +00:00
|
|
|
else if (tableTag == "<td ") // a Year title (in grades table) or Day and Hour (in calendar page)
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
|
|
|
|
if (!dateHeader.isEmpty())
|
|
|
|
{
|
|
|
|
//checking if theres a need to fill a timestamp of course
|
|
|
|
//if the string is not empty, then we will chop the last date stamp to avoid multiple date stamp in empty rows
|
|
|
|
if (!temp.isEmpty())
|
2014-10-05 11:16:01 +00:00
|
|
|
if (temp.lastIndexOf(dateHeader) == temp.length()-dateHeader.length())
|
2014-10-06 16:15:24 +00:00
|
|
|
{
|
|
|
|
temp.chop(dateHeader.length()+5);
|
2014-10-05 11:16:01 +00:00
|
|
|
temp += "\t";
|
2014-10-06 16:15:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
while ((html.mid(i,5) != "</td>") && (i < (int)html.length()))
|
2014-10-05 12:33:57 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
if (html.mid(i,3) == "<b>") //for gpa. year & semester title
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else if ((html.at(i) == '>') && (html.mid(i+4,3) != "<b>")) //for calendar. day and hours
|
|
|
|
{
|
|
|
|
i += 1; //lenght of >
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
i++;
|
2014-10-05 12:33:57 +00:00
|
|
|
}
|
2014-10-06 16:15:24 +00:00
|
|
|
i = stitchText(html, temp, i);
|
|
|
|
temp += "\t";
|
2014-09-08 15:54:52 +00:00
|
|
|
}
|
2014-10-06 16:15:24 +00:00
|
|
|
if (html.mid(i,(endofTable).length()) == endofTable) //is end of table
|
2014-09-08 15:54:52 +00:00
|
|
|
{
|
2014-10-06 16:15:24 +00:00
|
|
|
break;
|
2014-09-08 15:54:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2014-10-06 16:15:24 +00:00
|
|
|
this->text = temp;
|
2014-09-08 15:54:52 +00:00
|
|
|
}
|
|
|
|
|
2014-09-17 01:08:38 +00:00
|
|
|
/**
 * @brief Page::stitchText copy the plain text that starts at @p index into @p to
 *
 * If the text begins with a "<b>" element, everything up to its "</b>" is
 * split on '<', '>', ' ' and ':'; the 2nd and 4th tokens, joined by a tab,
 * become the new dateHeader and the element's content is skipped.
 * Characters are then appended to @p to until the next '<' or the end of
 * @p from. New lines and tabs are dropped and "&nbsp;" is written as a
 * single space.
 *
 * @param from  html being parsed (left unmodified)
 * @param to    string that receives the extracted text
 * @param index position in @p from at which to start reading
 * @return index of the last character consumed, i.e. one before the next '<'
 *         or before the end of @p from, so that the caller's i++ lands on it
 */
int Page::stitchText(QString &from, QString &to, int index)
{
    if (from.mid(index, 3) == "<b>")
    {
        // Everything from "<b>" up to, but not including, the closing "</b>".
        const QString dateline = from.mid(index, from.indexOf("</b>", index + 4) - index);

        // strtok() writes into its input, so tokenize a local copy that is
        // released automatically; the former strdup() buffer was never freed.
        std::string buffer = dateline.toStdString();
        QString date;
        int tokenNumber = 0;
        for (char *tok = strtok(&buffer[0], "<> :"); tok != NULL; tok = strtok(NULL, "<> :"))
        {
            if (tokenNumber == 1)
                date += QString(tok) + "\t";
            else if (tokenNumber == 3)
                date += QString(tok);
            tokenNumber++;
        }
        dateHeader = date;

        index += dateline.length();
    }

    const QString nbsp = "&nbsp;";

    // The bounds check comes first so at() is never called past the end.
    while (index < from.length() && from.at(index) != '<')
    {
        const QChar current = from.at(index);

        if (current == '&' && from.mid(index, nbsp.length()) == nbsp)
        {
            // The whole entity stands for one space.
            to += ' ';
            index += nbsp.length();
            continue;
        }

        if (current != '\n' && current != '\t') // keep only the actual data
            to += current;
        index++;
    }

    return index - 1; // go back one step - the main loop increments i
}
|
|
|
|
/**
 * @brief Page::endOfString tell whether an index lies at or beyond a given length
 * @param index  position to test
 * @param length length to test against
 * @return true when index is not smaller than length, false otherwise
 */
bool Page::endOfString(int index, int length)
{
    return !(index < length);
}
|