|
a/src/internfile/myhtmlparse.cpp |
|
b/src/internfile/myhtmlparse.cpp |
|
... |
|
... |
165 |
" %Y-%m-%d %H:%M:%S ", &tm)) {
|
165 |
" %Y-%m-%d %H:%M:%S ", &tm)) {
|
166 |
char ascuxtime[100];
|
166 |
char ascuxtime[100];
|
167 |
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
|
167 |
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
|
168 |
dmtime = ascuxtime;
|
168 |
dmtime = ascuxtime;
|
169 |
}
|
169 |
}
|
|
|
170 |
}
|
|
|
171 |
#if 0 // We're not a robot, so we don't care about robots metainfo
|
170 |
} else if (name == "robots") {
|
172 |
else if (name == "robots") {
|
171 |
string val = i->second;
|
173 |
string val = i->second;
|
172 |
decode_entities(val);
|
174 |
decode_entities(val);
|
173 |
lowercase_term(val);
|
175 |
lowercase_term(val);
|
174 |
if (val.find("none") != string::npos ||
|
176 |
if (val.find("none") != string::npos ||
|
175 |
val.find("noindex") != string::npos) {
|
177 |
val.find("noindex") != string::npos) {
|
176 |
indexing_allowed = false;
|
178 |
indexing_allowed = false;
|
177 |
LOGDEB1(("myhtmlparse: robots/noindex\n"));
|
179 |
LOGDEB1(("myhtmlparse: robots/noindex\n"));
|
178 |
throw false;
|
180 |
throw false;
|
179 |
}
|
181 |
}
|
180 |
}
|
182 |
}
|
|
|
183 |
#endif // 0
|
181 |
} else if ((j = p.find("http-equiv")) != p.end()) {
|
184 |
} else if ((j = p.find("http-equiv")) != p.end()) {
|
182 |
string hequiv = j->second;
|
185 |
string hequiv = j->second;
|
183 |
lowercase_term(hequiv);
|
186 |
lowercase_term(hequiv);
|
184 |
if (hequiv == "content-type") {
|
187 |
if (hequiv == "content-type") {
|
185 |
string value = i->second;
|
188 |
string value = i->second;
|
|
... |
|
... |
330 |
if (tag == "xmp") pending_space = true;
|
333 |
if (tag == "xmp") pending_space = true;
|
331 |
break;
|
334 |
break;
|
332 |
}
|
335 |
}
|
333 |
}
|
336 |
}
|
334 |
|
337 |
|
335 |
// This gets called when hitting eof. If the <body> is open, do
|
338 |
// This gets called when hitting eof.
|
|
|
339 |
// We used to do:
|
|
|
340 |
// > If the <body> is open, do
|
336 |
// something with the text (that is, don't throw up). Else, things are
|
341 |
// > something with the text (that is, don't throw up). Else, things are
|
337 |
// too weird, throw an error. We don't get called if the parser finds
|
342 |
// > too weird, throw an error. We don't get called if the parser finds
|
338 |
// a closing body tag (exception gets thrown by closing_tag())
|
343 |
// > a closing body tag (exception gets thrown by closing_tag())
|
|
|
344 |
// But we don't throw any more. Whatever text we've extracted up to now is
|
|
|
345 |
// better than nothing.
|
339 |
void
|
346 |
void
|
340 |
MyHtmlParser::do_eof()
|
347 |
MyHtmlParser::do_eof()
|
341 |
{
|
348 |
{
|
342 |
if (!in_body_tag)
|
349 |
// New revision: the body is intentionally empty, so reaching eof no longer
// throws. The former check (throw(false) when in_body_tag was not set) is
// kept below, commented out, for reference.
// if (!in_body_tag)
|
343 |
throw(false);
|
350 |
// throw(false);
|
344 |
}
|
351 |
}
|