|
a/src/internfile/myhtmlparse.cpp |
|
b/src/internfile/myhtmlparse.cpp |
|
... |
|
... |
142 |
map<string, string>::const_iterator i, j;
|
142 |
map<string, string>::const_iterator i, j;
|
143 |
if ((i = p.find("content")) != p.end()) {
|
143 |
if ((i = p.find("content")) != p.end()) {
|
144 |
if ((j = p.find("name")) != p.end()) {
|
144 |
if ((j = p.find("name")) != p.end()) {
|
145 |
string name = j->second;
|
145 |
string name = j->second;
|
146 |
lowercase_term(name);
|
146 |
lowercase_term(name);
|
147 |
if (name == "description") {
|
|
|
148 |
if (sample.empty()) {
|
|
|
149 |
sample = i->second;
|
|
|
150 |
decode_entities(sample);
|
|
|
151 |
}
|
|
|
152 |
} else if (name == "keywords") {
|
|
|
153 |
if (!keywords.empty()) keywords += ' ';
|
|
|
154 |
string tmp = i->second;
|
|
|
155 |
decode_entities(tmp);
|
|
|
156 |
keywords += tmp;
|
|
|
157 |
} else if (name == "author") {
|
|
|
158 |
if (!author.empty()) author += ' ';
|
|
|
159 |
string tmp = i->second;
|
|
|
160 |
decode_entities(tmp);
|
|
|
161 |
author += tmp;
|
|
|
162 |
} else if (name == "date") {
|
147 |
if (name == "date") {
|
163 |
// Yes this doesnt exist. It's output by filters
|
148 |
// Yes this doesnt exist. It's output by filters
|
164 |
// And the format isn't even standard http/html
|
149 |
// And the format isn't even standard http/html
|
165 |
// FIXME
|
150 |
// FIXME
|
166 |
string tmp = i->second;
|
151 |
string tmp = i->second;
|
167 |
decode_entities(tmp);
|
152 |
decode_entities(tmp);
|
|
... |
|
... |
170 |
" %Y-%m-%d %H:%M:%S ", &tm)) {
|
155 |
" %Y-%m-%d %H:%M:%S ", &tm)) {
|
171 |
char ascuxtime[100];
|
156 |
char ascuxtime[100];
|
172 |
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
|
157 |
sprintf(ascuxtime, "%ld", (long)mktime(&tm));
|
173 |
dmtime = ascuxtime;
|
158 |
dmtime = ascuxtime;
|
174 |
}
|
159 |
}
|
|
|
160 |
} else if (name == "robots") {
|
|
|
161 |
} else {
|
|
|
162 |
if (!meta[name].empty())
|
|
|
163 |
meta[name] += ' ';
|
|
|
164 |
string tmp = i->second;
|
|
|
165 |
decode_entities(tmp);
|
|
|
166 |
meta[name] += tmp;
|
175 |
}
|
167 |
}
|
176 |
} else if ((j = p.find("http-equiv")) != p.end()) {
|
168 |
} else if ((j = p.find("http-equiv")) != p.end()) {
|
177 |
string hequiv = j->second;
|
169 |
string hequiv = j->second;
|
178 |
lowercase_term(hequiv);
|
170 |
lowercase_term(hequiv);
|
179 |
if (hequiv == "content-type") {
|
171 |
if (hequiv == "content-type") {
|
180 |
string value = i->second;
|
172 |
string value = i->second;
|
|
... |
|
... |
307 |
}
|
299 |
}
|
308 |
if (tag == "select") pending_space = true;
|
300 |
if (tag == "select") pending_space = true;
|
309 |
break;
|
301 |
break;
|
310 |
case 't':
|
302 |
case 't':
|
311 |
if (tag == "title") {
|
303 |
if (tag == "title") {
|
312 |
if (title.empty()) {
|
304 |
if (meta["title"].empty()) {
|
313 |
title = dump;
|
305 |
meta["title"] = dump;
|
314 |
dump = "";
|
306 |
dump = "";
|
315 |
}
|
307 |
}
|
316 |
break;
|
308 |
break;
|
317 |
}
|
309 |
}
|
318 |
if (tag == "table" || tag == "td" || tag == "textarea" ||
|
310 |
if (tag == "table" || tag == "td" || tag == "textarea" ||
|