|
a/src/internfile/myhtmlparse.cpp |
|
b/src/internfile/myhtmlparse.cpp |
|
... |
|
... |
33 |
#include "mimeparse.h"
|
33 |
#include "mimeparse.h"
|
34 |
#include "smallut.h"
|
34 |
#include "smallut.h"
|
35 |
#include "cancelcheck.h"
|
35 |
#include "cancelcheck.h"
|
36 |
#include "debuglog.h"
|
36 |
#include "debuglog.h"
|
37 |
#include "transcode.h"
|
37 |
#include "transcode.h"
|
|
|
38 |
|
|
|
39 |
static const string cstr_html_charset("charset");
|
|
|
40 |
static const string cstr_html_content("content");
|
38 |
|
41 |
|
39 |
inline static bool
|
42 |
inline static bool
|
40 |
p_notdigit(char c)
|
43 |
p_notdigit(char c)
|
41 |
{
|
44 |
{
|
42 |
return !isdigit(static_cast<unsigned char>(c));
|
45 |
return !isdigit(static_cast<unsigned char>(c));
|
|
... |
|
... |
351 |
}
|
354 |
}
|
352 |
break;
|
355 |
break;
|
353 |
case 'm':
|
356 |
case 'm':
|
354 |
if (tag == "meta") {
|
357 |
if (tag == "meta") {
|
355 |
string content;
|
358 |
string content;
|
356 |
if (get_parameter(cstr_content, content)) {
|
359 |
if (get_parameter(cstr_html_content, content)) {
|
357 |
string name;
|
360 |
string name;
|
358 |
if (get_parameter("name", name)) {
|
361 |
if (get_parameter("name", name)) {
|
359 |
lowercase_term(name);
|
362 |
lowercase_term(name);
|
360 |
if (name == "date") {
|
363 |
if (name == "date") {
|
361 |
// Yes this doesnt exist. It's output by filters
|
364 |
// Yes this doesnt exist. It's output by filters
|
|
... |
|
... |
385 |
lowercase_term(hdr);
|
388 |
lowercase_term(hdr);
|
386 |
if (hdr == "content-type") {
|
389 |
if (hdr == "content-type") {
|
387 |
MimeHeaderValue p;
|
390 |
MimeHeaderValue p;
|
388 |
parseMimeHeaderValue(content, p);
|
391 |
parseMimeHeaderValue(content, p);
|
389 |
map<string, string>::const_iterator k;
|
392 |
map<string, string>::const_iterator k;
|
390 |
if ((k = p.params.find(cstr_charset)) !=
|
393 |
if ((k = p.params.find(cstr_html_charset)) !=
|
391 |
p.params.end()) {
|
394 |
p.params.end()) {
|
392 |
charset = k->second;
|
395 |
charset = k->second;
|
393 |
if (!samecharset(charset, fromcharset)) {
|
396 |
if (!samecharset(charset, fromcharset)) {
|
394 |
LOGDEB1(("Doc http-equiv charset '%s' "
|
397 |
LOGDEB1(("Doc http-equiv charset '%s' "
|
395 |
"differs from dir deflt '%s'\n",
|
398 |
"differs from dir deflt '%s'\n",
|
|
... |
|
... |
400 |
}
|
403 |
}
|
401 |
}
|
404 |
}
|
402 |
}
|
405 |
}
|
403 |
}
|
406 |
}
|
404 |
string newcharset;
|
407 |
string newcharset;
|
405 |
if (get_parameter(cstr_charset, newcharset)) {
|
408 |
if (get_parameter(cstr_html_charset, newcharset)) {
|
406 |
// HTML5 added: <meta charset="...">
|
409 |
// HTML5 added: <meta charset="...">
|
407 |
lowercase_term(newcharset);
|
410 |
lowercase_term(newcharset);
|
408 |
charset = newcharset;
|
411 |
charset = newcharset;
|
409 |
if (!samecharset(charset, fromcharset)) {
|
412 |
if (!samecharset(charset, fromcharset)) {
|
410 |
LOGDEB1(("Doc html5 charset '%s' "
|
413 |
LOGDEB1(("Doc html5 charset '%s' "
|