Switch to unified view

a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp
...
...
179
      in_pre_tag(false),
179
      in_pre_tag(false),
180
      pending_space(false),
180
      pending_space(false),
181
      indexing_allowed(true)
181
      indexing_allowed(true)
182
{
182
{
183
    // The default html document charset is iso-8859-1. We'll update
183
    // The default html document charset is iso-8859-1. We'll update
184
    // this value from the encoding tag if found.
184
    // this value from the encoding tag if found. Actually use cp1252 which
185
    charset = "iso-8859-1";
185
    // is a superset
186
    charset = "CP1252";
186
}
187
}
187
188
188
void MyHtmlParser::decode_entities(string &s)
189
void MyHtmlParser::decode_entities(string &s)
189
{
190
{
190
    LOGDEB2(("MyHtmlParser::decode_entities\n"));
191
    LOGDEB2(("MyHtmlParser::decode_entities\n"));
...
...
400
                parseMimeHeaderValue(content, p);
401
                parseMimeHeaderValue(content, p);
401
                map<string, string>::const_iterator k;
402
                map<string, string>::const_iterator k;
402
                if ((k = p.params.find(cstr_html_charset)) != 
403
                if ((k = p.params.find(cstr_html_charset)) != 
403
                p.params.end()) {
404
                p.params.end()) {
404
                charset = k->second;
405
                charset = k->second;
406
              if (!charset.empty() && 
405
                if (!samecharset(charset, fromcharset)) {
407
                    !samecharset(charset, fromcharset)) {
406
                    LOGDEB1(("Doc http-equiv charset '%s' "
408
                    LOGDEB1(("Doc http-equiv charset '%s' "
407
                        "differs from dir deflt '%s'\n",
409
                        "differs from dir deflt '%s'\n",
408
                        charset.c_str(), 
410
                        charset.c_str(), 
409
                        fromcharset.c_str()));
411
                        fromcharset.c_str()));
410
                    throw false;
412
                    throw false;
...
...
416
        string newcharset;
418
        string newcharset;
417
        if (get_parameter(cstr_html_charset, newcharset)) {
419
        if (get_parameter(cstr_html_charset, newcharset)) {
418
            // HTML5 added: <meta charset="...">
420
            // HTML5 added: <meta charset="...">
419
            lowercase_term(newcharset);
421
            lowercase_term(newcharset);
420
            charset = newcharset;
422
            charset = newcharset;
423
          if (!charset.empty() && 
421
            if (!samecharset(charset, fromcharset)) {
424
          !samecharset(charset, fromcharset)) {
422
            LOGDEB1(("Doc html5 charset '%s' "
425
            LOGDEB1(("Doc html5 charset '%s' "
423
                 "differs from dir deflt '%s'\n",
426
                 "differs from dir deflt '%s'\n",
424
                 charset.c_str(), 
427
                 charset.c_str(), 
425
                 fromcharset.c_str()));
428
                 fromcharset.c_str()));
426
            throw false;
429
            throw false;