Switch to unified view

a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp
...
...
358
        if (get_parameter(cstr_html_content, content)) {
358
        if (get_parameter(cstr_html_content, content)) {
359
            string name;
359
            string name;
360
            if (get_parameter("name", name)) {
360
            if (get_parameter("name", name)) {
361
            lowercase_term(name);
361
            lowercase_term(name);
362
            if (name == "date") {
362
            if (name == "date") {
363
              // Yes this doesnt exist. It's output by filters
363
              // Specific to Recoll filters.
364
              // And the format isn't even standard http/html
365
              // FIXME
366
                decode_entities(content);
364
                decode_entities(content);
367
                struct tm tm;
365
                struct tm tm;
368
                if (strptime(content.c_str(), 
366
                if (strptime(content.c_str(), 
369
                     " %Y-%m-%d %H:%M:%S ", &tm) ||
367
                     " %Y-%m-%d %H:%M:%S ", &tm) ||
370
                strptime(content.c_str(), 
368
                strptime(content.c_str(), 
...
...
374
                sprintf(ascuxtime, "%ld", (long)mktime(&tm));
372
                sprintf(ascuxtime, "%ld", (long)mktime(&tm));
375
                dmtime = ascuxtime;
373
                dmtime = ascuxtime;
376
                }
374
                }
377
            } else if (name == "robots") {
375
            } else if (name == "robots") {
378
            } else {
376
            } else {
377
              string markup;
378
              bool ishtml = false;
379
              if (get_parameter("markup", markup)) {
380
              if (!stringlowercmp("html", markup)) {
381
                  ishtml = true;
382
              }
383
              }
379
                if (!meta[name].empty())
384
                if (!meta[name].empty())
380
                meta[name] += ' ';
385
                meta[name] += ' ';
381
                decode_entities(content);
386
                decode_entities(content);
382
                meta[name] += content;
387
                meta[name] += content;
388
              if (ishtml && 
389
              meta[name].compare(0, cstr_fldhtm.size(), 
390
                         cstr_fldhtm)) {
391
              meta[name].insert(0, cstr_fldhtm);
392
              }
383
            }
393
            }
384
            } 
394
            } 
385
            string hdr;
395
            string hdr;
386
            if (get_parameter("http-equiv", hdr)) {
396
            if (get_parameter("http-equiv", hdr)) {
387
            lowercase_term(hdr);
397
            lowercase_term(hdr);
...
...
415
                 fromcharset.c_str()));
425
                 fromcharset.c_str()));
416
            throw false;
426
            throw false;
417
            }
427
            }
418
        }
428
        }
419
        break;
429
        break;
420
      }
421
        if (tag == "marquee" || tag == "menu" || tag == "multicol")
430
        } else if (tag == "marquee" || tag == "menu" || tag == "multicol")
422
        pending_space = true;
431
        pending_space = true;
423
        break;
432
        break;
424
    case 'o':
433
    case 'o':
425
        if (tag == "ol" || tag == "option") pending_space = true;
434
        if (tag == "ol" || tag == "option") pending_space = true;
426
        break;
435
        break;
...
...
439
        break;
448
        break;
440
    case 's':
449
    case 's':
441
        if (tag == "style") {
450
        if (tag == "style") {
442
        in_style_tag = true;
451
        in_style_tag = true;
443
        break;
452
        break;
444
      }
445
        if (tag == "script") {
453
        } else if (tag == "script") {
446
        in_script_tag = true;
454
        in_script_tag = true;
447
        break;
455
        break;
448
      }
456
      } else if (tag == "select") 
449
      if (tag == "select") pending_space = true;
457
      pending_space = true;
450
        break;
458
        break;
451
    case 't':
459
    case 't':
452
        if (tag == "table" || tag == "td" || tag == "textarea" ||
460
        if (tag == "table" || tag == "td" || tag == "textarea" ||
453
        tag == "th") pending_space = true;
461
        tag == "th") pending_space = true;
454
        break;
462
        break;