a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp
...
...
223
    }
223
    }
224
};
224
};
225
225
226
const size_t MboxCache::o_b1size = 1024;
226
const size_t MboxCache::o_b1size = 1024;
227
static class MboxCache mcache;
227
static class MboxCache mcache;
228
static const string keyquirks("mhmboxquirks");
228
229
229
// Destructor: all teardown is delegated to clear().
// NOTE(review): what clear() releases is not visible from here --
// presumably the open mbox file handle and per-document state; confirm.
MimeHandlerMbox::~MimeHandlerMbox()
{
    clear();
}
...
...
263
    fseek((FILE *)m_vfp, 0, SEEK_END);
264
    fseek((FILE *)m_vfp, 0, SEEK_END);
264
    m_fsize = ftell((FILE*)m_vfp);
265
    m_fsize = ftell((FILE*)m_vfp);
265
    fseek((FILE*)m_vfp, 0, SEEK_SET);
266
    fseek((FILE*)m_vfp, 0, SEEK_SET);
266
    m_havedoc = true;
267
    m_havedoc = true;
267
    m_offsets.clear();
268
    m_offsets.clear();
269
    m_quirks = 0;
270
271
    // Check for location-based quirks:
272
    RclConfig *config = RclConfig::getMainConfig();
273
    string quirks;
274
    if (config && config->getConfParam(keyquirks, quirks)) {
275
  if (quirks == "tbird") {
276
      LOGDEB(("MimeHandlerMbox: setting quirks TBIRD\n"));
277
      m_quirks |= MBOXQUIRK_TBIRD;
278
  }
279
    }
280
268
    return true;
281
    return true;
269
}
282
}
270
283
271
// Nominal maximum line length, in bytes, for mbox line buffers.
#define LL 1024
// Fixed-size line buffer: LL bytes plus 10 bytes of slack.
// NOTE(review): the slack presumably leaves room for the '\n' and NUL
// that the reading loops write back after stripping the line ending --
// confirm against the callers before changing either size.
typedef char line_type[LL+10];
...
...
323
// in "Date: " header format, like:   From - Mon, 8 May 2006 10:57:32
336
// in "Date: " header format, like:   From - Mon, 8 May 2006 10:57:32
324
// This was added as an alternative format. By the way it also fools "mail" and
337
// This was added as an alternative format. By the way it also fools "mail" and
325
// emacs-vm, Recoll is not alone
338
// emacs-vm, Recoll is not alone
326
// Update: 2009-11-27: word after From may be quoted string: From "john bull"
339
// Update: 2009-11-27: word after From may be quoted string: From "john bull"
327
static const  char *frompat =  
340
static const  char *frompat =  
328
#if 0 //1.9.0
329
    "^From .* [1-2][0-9][0-9][0-9]$";
330
#endif
331
#if 1
332
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+"    // 'From (toto@tutu|"john bull") '
341
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+"    // 'From (toto@tutu|"john bull") '
333
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
342
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
334
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+"             // Time, seconds optional
343
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+"             // Time, seconds optional
335
"([^ ]+[ ]+)?"                                        // Optional tz
344
"([^ ]+[ ]+)?"                                        // Optional tz
336
"[12][0-9][0-9][0-9]"            // Year, unanchored, more data may follow
345
"[12][0-9][0-9][0-9]"            // Year, unanchored, more data may follow
...
...
338
"^From[ ]+[^ ]+[ ]+"                                   // From toto@tutu
347
"^From[ ]+[^ ]+[ ]+"                                   // From toto@tutu
339
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May
348
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May
340
"[12][0-9][0-9][0-9][ ]+"                              // Year
349
"[12][0-9][0-9][0-9][ ]+"                              // Year
341
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?"                  // Time, secs optional
350
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?"                  // Time, secs optional
342
    ;
351
    ;
343
#endif
352
344
    //    "([ ]+[-+][0-9]{4})?$"
345
// POSIX regex object matched against candidate "From " separator lines
// with regexec() in next_document().
// NOTE(review): presumably compiled from frompat -- the regcomp() call
// is not visible from here; confirm.
static regex_t fromregex;
// Starts out false (static storage is zero-initialized).
// NOTE(review): presumably set once fromregex has been compiled so the
// compilation is done only once -- confirm where it is assigned.
static bool regcompiled;
347
355
348
bool MimeHandlerMbox::next_document()
356
bool MimeHandlerMbox::next_document()
349
{
357
{
...
...
423
            hademptyline, ll, line));
431
            hademptyline, ll, line));
424
        if (ll <= 0) {
432
        if (ll <= 0) {
425
        hademptyline = true;
433
        hademptyline = true;
426
        continue;
434
        continue;
427
        }
435
        }
436
      // Non empty line. If the previous one was empty, check regex
437
      if (hademptyline) {
438
      // Tbird sometimes omits the empty line, so avoid resetting
439
      // state (initially true) and hope for the best
440
      if (!(m_quirks & MBOXQUIRK_TBIRD))
441
          hademptyline = false;
428
        if (hademptyline && !regexec(&fromregex, line, 0, 0, 0)) {
442
      if (!regexec(&fromregex, line, 0, 0, 0)) {
429
      LOGDEB0(("MimeHandlerMbox: msgnum %d, From_ at line %d: [%s]\n",
443
          LOGDEB0(("MimeHandlerMbox: msgnum %d, "
430
                         m_msgnum, m_lineno, line));
444
           "From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
431
        start = ftello(fp);
445
            start = ftello(fp);
432
        m_offsets.push_back(off_From);
446
            m_offsets.push_back(off_From);
433
        m_msgnum++;
447
            m_msgnum++;
434
        break;
448
            break;
449
      }
435
        }
450
        }
436
      hademptyline = false;
437
    }
451
    }
438
452
439
    // Look for next 'From ' line or eof, end of message.
453
    // Look for next 'From ' line or eof, end of message.
440
    for (;;) {
454
    for (;;) {
441
        end = ftello(fp);
455
        end = ftello(fp);
...
...
447
        m_lineno++;
461
        m_lineno++;
448
        int ll;
462
        int ll;
449
        stripendnl(line, ll);
463
        stripendnl(line, ll);
450
        LOGDEB2(("End: hadempty %d ll %d Line: [%s]\n", 
464
        LOGDEB2(("End: hadempty %d ll %d Line: [%s]\n", 
451
            hademptyline, ll, line));
465
            hademptyline, ll, line));
466
      if (hademptyline) {
467
      if (ll > 0) {
468
          if (!(m_quirks & MBOXQUIRK_TBIRD))
469
          hademptyline = false;
452
        if (hademptyline && !regexec(&fromregex, line, 0, 0, 0)) {
470
         if (!regexec(&fromregex, line, 0, 0, 0)) {
453
        // Rewind to start of "From " line
471
         // Rewind to start of "From " line
454
        fseek(fp, end, SEEK_SET);
472
         fseek(fp, end, SEEK_SET);
455
        m_lineno--;
473
         m_lineno--;
456
        break;
474
         break;
475
          }
476
      }
477
      } else if (ll <= 0) {
478
      hademptyline = true;
457
        }
479
        }
458
        if (mtarg <= 0 || m_msgnum == mtarg) {
480
        if (mtarg <= 0 || m_msgnum == mtarg) {
459
        line[ll] = '\n';
481
        line[ll] = '\n';
460
        line[ll+1] = 0;
482
        line[ll+1] = 0;
461
        msgtxt += line;
483
        msgtxt += line;
462
      }
463
      if (ll <= 0) {
464
      hademptyline = true;
465
      } else {
466
      hademptyline = false;
467
        }
484
        }
468
    }
485
    }
469
486
470
    } while (mtarg > 0 && m_msgnum < mtarg);
487
    } while (mtarg > 0 && m_msgnum < mtarg);
471
488