|
a/src/internfile/mh_mbox.cpp |
|
b/src/internfile/mh_mbox.cpp |
|
... |
|
... |
223 |
}
|
223 |
}
|
224 |
};
|
224 |
};
|
225 |
|
225 |
|
226 |
const size_t MboxCache::o_b1size = 1024;
|
226 |
const size_t MboxCache::o_b1size = 1024;
|
227 |
static class MboxCache mcache;
|
227 |
static class MboxCache mcache;
|
|
|
228 |
static const string keyquirks("mhmboxquirks");
|
228 |
|
229 |
|
229 |
MimeHandlerMbox::~MimeHandlerMbox()
|
230 |
MimeHandlerMbox::~MimeHandlerMbox()
|
230 |
{
|
231 |
{
|
231 |
clear();
|
232 |
clear();
|
232 |
}
|
233 |
}
|
|
... |
|
... |
263 |
fseek((FILE *)m_vfp, 0, SEEK_END);
|
264 |
fseek((FILE *)m_vfp, 0, SEEK_END);
|
264 |
m_fsize = ftell((FILE*)m_vfp);
|
265 |
m_fsize = ftell((FILE*)m_vfp);
|
265 |
fseek((FILE*)m_vfp, 0, SEEK_SET);
|
266 |
fseek((FILE*)m_vfp, 0, SEEK_SET);
|
266 |
m_havedoc = true;
|
267 |
m_havedoc = true;
|
267 |
m_offsets.clear();
|
268 |
m_offsets.clear();
|
|
|
269 |
m_quirks = 0;
|
|
|
270 |
|
|
|
271 |
// Check for location-based quirks:
|
|
|
272 |
RclConfig *config = RclConfig::getMainConfig();
|
|
|
273 |
string quirks;
|
|
|
274 |
if (config && config->getConfParam(keyquirks, quirks)) {
|
|
|
275 |
if (quirks == "tbird") {
|
|
|
276 |
LOGDEB(("MimeHandlerMbox: setting quirks TBIRD\n"));
|
|
|
277 |
m_quirks |= MBOXQUIRK_TBIRD;
|
|
|
278 |
}
|
|
|
279 |
}
|
|
|
280 |
|
268 |
return true;
|
281 |
return true;
|
269 |
}
|
282 |
}
|
270 |
|
283 |
|
271 |
#define LL 1024
|
284 |
#define LL 1024
|
272 |
typedef char line_type[LL+10];
|
285 |
typedef char line_type[LL+10];
|
|
... |
|
... |
323 |
// in "Date: " header format, like: From - Mon, 8 May 2006 10:57:32
|
336 |
// in "Date: " header format, like: From - Mon, 8 May 2006 10:57:32
|
324 |
// This was added as an alternative format. By the way it also fools "mail" and
|
337 |
// This was added as an alternative format. By the way it also fools "mail" and
|
325 |
// emacs-vm, Recoll is not alone
|
338 |
// emacs-vm, Recoll is not alone
|
326 |
// Update: 2009-11-27: word after From may be quoted string: From "john bull"
|
339 |
// Update: 2009-11-27: word after From may be quoted string: From "john bull"
|
327 |
static const char *frompat =
|
340 |
static const char *frompat =
|
328 |
#if 0 //1.9.0
|
|
|
329 |
"^From .* [1-2][0-9][0-9][0-9]$";
|
|
|
330 |
#endif
|
|
|
331 |
#if 1
|
|
|
332 |
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+" // 'From (toto@tutu|"john bull") '
|
341 |
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+" // 'From (toto@tutu|"john bull") '
|
333 |
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
|
342 |
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
|
334 |
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+" // Time, seconds optional
|
343 |
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+" // Time, seconds optional
|
335 |
"([^ ]+[ ]+)?" // Optional tz
|
344 |
"([^ ]+[ ]+)?" // Optional tz
|
336 |
"[12][0-9][0-9][0-9]" // Year, unanchored, more data may follow
|
345 |
"[12][0-9][0-9][0-9]" // Year, unanchored, more data may follow
|
|
... |
|
... |
338 |
"^From[ ]+[^ ]+[ ]+" // From toto@tutu
|
347 |
"^From[ ]+[^ ]+[ ]+" // From toto@tutu
|
339 |
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May
|
348 |
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May
|
340 |
"[12][0-9][0-9][0-9][ ]+" // Year
|
349 |
"[12][0-9][0-9][0-9][ ]+" // Year
|
341 |
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional
|
350 |
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional
|
342 |
;
|
351 |
;
|
343 |
#endif
|
352 |
|
344 |
// "([ ]+[-+][0-9]{4})?$"
|
|
|
345 |
static regex_t fromregex;
|
353 |
static regex_t fromregex;
|
346 |
static bool regcompiled;
|
354 |
static bool regcompiled;
|
347 |
|
355 |
|
348 |
bool MimeHandlerMbox::next_document()
|
356 |
bool MimeHandlerMbox::next_document()
|
349 |
{
|
357 |
{
|
|
... |
|
... |
423 |
hademptyline, ll, line));
|
431 |
hademptyline, ll, line));
|
424 |
if (ll <= 0) {
|
432 |
if (ll <= 0) {
|
425 |
hademptyline = true;
|
433 |
hademptyline = true;
|
426 |
continue;
|
434 |
continue;
|
427 |
}
|
435 |
}
|
|
|
436 |
// Non empty line. If the previous one was empty, check regex
|
|
|
437 |
if (hademptyline) {
|
|
|
438 |
// Tbird sometimes omits the empty line, so avoid resetting
|
|
|
439 |
// state (initially true) and hope for the best
|
|
|
440 |
if (!(m_quirks & MBOXQUIRK_TBIRD))
|
|
|
441 |
hademptyline = false;
|
428 |
if (hademptyline && !regexec(&fromregex, line, 0, 0, 0)) {
|
442 |
if (!regexec(&fromregex, line, 0, 0, 0)) {
|
429 |
LOGDEB0(("MimeHandlerMbox: msgnum %d, From_ at line %d: [%s]\n",
|
443 |
LOGDEB0(("MimeHandlerMbox: msgnum %d, "
|
430 |
m_msgnum, m_lineno, line));
|
444 |
"From_ at line %d: [%s]\n", m_msgnum, m_lineno, line));
|
431 |
start = ftello(fp);
|
445 |
start = ftello(fp);
|
432 |
m_offsets.push_back(off_From);
|
446 |
m_offsets.push_back(off_From);
|
433 |
m_msgnum++;
|
447 |
m_msgnum++;
|
434 |
break;
|
448 |
break;
|
|
|
449 |
}
|
435 |
}
|
450 |
}
|
436 |
hademptyline = false;
|
|
|
437 |
}
|
451 |
}
|
438 |
|
452 |
|
439 |
// Look for next 'From ' line or eof, end of message.
|
453 |
// Look for next 'From ' line or eof, end of message.
|
440 |
for (;;) {
|
454 |
for (;;) {
|
441 |
end = ftello(fp);
|
455 |
end = ftello(fp);
|
|
... |
|
... |
447 |
m_lineno++;
|
461 |
m_lineno++;
|
448 |
int ll;
|
462 |
int ll;
|
449 |
stripendnl(line, ll);
|
463 |
stripendnl(line, ll);
|
450 |
LOGDEB2(("End: hadempty %d ll %d Line: [%s]\n",
|
464 |
LOGDEB2(("End: hadempty %d ll %d Line: [%s]\n",
|
451 |
hademptyline, ll, line));
|
465 |
hademptyline, ll, line));
|
|
|
466 |
if (hademptyline) {
|
|
|
467 |
if (ll > 0) {
|
|
|
468 |
if (!(m_quirks & MBOXQUIRK_TBIRD))
|
|
|
469 |
hademptyline = false;
|
452 |
if (hademptyline && !regexec(&fromregex, line, 0, 0, 0)) {
|
470 |
if (!regexec(&fromregex, line, 0, 0, 0)) {
|
453 |
// Rewind to start of "From " line
|
471 |
// Rewind to start of "From " line
|
454 |
fseek(fp, end, SEEK_SET);
|
472 |
fseek(fp, end, SEEK_SET);
|
455 |
m_lineno--;
|
473 |
m_lineno--;
|
456 |
break;
|
474 |
break;
|
|
|
475 |
}
|
|
|
476 |
}
|
|
|
477 |
} else if (ll <= 0) {
|
|
|
478 |
hademptyline = true;
|
457 |
}
|
479 |
}
|
458 |
if (mtarg <= 0 || m_msgnum == mtarg) {
|
480 |
if (mtarg <= 0 || m_msgnum == mtarg) {
|
459 |
line[ll] = '\n';
|
481 |
line[ll] = '\n';
|
460 |
line[ll+1] = 0;
|
482 |
line[ll+1] = 0;
|
461 |
msgtxt += line;
|
483 |
msgtxt += line;
|
462 |
}
|
|
|
463 |
if (ll <= 0) {
|
|
|
464 |
hademptyline = true;
|
|
|
465 |
} else {
|
|
|
466 |
hademptyline = false;
|
|
|
467 |
}
|
484 |
}
|
468 |
}
|
485 |
}
|
469 |
|
486 |
|
470 |
} while (mtarg > 0 && m_msgnum < mtarg);
|
487 |
} while (mtarg > 0 && m_msgnum < mtarg);
|
471 |
|
488 |
|