Switch to unified view

a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: mh_mbox.cpp,v 1.3 2007-12-13 06:58:21 dockes Exp $ (C) 2005 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: mh_mbox.cpp,v 1.4 2008-08-29 13:05:12 dockes Exp $ (C) 2005 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
113
//  Emacs VM botch ? Adds tz between hour and year
113
//  Emacs VM botch ? Adds tz between hour and year
114
//      From dockes Wed Feb 23 10:31:20 +0100 2005
114
//      From dockes Wed Feb 23 10:31:20 +0100 2005
115
//      From dockes Fri Dec  1 20:36:39 +0100 2006
115
//      From dockes Fri Dec  1 20:36:39 +0100 2006
116
// The modified regexp gives the exact same results on the ietf mail archive
116
// The modified regexp gives the exact same results on the ietf mail archive
117
// and on my own.
117
// and on my own.
118
// Update, 2008-08-29: some (possibly old) Thunderbird versions apparently use a date
119
// in "Date: " header format, like:   From - Mon, 8 May 2006 10:57:32
120
// This was added as an alternative format. By the way it also fools "mail" and
121
// emacs-vm, so Recoll is not alone.
122
//
118
static const  char *frompat =  
123
static const  char *frompat =  
119
#if 0 //1.9.0
124
#if 0 //1.9.0
120
    "^From .* [1-2][0-9][0-9][0-9]$";
125
    "^From .* [1-2][0-9][0-9][0-9]$";
121
#endif
126
#endif
122
#if 1
127
#if 1
123
"^From[ ]+[^ ]+[ ]+"                                  // From toto@tutu
128
"^From[ ]+[^ ]+[ ]+"                                  // From whatever
124
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Date
129
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Date
125
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+"             // Time, seconds optional
130
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+"             // Time, seconds optional
126
"([^ ]+[ ]+)?"                                        // Optional tz
131
"([^ ]+[ ]+)?"                                        // Optional tz
127
"[12][0-9][0-9][0-9]"            // Year, unanchored, more data may follow
132
"[12][0-9][0-9][0-9]"            // Year, unanchored, more data may follow
133
"|"      // Or standard mail Date: header format
134
"^From[ ]+[^ ]+[ ]+"                                  // From toto@tutu
135
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Date Mon, 8 May
136
"[12][0-9][0-9][0-9][ ]+"            // Year
137
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional: 10:57(:32)?
128
    ;
138
    ;
129
#endif
139
#endif
130
    //    "([ ]+[-+][0-9]{4})?$"
140
    //    "([ ]+[-+][0-9]{4})?$"
131
static regex_t fromregex;
141
static regex_t fromregex;
132
static bool regcompiled;
142
static bool regcompiled;