Switch to unified view

a/src/internfile/mh_mbox.cpp b/src/internfile/mh_mbox.cpp
...
...
127
// and my own's.
127
// and my own's.
128
// Update, 2008-08-29: some old? Thunderbird versions apparently use a date
128
// Update, 2008-08-29: some old? Thunderbird versions apparently use a date
129
// in "Date: " header format, like:   From - Mon, 8 May 2006 10:57:32
129
// in "Date: " header format, like:   From - Mon, 8 May 2006 10:57:32
130
// This was added as an alternative format. By the way it also fools "mail" and
130
// This was added as an alternative format. By the way it also fools "mail" and
131
// emacs-vm, Recoll is not alone
131
// emacs-vm, Recoll is not alone
132
//
132
// Update: 2009-11-27: word after From may be quoted string: From "john bull"
133
static const  char *frompat =  
133
static const  char *frompat =  
134
#if 0 //1.9.0
134
#if 0 //1.9.0
135
    "^From .* [1-2][0-9][0-9][0-9]$";
135
    "^From .* [1-2][0-9][0-9][0-9]$";
136
#endif
136
#endif
137
#if 1
137
#if 1
138
"^From[ ]+[^ ]+[ ]+"                                  // From whatever
138
"^From[ ]+([^ ]+|\"[^\"]+\")[ ]+"    // 'From (toto@tutu|"john bull") '
139
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Date
139
"[[:alpha:]]{3}[ ]+[[:alpha:]]{3}[ ]+[0-3 ][0-9][ ]+" // Fri Oct 26
140
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+"             // Time, seconds optional
140
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?[ ]+"             // Time, seconds optional
141
"([^ ]+[ ]+)?"                                        // Optional tz
141
"([^ ]+[ ]+)?"                                        // Optional tz
142
"[12][0-9][0-9][0-9]"            // Year, unanchored, more data may follow
142
"[12][0-9][0-9][0-9]"            // Year, unanchored, more data may follow
143
"|"      // Or standard mail Date: header format
143
"|"      // Or standard mail Date: header format
144
"^From[ ]+[^ ]+[ ]+"                                  // From toto@tutu
144
"^From[ ]+[^ ]+[ ]+"                                   // From toto@tutu
145
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Date Mon, 8 May
145
"[[:alpha:]]{3},[ ]+[0-3]?[0-9][ ]+[[:alpha:]]{3}[ ]+" // Mon, 8 May
146
"[12][0-9][0-9][0-9][ ]+"            // Year
146
"[12][0-9][0-9][0-9][ ]+"                              // Year
147
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?" // Time, secs optional: 10:57(:32)?
147
"[0-2][0-9]:[0-5][0-9](:[0-5][0-9])?"                  // Time, secs optional
148
    ;
148
    ;
149
#endif
149
#endif
150
    //    "([ ]+[-+][0-9]{4})?$"
150
    //    "([ ]+[-+][0-9]{4})?$"
151
static regex_t fromregex;
151
static regex_t fromregex;
152
static bool regcompiled;
152
static bool regcompiled;
...
...
333
  while (mh.has_documents()) {
333
  while (mh.has_documents()) {
334
      if (!mh.next_document()) {
334
      if (!mh.next_document()) {
335
      cerr << "next_document failed" << endl;
335
      cerr << "next_document failed" << endl;
336
      exit(1);
336
      exit(1);
337
      }
337
      }
338
      map<string, string>::const_iterator it = 
339
  mh.get_meta_data().find("content");
340
      int size;
341
      if (it == mh.get_meta_data().end()) {
342
  size = -1;
343
      } else {
344
  size = it->second.length();
345
      }
346
      cout << "Doc " << docnt << " size " << size  << endl;
338
      docnt++;
347
      docnt++;
339
  }
348
  }
340
  cout << docnt << " documents found in " << filename << endl;
349
  cout << docnt << " documents found in " << filename << endl;
341
  exit(0);
350
  exit(0);
342
}
351
}