Switch to unified view

a/src/utils/idfile.cpp b/src/utils/idfile.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: idfile.cpp,v 1.4 2006-01-23 13:32:28 dockes Exp $ (C) 2005 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: idfile.cpp,v 1.5 2006-12-02 07:32:13 dockes Exp $ (C) 2005 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
29
29
30
#ifndef NO_NAMESPACES
30
#ifndef NO_NAMESPACES
31
using namespace std;
31
using namespace std;
32
#endif /* NO_NAMESPACES */
32
#endif /* NO_NAMESPACES */
33
33
34
/** 
35
 * This code is currently ONLY used to identify mbox and mail message files
36
 * which are badly handled by standard mime type identifiers.
37
 * There is a very old (circa 1990) mbox format using blocks of ^A (0x01) chars
38
 * to separate messages, which we do not currently recognize.
39
 */
40
34
std::list<string> idFileAllTypes()
41
std::list<string> idFileAllTypes()
35
{
42
{
36
    std::list<string> lst;
43
    std::list<string> lst;
37
    lst.push_back("text/x-mail");
44
    lst.push_back("text/x-mail");
38
    lst.push_back("message/rfc822");
45
    lst.push_back("message/rfc822");
39
    return lst;
46
    return lst;
40
}
47
}
41
48
42
// Mail headers we compare to:
49
// Mail headers we compare to:
43
static const char *mailhs[] = {"From: ", "Received: ", "Message-Id: ", "To: ", 
50
static const char *mailhs[] = {"From: ", "Received: ", "Message-Id: ", "To: ", 
44
                   "Date: ", "Subject: ", "Status: "};
51
                   "Date: ", "Subject: ", "Status: ", 
52
                 "In-Reply-To: "};
45
static const int mailhsl[] = {6, 10, 12, 4, 6, 9, 8};
53
static const int mailhsl[] = {6, 10, 12, 4, 6, 9, 8, 13};
46
static const int nmh = sizeof(mailhs) / sizeof(char *);
54
static const int nmh = sizeof(mailhs) / sizeof(char *);
47
55
48
const int wantnhead = 3;
56
const int wantnhead = 3;
49
57
50
string idFile(const char *fn)
58
string idFile(const char *fn)
...
...
55
    LOGERR(("idFile: could not open [%s]\n", fn));
63
    LOGERR(("idFile: could not open [%s]\n", fn));
56
    return string("");
64
    return string("");
57
    }       
65
    }       
58
66
59
    bool line1HasFrom = false;
67
    bool line1HasFrom = false;
68
    bool gotnonempty = false;
60
    int lookslikemail = 0;
69
    int lookslikemail = 0;
61
70
62
    // emacs VM sometimes inserts very long lines with continuations or
71
    // emacs VM sometimes inserts very long lines with continuations or
63
    // not (for folder information). This forces us to look at many
72
    // not (for folder information). This forces us to look at many
64
    // lines and long ones
73
    // lines and long ones
74
    int lnum = 1;
65
    for (int lnum = 1; lnum < 200; lnum++) {
75
    for (int loop = 1; loop < 200; loop++, lnum++) {
66
76
67
#define LL 1024
77
#define LL 1024
68
    char cline[LL+1];
78
    char cline[LL+1];
69
    cline[LL] = 0;
79
    cline[LL] = 0;
70
    input.getline(cline, LL-1);
80
    input.getline(cline, LL-1);
...
...
75
        }
85
        }
76
        // Must be eof ?
86
        // Must be eof ?
77
        break;
87
        break;
78
    }
88
    }
79
89
90
  // gcount includes the \n
91
  int ll = input.gcount() - 1; 
92
  if (ll > 0)
93
      gotnonempty = true;
94
80
    LOGDEB2(("idfile: lnum %d : [%s]\n", lnum, cline));
95
    LOGDEB2(("idfile: lnum %d ll %d: [%s]\n", lnum, ll, cline));
96
81
    // Check for a few things that can't be found in a mail file,
97
    // Check for a few things that can't be found in a mail file,
82
    // (optimization to get a quick negative
98
    // (optimization to get a quick negative)
83
99
100
  // Empty lines
101
  if (ll <= 0) {
102
      // Accept a few empty lines at the beginning of the file,
103
      // otherwise this is the end of headers
104
      if (gotnonempty || lnum > 10) {
105
      LOGDEB2(("Got empty line\n"));
106
      break;
107
      } else {
108
      // Don't increment the line counter for initial empty lines.
109
      lnum--;
110
      continue;
111
      }
112
  }
113
114
  // emacs vm can insert VERY long header lines.
115
  if (ll > 800) {
116
      LOGDEB2(("idFile: Line too long\n"));
117
      return string("");
118
  }
119
120
  // Check for mbox 'From ' line
121
  if (lnum == 1 && !strncmp("From ", cline, 5)) {
122
      line1HasFrom = true;
123
      continue;
124
  } 
125
126
  // Except for a possible first line with 'From ', lines must
84
    // Lines must begin with whitespace or have a colon in the
127
    // begin with whitespace or have a colon 
85
    // first 50 chars (hope no one comes up with a longer header
128
    // (hope no one comes up with a longer header name !
86
  // name !
87
    if (!isspace(cline[0])) {
129
    if (!isspace(cline[0])) {
88
        char *cp = strchr(cline, ':');
130
        char *cp = strchr(cline, ':');
89
        if (cp == 0 || (cp - cline) > 70) {
131
        if (cp == 0 || (cp - cline) > 70) {
90
        LOGDEB2(("idfile: can't be mail header line: [%s]\n", cline));
132
        LOGDEB2(("idfile: can't be mail header line: [%s]\n", cline));
91
        break;
133
        break;
92
        }
134
        }
93
    }
135
    }
94
 
95
  int ll = strlen(cline);
96
  if (ll > 1000) {
97
      LOGDEB2(("idFile: Line too long\n"));
98
      return string("");
99
  }
100
  if (lnum == 1) {
101
      if (!strncmp("From ", cline, 5)) {
102
      line1HasFrom = true;
103
      continue;
104
      }
105
  }
106
136
137
  // Compare to known headers
107
    for (int i = 0; i < nmh; i++) {
138
    for (int i = 0; i < nmh; i++) {
108
        if (!strncasecmp(mailhs[i], cline, mailhsl[i])) {
139
        if (!strncasecmp(mailhs[i], cline, mailhsl[i])) {
109
        //fprintf(stderr, "Got [%s]\n", mailhs[i]);
140
        //fprintf(stderr, "Got [%s]\n", mailhs[i]);
110
        lookslikemail++;
141
        lookslikemail++;
111
        break;
142
        break;
...
...
137
#include "debuglog.h"
168
#include "debuglog.h"
138
#include "idfile.h"
169
#include "idfile.h"
139
170
140
int main(int argc, char **argv)
171
int main(int argc, char **argv)
141
{
172
{
142
    if (argc != 2) {
173
    if (argc < 2) {
143
    cerr << "Usage: idfile filename" << endl;
174
    cerr << "Usage: idfile filename" << endl;
144
    exit(1);
175
    exit(1);
145
    }
176
    }
146
    DebugLog::getdbl()->setloglevel(DEBDEB1);
177
    DebugLog::getdbl()->setloglevel(DEBDEB1);
147
    DebugLog::setfilename("stderr");
178
    DebugLog::setfilename("stderr");
179
    for (int i = 1; i < argc; i++) {
148
    string mime = idFile(argv[1]);
180
  string mime = idFile(argv[i]);
149
    cout << argv[1] << " : " << mime << endl;
181
  cout << argv[i] << " : " << mime << endl;
182
    }
150
    exit(0);
183
    exit(0);
151
}
184
}
152
185
153
#endif
186
#endif