a/src/internfile/mh_mail.cpp b/src/internfile/mh_mail.cpp
...
...
23
#include <cstdlib>
23
#include <cstdlib>
24
24
25
#include <map>
25
#include <map>
26
#include <sstream>
26
#include <sstream>
27
27
28
#include "cstr.h"
28
#include "mimehandler.h"
29
#include "mimehandler.h"
29
#include "readfile.h"
30
#include "readfile.h"
30
#include "transcode.h"
31
#include "transcode.h"
31
#include "mimeparse.h"
32
#include "mimeparse.h"
32
#include "mh_mail.h"
33
#include "mh_mail.h"
...
...
41
#include "mime.h"
42
#include "mime.h"
42
43
43
using namespace std;
44
using namespace std;
44
45
45
static const int maxdepth = 20;
46
static const int maxdepth = 20;
46
static const string cstr_mimetype = "mimetype";
47
static const string cstr_content = "content";
48
static const string cstr_author = "author";
49
static const string cstr_recipient = "recipient";
47
static const string cstr_recipient = "recipient";
50
static const string cstr_modificationdate = "modificationdate";
48
static const string cstr_modificationdate = "modificationdate";
51
static const string cstr_title = "title";
49
static const string cstr_title = "title";
52
static const string cstr_msgid = "msgid";
50
static const string cstr_msgid = "msgid";
53
static const string cstr_abstract = "abstract";
51
static const string cstr_abstract = "abstract";
54
55
static const string cstr_newline = "\n";
56
52
57
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt) 
53
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt) 
58
    : RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
54
    : RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
59
{
55
{
60
56
...
...
174
    if (!m_havedoc)
170
    if (!m_havedoc)
175
    return false;
171
    return false;
176
    bool res = false;
172
    bool res = false;
177
173
178
    if (m_idx == -1) {
174
    if (m_idx == -1) {
179
    m_metaData[cstr_mimetype] = "text/plain";
175
    m_metaData[cstr_mimetype] = cstr_textplain;
180
    res = processMsg(m_bincdoc, 0);
176
    res = processMsg(m_bincdoc, 0);
181
    LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
177
    LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
182
        m_metaData[cstr_mimetype].c_str()));
178
        m_metaData[cstr_mimetype].c_str()));
183
        const string& txt = m_metaData[cstr_content];
179
        const string& txt = m_metaData[cstr_content];
184
        if (m_startoftext < txt.size())
180
        if (m_startoftext < txt.size())
185
            m_metaData[cstr_abstract] = 
181
            m_metaData[cstr_abstract] = 
186
                truncate_to_word(txt.substr(m_startoftext), 250);
182
                truncate_to_word(txt.substr(m_startoftext), 250);
187
    } else {
183
    } else {
188
        m_metaData[cstr_abstract] = "";
184
        m_metaData[cstr_abstract].clear();
189
    res = processAttach();
185
    res = processAttach();
190
    }
186
    }
191
    m_idx++;
187
    m_idx++;
192
    m_havedoc = m_idx < (int)m_attachments.size();
188
    m_havedoc = m_idx < (int)m_attachments.size();
193
    if (!m_havedoc) {
189
    if (!m_havedoc) {
...
...
238
    return false;
234
    return false;
239
    }
235
    }
240
    MHMailAttach *att = m_attachments[m_idx];
236
    MHMailAttach *att = m_attachments[m_idx];
241
237
242
    m_metaData[cstr_mimetype] = att->m_contentType;
238
    m_metaData[cstr_mimetype] = att->m_contentType;
243
    m_metaData["charset"] = att->m_charset;
239
    m_metaData[cstr_charset] = att->m_charset;
244
    m_metaData["filename"] = att->m_filename;
240
    m_metaData["filename"] = att->m_filename;
245
    // Change the title to something helpful
241
    // Change the title to something helpful
246
    m_metaData[cstr_title] = att->m_filename + "  (" + m_subject + ")";
242
    m_metaData[cstr_title] = att->m_filename + "  (" + m_subject + ")";
247
    LOGDEB1(("  processAttach:ct [%s] cs [%s] fn [%s]\n", 
243
    LOGDEB1(("  processAttach:ct [%s] cs [%s] fn [%s]\n", 
248
        att->m_contentType.c_str(),
244
        att->m_contentType.c_str(),
...
...
261
    body = decoded;
257
    body = decoded;
262
258
263
    // Special case for text/plain content. Internfile should deal
259
    // Special case for text/plain content. Internfile should deal
264
    // with this but it expects text/plain to be utf-8 already, so we
260
    // with this but it expects text/plain to be utf-8 already, so we
265
    // handle the transcoding if needed
261
    // handle the transcoding if needed
266
    if (m_metaData[cstr_mimetype] == "text/plain" && 
262
    if (m_metaData[cstr_mimetype] == cstr_textplain && 
267
    stringicmp(m_metaData["charset"], "UTF-8")) {
263
    stringicmp(m_metaData[cstr_charset], "UTF-8")) {
268
    string utf8;
264
    string utf8;
269
    if (!transcode(body, utf8, m_metaData["charset"], "UTF-8")) {
265
    if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) {
270
        LOGERR(("  processAttach: transcode to utf-8 failed "
266
        LOGERR(("  processAttach: transcode to utf-8 failed "
271
            "for charset [%s]\n", m_metaData["charset"].c_str()));
267
            "for charset [%s]\n", m_metaData[cstr_charset].c_str()));
272
        // Just let it through and hope for the best...
268
        // Just let it through and hope for the best...
273
    } else {
269
    } else {
274
        body = utf8;
270
        body = utf8;
275
    }
271
    }
276
    }
272
    }
...
...
286
    }
282
    }
287
283
288
    // Ipath
284
    // Ipath
289
    char nbuf[20];
285
    char nbuf[20];
290
    sprintf(nbuf, "%d", m_idx);
286
    sprintf(nbuf, "%d", m_idx);
291
    m_metaData["ipath"] = nbuf;
287
    m_metaData[cstr_ipath] = nbuf;
292
288
293
    return true;
289
    return true;
294
}
290
}
295
291
296
// Transform a single message into a document. The subject becomes the
292
// Transform a single message into a document. The subject becomes the
...
...
450
            continue;
446
            continue;
451
        }
447
        }
452
        MimeHeaderValue content_type;
448
        MimeHeaderValue content_type;
453
        parseMimeHeaderValue(hi.getValue(), content_type);
449
        parseMimeHeaderValue(hi.getValue(), content_type);
454
        LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));
450
        LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));
455
        if (!stringlowercmp("text/plain", content_type.value))
451
        if (!stringlowercmp(cstr_textplain, content_type.value))
456
            ittxt = it;
452
            ittxt = it;
457
        else if (!stringlowercmp("text/html", content_type.value)) 
453
        else if (!stringlowercmp("text/html", content_type.value)) 
458
            ithtml = it;
454
            ithtml = it;
459
        }
455
        }
460
        if (ittxt != doc->members.end()) {
456
        if (ittxt != doc->members.end()) {
...
...
471
    // Part is not multipart: it must be either simple or message. Take
467
    // Part is not multipart: it must be either simple or message. Take
472
    // a look at interesting headers and a possible filename parameter
468
    // a look at interesting headers and a possible filename parameter
473
469
474
    // Get and parse content-type header.
470
    // Get and parse content-type header.
475
    Binc::HeaderItem hi;
471
    Binc::HeaderItem hi;
476
    string ctt = "text/plain";
472
    string ctt = cstr_textplain;
477
    if (doc->h.getFirstHeader("Content-Type", hi)) {
473
    if (doc->h.getFirstHeader("Content-Type", hi)) {
478
    ctt = hi.getValue();
474
    ctt = hi.getValue();
479
    }
475
    }
480
    LOGDEB2(("walkmime:content-type: %s\n", ctt.c_str()));
476
    LOGDEB2(("walkmime:content-type: %s\n", ctt.c_str()));
481
    MimeHeaderValue content_type;
477
    MimeHeaderValue content_type;
...
...
529
    // 8 bit chars exist in a message that is stated as us-ascii. Ie the 
525
    // 8 bit chars exist in a message that is stated as us-ascii. Ie the 
530
    // mailer used by yahoo support ('KANA') does this. We could convert 
526
    // mailer used by yahoo support ('KANA') does this. We could convert 
531
    // to iso-8859 only if the transfer-encoding is 8 bit, or test for
527
    // to iso-8859 only if the transfer-encoding is 8 bit, or test for
532
    // actual 8 bit chars, but what the heck, let's use 8859-1 as default
528
    // actual 8 bit chars, but what the heck, let's use 8859-1 as default
533
    string charset;
529
    string charset;
534
    it = content_type.params.find(string("charset"));
530
    it = content_type.params.find(string(cstr_charset));
535
    if (it != content_type.params.end())
531
    if (it != content_type.params.end())
536
    charset = it->second;
532
    charset = it->second;
537
    if (charset.empty() || 
533
    if (charset.empty() || 
538
    !stringlowercmp("us-ascii", charset) || 
534
    !stringlowercmp("us-ascii", charset) || 
539
    !stringlowercmp("default", charset) || 
535
    !stringlowercmp("default", charset) || 
...
...
553
549
554
    // If the Content-Disposition is not inline, we treat it as
550
    // If the Content-Disposition is not inline, we treat it as
555
    // attachment, as per rfc2183. 
551
    // attachment, as per rfc2183. 
556
    // If it is inline but not text or html, same thing.
552
    // If it is inline but not text or html, same thing.
557
    if (stringlowercmp("inline", content_disposition.value) ||
553
    if (stringlowercmp("inline", content_disposition.value) ||
558
    (stringlowercmp("text/plain", content_type.value) && 
554
    (stringlowercmp(cstr_textplain, content_type.value) && 
559
     stringlowercmp("text/html", content_type.value)) ) {
555
     stringlowercmp("text/html", content_type.value)) ) {
560
    if (!filename.empty()) {
556
    if (!filename.empty()) {
561
        out += "\n";
557
        out += "\n";
562
        if (m_forPreview)
558
        if (m_forPreview)
563
        out += "[" + dispindic + " " + content_type.value + ": ";
559
        out += "[" + dispindic + " " + content_type.value + ": ";