|
a/src/internfile/mh_mail.cpp |
|
b/src/internfile/mh_mail.cpp |
|
... |
|
... |
23 |
#include <cstdlib>
|
23 |
#include <cstdlib>
|
24 |
|
24 |
|
25 |
#include <map>
|
25 |
#include <map>
|
26 |
#include <sstream>
|
26 |
#include <sstream>
|
27 |
|
27 |
|
|
|
28 |
#include "cstr.h"
|
28 |
#include "mimehandler.h"
|
29 |
#include "mimehandler.h"
|
29 |
#include "readfile.h"
|
30 |
#include "readfile.h"
|
30 |
#include "transcode.h"
|
31 |
#include "transcode.h"
|
31 |
#include "mimeparse.h"
|
32 |
#include "mimeparse.h"
|
32 |
#include "mh_mail.h"
|
33 |
#include "mh_mail.h"
|
|
... |
|
... |
41 |
#include "mime.h"
|
42 |
#include "mime.h"
|
42 |
|
43 |
|
43 |
using namespace std;
|
44 |
using namespace std;
|
44 |
|
45 |
|
45 |
static const int maxdepth = 20;
|
46 |
static const int maxdepth = 20;
|
46 |
static const string cstr_mimetype = "mimetype";
|
|
|
47 |
static const string cstr_content = "content";
|
|
|
48 |
static const string cstr_author = "author";
|
|
|
49 |
static const string cstr_recipient = "recipient";
|
47 |
static const string cstr_recipient = "recipient";
|
50 |
static const string cstr_modificationdate = "modificationdate";
|
48 |
static const string cstr_modificationdate = "modificationdate";
|
51 |
static const string cstr_title = "title";
|
49 |
static const string cstr_title = "title";
|
52 |
static const string cstr_msgid = "msgid";
|
50 |
static const string cstr_msgid = "msgid";
|
53 |
static const string cstr_abstract = "abstract";
|
51 |
static const string cstr_abstract = "abstract";
|
54 |
|
|
|
55 |
static const string cstr_newline = "\n";
|
|
|
56 |
|
52 |
|
57 |
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
|
53 |
MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
|
58 |
: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
54 |
: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
|
59 |
{
|
55 |
{
|
60 |
|
56 |
|
|
... |
|
... |
174 |
if (!m_havedoc)
|
170 |
if (!m_havedoc)
|
175 |
return false;
|
171 |
return false;
|
176 |
bool res = false;
|
172 |
bool res = false;
|
177 |
|
173 |
|
178 |
if (m_idx == -1) {
|
174 |
if (m_idx == -1) {
|
179 |
m_metaData[cstr_mimetype] = "text/plain";
|
175 |
m_metaData[cstr_mimetype] = cstr_textplain;
|
180 |
res = processMsg(m_bincdoc, 0);
|
176 |
res = processMsg(m_bincdoc, 0);
|
181 |
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
|
177 |
LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
|
182 |
m_metaData[cstr_mimetype].c_str()));
|
178 |
m_metaData[cstr_mimetype].c_str()));
|
183 |
const string& txt = m_metaData[cstr_content];
|
179 |
const string& txt = m_metaData[cstr_content];
|
184 |
if (m_startoftext < txt.size())
|
180 |
if (m_startoftext < txt.size())
|
185 |
m_metaData[cstr_abstract] =
|
181 |
m_metaData[cstr_abstract] =
|
186 |
truncate_to_word(txt.substr(m_startoftext), 250);
|
182 |
truncate_to_word(txt.substr(m_startoftext), 250);
|
187 |
} else {
|
183 |
} else {
|
188 |
m_metaData[cstr_abstract] = "";
|
184 |
m_metaData[cstr_abstract].clear();
|
189 |
res = processAttach();
|
185 |
res = processAttach();
|
190 |
}
|
186 |
}
|
191 |
m_idx++;
|
187 |
m_idx++;
|
192 |
m_havedoc = m_idx < (int)m_attachments.size();
|
188 |
m_havedoc = m_idx < (int)m_attachments.size();
|
193 |
if (!m_havedoc) {
|
189 |
if (!m_havedoc) {
|
|
... |
|
... |
238 |
return false;
|
234 |
return false;
|
239 |
}
|
235 |
}
|
240 |
MHMailAttach *att = m_attachments[m_idx];
|
236 |
MHMailAttach *att = m_attachments[m_idx];
|
241 |
|
237 |
|
242 |
m_metaData[cstr_mimetype] = att->m_contentType;
|
238 |
m_metaData[cstr_mimetype] = att->m_contentType;
|
243 |
m_metaData["charset"] = att->m_charset;
|
239 |
m_metaData[cstr_charset] = att->m_charset;
|
244 |
m_metaData["filename"] = att->m_filename;
|
240 |
m_metaData["filename"] = att->m_filename;
|
245 |
// Change the title to something helpful
|
241 |
// Change the title to something helpful
|
246 |
m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")";
|
242 |
m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")";
|
247 |
LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",
|
243 |
LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",
|
248 |
att->m_contentType.c_str(),
|
244 |
att->m_contentType.c_str(),
|
|
... |
|
... |
261 |
body = decoded;
|
257 |
body = decoded;
|
262 |
|
258 |
|
263 |
// Special case for text/plain content. Internfile should deal
|
259 |
// Special case for text/plain content. Internfile should deal
|
264 |
// with this but it expects text/plain to be utf-8 already, so we
|
260 |
// with this but it expects text/plain to be utf-8 already, so we
|
265 |
// handle the transcoding if needed
|
261 |
// handle the transcoding if needed
|
266 |
if (m_metaData[cstr_mimetype] == "text/plain" &&
|
262 |
if (m_metaData[cstr_mimetype] == cstr_textplain &&
|
267 |
stringicmp(m_metaData["charset"], "UTF-8")) {
|
263 |
stringicmp(m_metaData[cstr_charset], "UTF-8")) {
|
268 |
string utf8;
|
264 |
string utf8;
|
269 |
if (!transcode(body, utf8, m_metaData["charset"], "UTF-8")) {
|
265 |
if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) {
|
270 |
LOGERR((" processAttach: transcode to utf-8 failed "
|
266 |
LOGERR((" processAttach: transcode to utf-8 failed "
|
271 |
"for charset [%s]\n", m_metaData["charset"].c_str()));
|
267 |
"for charset [%s]\n", m_metaData[cstr_charset].c_str()));
|
272 |
// Just let it through and hope for the best...
|
268 |
// Just let it through and hope for the best...
|
273 |
} else {
|
269 |
} else {
|
274 |
body = utf8;
|
270 |
body = utf8;
|
275 |
}
|
271 |
}
|
276 |
}
|
272 |
}
|
|
... |
|
... |
286 |
}
|
282 |
}
|
287 |
|
283 |
|
288 |
// Ipath
|
284 |
// Ipath
|
289 |
char nbuf[20];
|
285 |
char nbuf[20];
|
290 |
sprintf(nbuf, "%d", m_idx);
|
286 |
sprintf(nbuf, "%d", m_idx);
|
291 |
m_metaData["ipath"] = nbuf;
|
287 |
m_metaData[cstr_ipath] = nbuf;
|
292 |
|
288 |
|
293 |
return true;
|
289 |
return true;
|
294 |
}
|
290 |
}
|
295 |
|
291 |
|
296 |
// Transform a single message into a document. The subject becomes the
|
292 |
// Transform a single message into a document. The subject becomes the
|
|
... |
|
... |
450 |
continue;
|
446 |
continue;
|
451 |
}
|
447 |
}
|
452 |
MimeHeaderValue content_type;
|
448 |
MimeHeaderValue content_type;
|
453 |
parseMimeHeaderValue(hi.getValue(), content_type);
|
449 |
parseMimeHeaderValue(hi.getValue(), content_type);
|
454 |
LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));
|
450 |
LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));
|
455 |
if (!stringlowercmp("text/plain", content_type.value))
|
451 |
if (!stringlowercmp(cstr_textplain, content_type.value))
|
456 |
ittxt = it;
|
452 |
ittxt = it;
|
457 |
else if (!stringlowercmp("text/html", content_type.value))
|
453 |
else if (!stringlowercmp("text/html", content_type.value))
|
458 |
ithtml = it;
|
454 |
ithtml = it;
|
459 |
}
|
455 |
}
|
460 |
if (ittxt != doc->members.end()) {
|
456 |
if (ittxt != doc->members.end()) {
|
|
... |
|
... |
471 |
// Part is not multipart: it must be either simple or message. Take
|
467 |
// Part is not multipart: it must be either simple or message. Take
|
472 |
// a look at interesting headers and a possible filename parameter
|
468 |
// a look at interesting headers and a possible filename parameter
|
473 |
|
469 |
|
474 |
// Get and parse content-type header.
|
470 |
// Get and parse content-type header.
|
475 |
Binc::HeaderItem hi;
|
471 |
Binc::HeaderItem hi;
|
476 |
string ctt = "text/plain";
|
472 |
string ctt = cstr_textplain;
|
477 |
if (doc->h.getFirstHeader("Content-Type", hi)) {
|
473 |
if (doc->h.getFirstHeader("Content-Type", hi)) {
|
478 |
ctt = hi.getValue();
|
474 |
ctt = hi.getValue();
|
479 |
}
|
475 |
}
|
480 |
LOGDEB2(("walkmime:content-type: %s\n", ctt.c_str()));
|
476 |
LOGDEB2(("walkmime:content-type: %s\n", ctt.c_str()));
|
481 |
MimeHeaderValue content_type;
|
477 |
MimeHeaderValue content_type;
|
|
... |
|
... |
529 |
// 8 bit chars exist in a message that is stated as us-ascii. Ie the
|
525 |
// 8 bit chars exist in a message that is stated as us-ascii. Ie the
|
530 |
// mailer used by yahoo support ('KANA') does this. We could convert
|
526 |
// mailer used by yahoo support ('KANA') does this. We could convert
|
531 |
// to iso-8859 only if the transfer-encoding is 8 bit, or test for
|
527 |
// to iso-8859 only if the transfer-encoding is 8 bit, or test for
|
532 |
// actual 8 bit chars, but what the heck, let's use 8859-1 as default
|
528 |
// actual 8 bit chars, but what the heck, let's use 8859-1 as default
|
533 |
string charset;
|
529 |
string charset;
|
534 |
it = content_type.params.find(string("charset"));
|
530 |
it = content_type.params.find(string(cstr_charset));
|
535 |
if (it != content_type.params.end())
|
531 |
if (it != content_type.params.end())
|
536 |
charset = it->second;
|
532 |
charset = it->second;
|
537 |
if (charset.empty() ||
|
533 |
if (charset.empty() ||
|
538 |
!stringlowercmp("us-ascii", charset) ||
|
534 |
!stringlowercmp("us-ascii", charset) ||
|
539 |
!stringlowercmp("default", charset) ||
|
535 |
!stringlowercmp("default", charset) ||
|
|
... |
|
... |
553 |
|
549 |
|
554 |
// If the Content-Disposition is not inline, we treat it as
|
550 |
// If the Content-Disposition is not inline, we treat it as
|
555 |
// attachment, as per rfc2183.
|
551 |
// attachment, as per rfc2183.
|
556 |
// If it is inline but not text or html, same thing.
|
552 |
// If it is inline but not text or html, same thing.
|
557 |
if (stringlowercmp("inline", content_disposition.value) ||
|
553 |
if (stringlowercmp("inline", content_disposition.value) ||
|
558 |
(stringlowercmp("text/plain", content_type.value) &&
|
554 |
(stringlowercmp(cstr_textplain, content_type.value) &&
|
559 |
stringlowercmp("text/html", content_type.value)) ) {
|
555 |
stringlowercmp("text/html", content_type.value)) ) {
|
560 |
if (!filename.empty()) {
|
556 |
if (!filename.empty()) {
|
561 |
out += "\n";
|
557 |
out += "\n";
|
562 |
if (m_forPreview)
|
558 |
if (m_forPreview)
|
563 |
out += "[" + dispindic + " " + content_type.value + ": ";
|
559 |
out += "[" + dispindic + " " + content_type.value + ": ";
|