...
#include <cstdlib>

#include <map>
#include <sstream>

#include "cstr.h"
#include "mimehandler.h"
#include "readfile.h"
#include "transcode.h"
#include "mimeparse.h"
#include "mh_mail.h"
...
#include "mime.h"

using namespace std;

// File-local integer limit.
// NOTE(review): not referenced in the visible part of this file; the name
// suggests a cap on nesting/recursion depth -- confirm at the use site.
static const int maxdepth = 20;



// File-local names for metadata fields. cstr_title and cstr_abstract are
// used in this file as keys into m_metaData; the others are presumably
// used the same way in parts of the file not shown here -- TODO confirm.
// Other keys used in this file (cstr_mimetype, cstr_content, cstr_charset,
// cstr_ipath, cstr_textplain) have no definition here; presumably they are
// provided by "cstr.h" -- confirm.
static const string cstr_recipient = "recipient";
static const string cstr_modificationdate = "modificationdate";
static const string cstr_title = "title";
static const string cstr_msgid = "msgid";
static const string cstr_abstract = "abstract";



MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt) 
    : RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
{

...
    if (!m_havedoc)
    return false;
    bool res = false;

    if (m_idx == -1) {
    m_metaData[cstr_mimetype] = cstr_textplain;
    res = processMsg(m_bincdoc, 0);
    LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
        m_metaData[cstr_mimetype].c_str()));
        const string& txt = m_metaData[cstr_content];
        if (m_startoftext < txt.size())
            m_metaData[cstr_abstract] = 
                truncate_to_word(txt.substr(m_startoftext), 250);
    } else {
        m_metaData[cstr_abstract].clear();
    res = processAttach();
    }
    m_idx++;
    m_havedoc = m_idx < (int)m_attachments.size();
    if (!m_havedoc) {
...
    return false;
    }
    MHMailAttach *att = m_attachments[m_idx];

    m_metaData[cstr_mimetype] = att->m_contentType;
    m_metaData[cstr_charset] = att->m_charset;
    m_metaData["filename"] = att->m_filename;
    // Change the title to something helpful
    m_metaData[cstr_title] = att->m_filename + "  (" + m_subject + ")";
    LOGDEB1(("  processAttach:ct [%s] cs [%s] fn [%s]\n", 
        att->m_contentType.c_str(),
...
    body = decoded;

    // Special case for text/plain content. Internfile should deal
    // with this but it expects text/plain to be utf-8 already, so we
    // handle the transcoding if needed
    if (m_metaData[cstr_mimetype] == cstr_textplain && 
    stringicmp(m_metaData[cstr_charset], "UTF-8")) {
    string utf8;
    if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) {
        LOGERR(("  processAttach: transcode to utf-8 failed "
            "for charset [%s]\n", m_metaData[cstr_charset].c_str()));
        // Just let it through and hope for the best...
    } else {
        body = utf8;
    }
    }
...
    }

    // Ipath
    char nbuf[20];
    sprintf(nbuf, "%d", m_idx);
    m_metaData[cstr_ipath] = nbuf;

    return true;
}

// Transform a single message into a document. The subject becomes the
...
            continue;
        }
        MimeHeaderValue content_type;
        parseMimeHeaderValue(hi.getValue(), content_type);
        LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));
        if (!stringlowercmp(cstr_textplain, content_type.value))
            ittxt = it;
        else if (!stringlowercmp("text/html", content_type.value)) 
            ithtml = it;
        }
        if (ittxt != doc->members.end()) {
...
    // Part is not multipart: it must be either simple or message. Take
    // a look at interesting headers and a possible filename parameter

    // Get and parse content-type header.
    Binc::HeaderItem hi;
    string ctt = cstr_textplain;
    if (doc->h.getFirstHeader("Content-Type", hi)) {
    ctt = hi.getValue();
    }
    LOGDEB2(("walkmime:content-type: %s\n", ctt.c_str()));
    MimeHeaderValue content_type;
...
    // 8 bit chars exist in a message that is stated as us-ascii. Ie the 
    // mailer used by yahoo support ('KANA') does this. We could convert 
    // to iso-8859 only if the transfer-encoding is 8 bit, or test for
    // actual 8 bit chars, but what the heck, let's use 8859-1 as default
    string charset;
    it = content_type.params.find(string(cstr_charset));
    if (it != content_type.params.end())
    charset = it->second;
    if (charset.empty() || 
    !stringlowercmp("us-ascii", charset) || 
    !stringlowercmp("default", charset) || 
...

    // If the Content-Disposition is not inline, we treat it as
    // attachment, as per rfc2183. 
    // If it is inline but not text or html, same thing.
    if (stringlowercmp("inline", content_disposition.value) ||
    (stringlowercmp(cstr_textplain, content_type.value) && 
     stringlowercmp("text/html", content_type.value)) ) {
    if (!filename.empty()) {
        out += "\n";
        if (m_forPreview)
        out += "[" + dispindic + " " + content_type.value + ": ";

	a/src/internfile/mh_mail.cpp		b/src/internfile/mh_mail.cpp
	...		...
23	#include <cstdlib>	23	#include <cstdlib>
24		24
25	#include <map>	25	#include <map>
26	#include <sstream>	26	#include <sstream>
27		27
		28	#include "cstr.h"
28	#include "mimehandler.h"	29	#include "mimehandler.h"
29	#include "readfile.h"	30	#include "readfile.h"
30	#include "transcode.h"	31	#include "transcode.h"
31	#include "mimeparse.h"	32	#include "mimeparse.h"
32	#include "mh_mail.h"	33	#include "mh_mail.h"
	...		...
41	#include "mime.h"	42	#include "mime.h"
42		43
43	using namespace std;	44	using namespace std;
44		45
45	static const int maxdepth = 20;	46	static const int maxdepth = 20;
46	static const string cstr_mimetype = "mimetype";
47	static const string cstr_content = "content";
48	static const string cstr_author = "author";
49	static const string cstr_recipient = "recipient";	47	static const string cstr_recipient = "recipient";
50	static const string cstr_modificationdate = "modificationdate";	48	static const string cstr_modificationdate = "modificationdate";
51	static const string cstr_title = "title";	49	static const string cstr_title = "title";
52	static const string cstr_msgid = "msgid";	50	static const string cstr_msgid = "msgid";
53	static const string cstr_abstract = "abstract";	51	static const string cstr_abstract = "abstract";
54
55	static const string cstr_newline = "\n";
56		52
57	MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)	53	MimeHandlerMail::MimeHandlerMail(RclConfig *cnf, const string &mt)
58	: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)	54	: RecollFilter(cnf, mt), m_bincdoc(0), m_fd(-1), m_stream(0), m_idx(-1)
59	{	55	{
60		56
	...		...
174	if (!m_havedoc)	170	if (!m_havedoc)
175	return false;	171	return false;
176	bool res = false;	172	bool res = false;
177		173
178	if (m_idx == -1) {	174	if (m_idx == -1) {
179	m_metaData[cstr_mimetype] = "text/plain";	175	m_metaData[cstr_mimetype] = cstr_textplain;
180	res = processMsg(m_bincdoc, 0);	176	res = processMsg(m_bincdoc, 0);
181	LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",	177	LOGDEB1(("MimeHandlerMail::next_document: mimetype %s\n",
182	m_metaData[cstr_mimetype].c_str()));	178	m_metaData[cstr_mimetype].c_str()));
183	const string& txt = m_metaData[cstr_content];	179	const string& txt = m_metaData[cstr_content];
184	if (m_startoftext < txt.size())	180	if (m_startoftext < txt.size())
185	m_metaData[cstr_abstract] =	181	m_metaData[cstr_abstract] =
186	truncate_to_word(txt.substr(m_startoftext), 250);	182	truncate_to_word(txt.substr(m_startoftext), 250);
187	} else {	183	} else {
188	m_metaData[cstr_abstract] = "";	184	m_metaData[cstr_abstract].clear();
189	res = processAttach();	185	res = processAttach();
190	}	186	}
191	m_idx++;	187	m_idx++;
192	m_havedoc = m_idx < (int)m_attachments.size();	188	m_havedoc = m_idx < (int)m_attachments.size();
193	if (!m_havedoc) {	189	if (!m_havedoc) {
	...		...
238	return false;	234	return false;
239	}	235	}
240	MHMailAttach *att = m_attachments[m_idx];	236	MHMailAttach *att = m_attachments[m_idx];
241		237
242	m_metaData[cstr_mimetype] = att->m_contentType;	238	m_metaData[cstr_mimetype] = att->m_contentType;
243	m_metaData["charset"] = att->m_charset;	239	m_metaData[cstr_charset] = att->m_charset;
244	m_metaData["filename"] = att->m_filename;	240	m_metaData["filename"] = att->m_filename;
245	// Change the title to something helpul	241	// Change the title to something helpul
246	m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")";	242	m_metaData[cstr_title] = att->m_filename + " (" + m_subject + ")";
247	LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",	243	LOGDEB1((" processAttach:ct [%s] cs [%s] fn [%s]\n",
248	att->m_contentType.c_str(),	244	att->m_contentType.c_str(),
	...		...
261	body = decoded;	257	body = decoded;
262		258
263	// Special case for text/plain content. Internfile should deal	259	// Special case for text/plain content. Internfile should deal
264	// with this but it expects text/plain to be utf-8 already, so we	260	// with this but it expects text/plain to be utf-8 already, so we
265	// handle the transcoding if needed	261	// handle the transcoding if needed
266	if (m_metaData[cstr_mimetype] == "text/plain" &&	262	if (m_metaData[cstr_mimetype] == cstr_textplain &&
267	stringicmp(m_metaData["charset"], "UTF-8")) {	263	stringicmp(m_metaData[cstr_charset], "UTF-8")) {
268	string utf8;	264	string utf8;
269	if (!transcode(body, utf8, m_metaData["charset"], "UTF-8")) {	265	if (!transcode(body, utf8, m_metaData[cstr_charset], "UTF-8")) {
270	LOGERR((" processAttach: transcode to utf-8 failed "	266	LOGERR((" processAttach: transcode to utf-8 failed "
271	"for charset [%s]\n", m_metaData["charset"].c_str()));	267	"for charset [%s]\n", m_metaData[cstr_charset].c_str()));
272	// Just let it through and hope for the best...	268	// Just let it through and hope for the best...
273	} else {	269	} else {
274	body = utf8;	270	body = utf8;
275	}	271	}
276	}	272	}
	...		...
286	}	282	}
287		283
288	// Ipath	284	// Ipath
289	char nbuf[20];	285	char nbuf[20];
290	sprintf(nbuf, "%d", m_idx);	286	sprintf(nbuf, "%d", m_idx);
291	m_metaData["ipath"] = nbuf;	287	m_metaData[cstr_ipath] = nbuf;
292		288
293	return true;	289	return true;
294	}	290	}
295		291
296	// Transform a single message into a document. The subject becomes the	292	// Transform a single message into a document. The subject becomes the
	...		...
450	continue;	446	continue;
451	}	447	}
452	MimeHeaderValue content_type;	448	MimeHeaderValue content_type;
453	parseMimeHeaderValue(hi.getValue(), content_type);	449	parseMimeHeaderValue(hi.getValue(), content_type);
454	LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));	450	LOGDEB2(("walkmime: C-type: %s\n",content_type.value.c_str()));
455	if (!stringlowercmp("text/plain", content_type.value))	451	if (!stringlowercmp(cstr_textplain, content_type.value))
456	ittxt = it;	452	ittxt = it;
457	else if (!stringlowercmp("text/html", content_type.value))	453	else if (!stringlowercmp("text/html", content_type.value))
458	ithtml = it;	454	ithtml = it;
459	}	455	}
460	if (ittxt != doc->members.end()) {	456	if (ittxt != doc->members.end()) {
	...		...
471	// Part is not multipart: it must be either simple or message. Take	467	// Part is not multipart: it must be either simple or message. Take
472	// a look at interesting headers and a possible filename parameter	468	// a look at interesting headers and a possible filename parameter
473		469
474	// Get and parse content-type header.	470	// Get and parse content-type header.
475	Binc::HeaderItem hi;	471	Binc::HeaderItem hi;
476	string ctt = "text/plain";	472	string ctt = cstr_textplain;
477	if (doc->h.getFirstHeader("Content-Type", hi)) {	473	if (doc->h.getFirstHeader("Content-Type", hi)) {
478	ctt = hi.getValue();	474	ctt = hi.getValue();
479	}	475	}
480	LOGDEB2(("walkmime:content-type: %s\n", ctt.c_str()));	476	LOGDEB2(("walkmime:content-type: %s\n", ctt.c_str()));
481	MimeHeaderValue content_type;	477	MimeHeaderValue content_type;
	...		...
529	// 8 bit chars exist in a message that is stated as us-ascii. Ie the	525	// 8 bit chars exist in a message that is stated as us-ascii. Ie the
530	// mailer used by yahoo support ('KANA') does this. We could convert	526	// mailer used by yahoo support ('KANA') does this. We could convert
531	// to iso-8859 only if the transfer-encoding is 8 bit, or test for	527	// to iso-8859 only if the transfer-encoding is 8 bit, or test for
532	// actual 8 bit chars, but what the heck, le'ts use 8859-1 as default	528	// actual 8 bit chars, but what the heck, le'ts use 8859-1 as default
533	string charset;	529	string charset;
534	it = content_type.params.find(string("charset"));	530	it = content_type.params.find(string(cstr_charset));
535	if (it != content_type.params.end())	531	if (it != content_type.params.end())
536	charset = it->second;	532	charset = it->second;
537	if (charset.empty() \|\|	533	if (charset.empty() \|\|
538	!stringlowercmp("us-ascii", charset) \|\|	534	!stringlowercmp("us-ascii", charset) \|\|
539	!stringlowercmp("default", charset) \|\|	535	!stringlowercmp("default", charset) \|\|
	...		...
553		549
554	// If the Content-Disposition is not inline, we treat it as	550	// If the Content-Disposition is not inline, we treat it as
555	// attachment, as per rfc2183.	551	// attachment, as per rfc2183.
556	// If it is inline but not text or html, same thing.	552	// If it is inline but not text or html, same thing.
557	if (stringlowercmp("inline", content_disposition.value) \|\|	553	if (stringlowercmp("inline", content_disposition.value) \|\|
558	(stringlowercmp("text/plain", content_type.value) &&	554	(stringlowercmp(cstr_textplain, content_type.value) &&
559	stringlowercmp("text/html", content_type.value)) ) {	555	stringlowercmp("text/html", content_type.value)) ) {
560	if (!filename.empty()) {	556	if (!filename.empty()) {
561	out += "\n";	557	out += "\n";
562	if (m_forPreview)	558	if (m_forPreview)
563	out += "[" + dispindic + " " + content_type.value + ": ";	559	out += "[" + dispindic + " " + content_type.value + ": ";