|
a/src/internfile/mh_text.cpp |
|
b/src/internfile/mh_text.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: mh_text.cpp,v 1.5 2006-03-20 15:14:08 dockes Exp $ (C) 2005 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: mh_text.cpp,v 1.6 2006-12-15 12:40:02 dockes Exp $ (C) 2005 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
29 |
#include "debuglog.h"
|
29 |
#include "debuglog.h"
|
30 |
#include "readfile.h"
|
30 |
#include "readfile.h"
|
31 |
#include "transcode.h"
|
31 |
#include "transcode.h"
|
32 |
|
32 |
|
33 |
// Process a plain text file
|
33 |
// Process a plain text file
|
34 |
// NOTE(review): this region looks like a side-by-side diff dump. Bare "N |"
// and "|" lines are table residue, and lines of the previous mkDoc()
// implementation are interleaved with the new set_document_file() body.
MimeHandler::Status MimeHandlerText::mkDoc(RclConfig *conf, const string &fn,
|
34 |
// set_document_file(): read the file named by fn into a local string with
// file_to_string(). Returns false if the read fails; otherwise returns
// whatever set_document_string() returns for the file contents.
bool MimeHandlerText::set_document_file(const string &fn)
|
35 |
const string &mtype, Rcl::Doc &docout, string&)
|
|
|
36 |
{
|
35 |
{
|
37 |
string otext;
|
36 |
string otext;
|
38 |
if (!file_to_string(fn, otext))
|
37 |
if (!file_to_string(fn, otext))
|
39 |
return MimeHandler::MHError;
|
38 |
return false;
|
40 |
|
39 |
return set_document_string(otext);
|
41 |
// Try to guess charset, then convert to utf-8, and fill document
|
40 |
}
|
42 |
// fields. The charset guesser really doesn't work well in general
|
41 |
|
43 |
// and should be avoided (especially for short documents)
|
42 |
// set_document_string(): keep a copy of otext in m_text, set m_havedoc to
// true, and return true. The new-version lines perform no charset conversion
// here.
// NOTE(review): the charset/csguess/getDefCharset statements interleaved
// below appear to be lines of the previous mkDoc() body from a diff dump,
// not part of this function.
bool MimeHandlerText::set_document_string(const string& otext)
|
44 |
string charset;
|
43 |
{
|
45 |
if (conf->getGuessCharset()) {
|
44 |
m_text = otext;
|
46 |
charset = csguess(otext, conf->getDefCharset());
|
45 |
m_havedoc = true;
|
47 |
} else
|
46 |
return true;
|
48 |
charset = conf->getDefCharset();
|
47 |
}
|
49 |
|
48 |
|
|
|
49 |
// next_document(): returns false when m_havedoc is false. Otherwise it clears
// m_havedoc and transcodes m_text from m_defcharset to UTF-8 directly into
// the m_metaData["content"] string. On success it sets the "origcharset",
// "charset" ("utf-8") and "mimetype" ("text/plain") entries of m_metaData and
// returns true. On transcode failure it logs an error, empties the "content"
// string and returns false.
// NOTE(review): statements that use charset, otext, docout or
// MimeHandler::MH* appear to be lines of the previous mkDoc() body
// interleaved by a side-by-side diff dump; bare "N |" and "|" lines are
// table residue.
bool MimeHandlerText::next_document()
|
|
|
50 |
{
|
|
|
51 |
if (m_havedoc == false)
|
|
|
52 |
return false;
|
|
|
53 |
m_havedoc = false;
|
50 |
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
|
54 |
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
|
51 |
charset.c_str()));
|
55 |
m_defcharset.c_str()));
|
52 |
|
56 |
|
53 |
string utf8;
|
57 |
// Avoid unneeded copy. This gets a reference to an empty string which is
|
|
|
58 |
// the entry for "content"
|
|
|
59 |
string& utf8 = m_metaData["content"];
|
|
|
60 |
|
|
|
61 |
// Note that we transcode always even if defcharset is already utf-8:
|
|
|
62 |
// this validates the encoding.
|
54 |
if (!transcode(otext, utf8, charset, "UTF-8")) {
|
63 |
if (!transcode(m_text, utf8, m_defcharset, "UTF-8")) {
|
55 |
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
|
64 |
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
|
56 |
"for charset [%s]\n", charset.c_str()));
|
65 |
"for charset [%s]\n", m_defcharset.c_str()));
|
57 |
otext.erase();
|
66 |
// Leave the "content" entry empty on failure.
utf8.erase();
|
58 |
return MimeHandler::MHError;
|
67 |
return false;
|
59 |
}
|
68 |
}
|
60 |
|
69 |
|
61 |
docout.origcharset = charset;
|
70 |
m_metaData["origcharset"] = m_defcharset;
|
62 |
docout.text = utf8;
|
71 |
m_metaData["charset"] = "utf-8";
|
63 |
return MimeHandler::MHDone;
|
72 |
m_metaData["mimetype"] = "text/plain";
|
|
|
73 |
return true;
|
64 |
}
|
74 |
}
|