|
a/src/internfile/mh_html.cpp |
|
b/src/internfile/mh_html.cpp |
|
... |
|
... |
13 |
* along with this program; if not, write to the
|
13 |
* along with this program; if not, write to the
|
14 |
* Free Software Foundation, Inc.,
|
14 |
* Free Software Foundation, Inc.,
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
16 |
*/
|
16 |
*/
|
17 |
|
17 |
|
18 |
|
18 |
#include "cstr.h"
|
19 |
#include "mimehandler.h"
|
19 |
#include "mimehandler.h"
|
20 |
#include "debuglog.h"
|
20 |
#include "debuglog.h"
|
21 |
#include "csguess.h"
|
21 |
#include "csguess.h"
|
22 |
#include "readfile.h"
|
22 |
#include "readfile.h"
|
23 |
#include "transcode.h"
|
23 |
#include "transcode.h"
|
|
... |
|
... |
72 |
|
72 |
|
73 |
string charset = m_dfltInputCharset;
|
73 |
string charset = m_dfltInputCharset;
|
74 |
LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n",
|
74 |
LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n",
|
75 |
charset.c_str()));
|
75 |
charset.c_str()));
|
76 |
// Override default input charset if someone took care to set one:
|
76 |
// Override default input charset if someone took care to set one:
|
77 |
map<string,string>::const_iterator it = m_metaData.find("charset");
|
77 |
map<string,string>::const_iterator it = m_metaData.find(cstr_charset);
|
78 |
if (it != m_metaData.end() && !it->second.empty()) {
|
78 |
if (it != m_metaData.end() && !it->second.empty()) {
|
79 |
charset = it->second;
|
79 |
charset = it->second;
|
80 |
LOGDEB(("MHHtml: next_doc.: input charset from metadata: [%s]\n",
|
80 |
LOGDEB(("MHHtml: next_doc.: input charset from metadata: [%s]\n",
|
81 |
charset.c_str()));
|
81 |
charset.c_str()));
|
82 |
}
|
82 |
}
|
|
... |
|
... |
100 |
LOGDEB(("textHtmlToDoc: transcode failed from cs '%s' to UTF-8 for"
|
100 |
LOGDEB(("textHtmlToDoc: transcode failed from cs '%s' to UTF-8 for"
|
101 |
"[%s]", charset.c_str(), fn.empty()?"unknown":fn.c_str()));
|
101 |
"[%s]", charset.c_str(), fn.empty()?"unknown":fn.c_str()));
|
102 |
transcoded = m_html;
|
102 |
transcoded = m_html;
|
103 |
// We don't know the charset, at all
|
103 |
// We don't know the charset, at all
|
104 |
p.reset_charsets();
|
104 |
p.reset_charsets();
|
105 |
charset = "";
|
105 |
charset.clear();
|
106 |
} else {
|
106 |
} else {
|
107 |
if (ecnt) {
|
107 |
if (ecnt) {
|
108 |
if (pass == 0) {
|
108 |
if (pass == 0) {
|
109 |
LOGDEB(("textHtmlToDoc: init transcode had %d errors for "
|
109 |
LOGDEB(("textHtmlToDoc: init transcode had %d errors for "
|
110 |
"[%s]\n", ecnt, fn.empty()?"unknown":fn.c_str()));
|
110 |
"[%s]\n", ecnt, fn.empty()?"unknown":fn.c_str()));
|
|
... |
|
... |
161 |
}
|
161 |
}
|
162 |
}
|
162 |
}
|
163 |
}
|
163 |
}
|
164 |
|
164 |
|
165 |
m_metaData["origcharset"] = result.get_charset();
|
165 |
m_metaData["origcharset"] = result.get_charset();
|
166 |
m_metaData["content"] = result.dump;
|
166 |
m_metaData[cstr_content] = result.dump;
|
167 |
m_metaData["charset"] = "utf-8";
|
167 |
m_metaData[cstr_charset] = "utf-8";
|
168 |
// Avoid setting empty values which would crush ones possibly inherited
|
168 |
// Avoid setting empty values which would crush ones possibly inherited
|
169 |
// from parent (if we're an attachment)
|
169 |
// from parent (if we're an attachment)
|
170 |
if (!result.dmtime.empty())
|
170 |
if (!result.dmtime.empty())
|
171 |
m_metaData["modificationdate"] = result.dmtime;
|
171 |
m_metaData["modificationdate"] = result.dmtime;
|
172 |
m_metaData["mimetype"] = "text/plain";
|
172 |
m_metaData[cstr_mimetype] = cstr_textplain;
|
173 |
|
173 |
|
174 |
for (map<string,string>::const_iterator it = result.meta.begin();
|
174 |
for (map<string,string>::const_iterator it = result.meta.begin();
|
175 |
it != result.meta.end(); it++) {
|
175 |
it != result.meta.end(); it++) {
|
176 |
if (!it->second.empty())
|
176 |
if (!it->second.empty())
|
177 |
m_metaData[it->first] = it->second;
|
177 |
m_metaData[it->first] = it->second;
|