|
a/src/internfile/mh_execm.cpp |
|
b/src/internfile/mh_execm.cpp |
|
... |
|
... |
282 |
}
|
282 |
}
|
283 |
}
|
283 |
}
|
284 |
|
284 |
|
285 |
// Charset. For many document types it doesn't matter. For text
|
285 |
// Charset. For many document types it doesn't matter. For text
|
286 |
// and html it does. We supply a default from the
|
286 |
// and html it does. We supply a default from the
|
287 |
// configuration. We should do the text transcoding to utf-8 here
|
287 |
// configuration.
|
288 |
// like exec::finaldetails does.
|
288 |
bool trustcharset = true;
|
289 |
if (charset.empty()) {
|
289 |
if (charset.empty()) {
|
290 |
charset = cfgFilterOutputCharset.empty() ? "utf-8" :
|
290 |
charset = cfgFilterOutputCharset.empty() ? "utf-8" :
|
291 |
cfgFilterOutputCharset;
|
291 |
cfgFilterOutputCharset;
|
292 |
if (!stringlowercmp("default", charset)) {
|
292 |
if (!stringlowercmp("default", charset)) {
|
|
|
293 |
trustcharset = false;
|
293 |
charset = m_dfltInputCharset;
|
294 |
charset = m_dfltInputCharset;
|
294 |
}
|
295 |
}
|
295 |
}
|
296 |
}
|
|
|
297 |
|
|
|
298 |
string& output = m_metaData[cstr_content];
|
|
|
299 |
if (!m_metaData[cstr_mimetype].compare(cstr_textplain) &&
|
|
|
300 |
(!trustcharset || stringlowercmp("utf-8", charset))) {
|
|
|
301 |
string transcoded;
|
|
|
302 |
int ecnt;
|
|
|
303 |
if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) {
|
|
|
304 |
LOGERR(("mh_execm: transcode failed from [%s] to UTF-8\n",
|
|
|
305 |
charset.c_str()));
|
|
|
306 |
// Erase text in this case: it's garbage
|
|
|
307 |
output.clear();
|
|
|
308 |
} else {
|
|
|
309 |
if (ecnt) {
|
|
|
310 |
LOGDEB(("mh_exec: %d transcoding errors from [%s] to UTF-8\n",
|
|
|
311 |
ecnt, charset.c_str()));
|
|
|
312 |
}
|
|
|
313 |
output = transcoded;
|
|
|
314 |
charset = "utf-8";
|
|
|
315 |
}
|
|
|
316 |
}
|
|
|
317 |
|
296 |
m_metaData[cstr_charset] = charset;
|
318 |
m_metaData[cstr_charset] = charset;
|
297 |
|
319 |
|
298 |
if (eofnext_received)
|
320 |
if (eofnext_received)
|
299 |
m_havedoc = false;
|
321 |
m_havedoc = false;
|
300 |
|
322 |
|