|
a/src/internfile/mh_execm.cpp |
|
b/src/internfile/mh_execm.cpp |
|
... |
|
... |
23 |
#include "mh_execm.h"
|
23 |
#include "mh_execm.h"
|
24 |
#include "mh_html.h"
|
24 |
#include "mh_html.h"
|
25 |
#include "debuglog.h"
|
25 |
#include "debuglog.h"
|
26 |
#include "cancelcheck.h"
|
26 |
#include "cancelcheck.h"
|
27 |
#include "smallut.h"
|
27 |
#include "smallut.h"
|
28 |
#include "transcode.h"
|
|
|
29 |
#include "md5.h"
|
28 |
#include "md5.h"
|
30 |
#include "rclconfig.h"
|
29 |
#include "rclconfig.h"
|
31 |
#include "mimetype.h"
|
30 |
#include "mimetype.h"
|
32 |
#include "idfile.h"
|
31 |
#include "idfile.h"
|
33 |
|
32 |
|
|
... |
|
... |
281 |
m_fn.c_str(), reason.c_str()));
|
280 |
m_fn.c_str(), reason.c_str()));
|
282 |
}
|
281 |
}
|
283 |
}
|
282 |
}
|
284 |
|
283 |
|
285 |
// Charset. For many document types it doesn't matter. For text
|
284 |
// Charset. For many document types it doesn't matter. For text
|
286 |
// and html it does. We supply a default from the
|
285 |
// and html it does. We supply a default from the configuration.
|
287 |
// configuration.
|
|
|
288 |
bool trustcharset = true;
|
|
|
289 |
if (charset.empty()) {
|
286 |
if (charset.empty()) {
|
290 |
charset = cfgFilterOutputCharset.empty() ? "utf-8" :
|
287 |
charset = cfgFilterOutputCharset.empty() ? "utf-8" :
|
291 |
cfgFilterOutputCharset;
|
288 |
cfgFilterOutputCharset;
|
292 |
if (!stringlowercmp("default", charset)) {
|
289 |
if (!stringlowercmp("default", charset)) {
|
293 |
trustcharset = false;
|
|
|
294 |
charset = m_dfltInputCharset;
|
290 |
charset = m_dfltInputCharset;
|
295 |
}
|
291 |
}
|
296 |
}
|
292 |
}
|
|
|
293 |
m_metaData[cstr_origcharset] = charset;
|
|
|
294 |
m_metaData[cstr_charset] = charset;
|
297 |
|
295 |
|
298 |
string& output = m_metaData[cstr_content];
|
|
|
299 |
if (!m_metaData[cstr_mimetype].compare(cstr_textplain) &&
|
296 |
if (!m_metaData[cstr_mimetype].compare(cstr_textplain)) {
|
300 |
(!trustcharset || stringlowercmp("utf-8", charset))) {
|
297 |
(void)txtdcode("mh_execm");
|
301 |
string transcoded;
|
|
|
302 |
int ecnt;
|
|
|
303 |
if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) {
|
|
|
304 |
LOGERR(("mh_execm: transcode failed from [%s] to UTF-8\n",
|
|
|
305 |
charset.c_str()));
|
|
|
306 |
// Erase text in this case: it's garbage
|
|
|
307 |
output.clear();
|
|
|
308 |
} else {
|
|
|
309 |
if (ecnt) {
|
|
|
310 |
LOGDEB(("mh_exec: %d transcoding errors from [%s] to UTF-8\n",
|
|
|
311 |
ecnt, charset.c_str()));
|
|
|
312 |
}
|
|
|
313 |
output = transcoded;
|
|
|
314 |
charset = "utf-8";
|
|
|
315 |
}
|
|
|
316 |
}
|
298 |
}
|
317 |
|
299 |
|
318 |
m_metaData[cstr_charset] = charset;
|
|
|
319 |
|
300 |
|
320 |
if (eofnext_received)
|
301 |
if (eofnext_received)
|
321 |
m_havedoc = false;
|
302 |
m_havedoc = false;
|
322 |
|
303 |
|
323 |
LOGDEB0(("MHExecMultiple: returning %d bytes of content,"
|
304 |
LOGDEB0(("MHExecMultiple: returning %d bytes of content,"
|