|
a/src/internfile/mh_exec.cpp |
|
b/src/internfile/mh_exec.cpp |
|
... |
|
... |
19 |
#include "mh_exec.h"
|
19 |
#include "mh_exec.h"
|
20 |
#include "mh_html.h"
|
20 |
#include "mh_html.h"
|
21 |
#include "debuglog.h"
|
21 |
#include "debuglog.h"
|
22 |
#include "cancelcheck.h"
|
22 |
#include "cancelcheck.h"
|
23 |
#include "smallut.h"
|
23 |
#include "smallut.h"
|
24 |
#include "transcode.h"
|
|
|
25 |
#include "md5.h"
|
24 |
#include "md5.h"
|
26 |
#include "rclconfig.h"
|
25 |
#include "rclconfig.h"
|
27 |
|
26 |
|
28 |
#include <sys/types.h>
|
27 |
#include <sys/types.h>
|
29 |
#include <sys/wait.h>
|
28 |
#include <sys/wait.h>
|
|
... |
|
... |
144 |
return true;
|
143 |
return true;
|
145 |
}
|
144 |
}
|
146 |
|
145 |
|
147 |
void MimeHandlerExec::finaldetails()
|
146 |
void MimeHandlerExec::finaldetails()
|
148 |
{
|
147 |
{
|
149 |
string& output = m_metaData[cstr_content];
|
148 |
m_metaData[cstr_origcharset] = m_dfltInputCharset;
|
150 |
|
149 |
|
151 |
// If output is text/plain (not text/html), we may have to convert
|
|
|
152 |
// it to utf-8, because this is the last point where it can be done.
|
|
|
153 |
// cfgFilterOutputCharset comes from the mimeconf filter definition line
|
150 |
// cfgFilterOutputCharset comes from the mimeconf filter definition line
|
|
|
151 |
string& charset = m_metaData[cstr_charset];
|
154 |
string charset = cfgFilterOutputCharset.empty() ? "utf-8" :
|
152 |
charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
|
155 |
cfgFilterOutputCharset;
|
|
|
156 |
bool trustcharset = true;
|
|
|
157 |
if (!stringlowercmp("default", charset)) {
|
153 |
if (!stringlowercmp("default", charset)) {
|
158 |
charset = m_dfltInputCharset;
|
154 |
charset = m_dfltInputCharset;
|
159 |
trustcharset = false;
|
|
|
160 |
}
|
155 |
}
|
|
|
156 |
|
|
|
157 |
string& mt = m_metaData[cstr_mimetype];
|
161 |
string mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
158 |
mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
162 |
cfgFilterOutputMtype;
|
159 |
cfgFilterOutputMtype;
|
163 |
|
160 |
|
164 |
// If this is text/plain and not utf-8 or untrusted, transcode to utf-8.
|
161 |
// If this is text/plain transcode_to/check utf-8
|
165 |
if (!mt.compare(cstr_textplain) &&
|
162 |
if (!mt.compare(cstr_textplain)) {
|
166 |
(!trustcharset || stringlowercmp("utf-8", charset))) {
|
163 |
(void)txtdcode("mh_exec");
|
167 |
string transcoded;
|
|
|
168 |
int ecnt;
|
|
|
169 |
if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) {
|
|
|
170 |
LOGERR(("mh_exec: transcode failed from [%s] to UTF-8\n",
|
|
|
171 |
charset.c_str()));
|
|
|
172 |
// Erase text in this case: it's garbage
|
|
|
173 |
output.clear();
|
|
|
174 |
} else {
|
|
|
175 |
if (ecnt) {
|
|
|
176 |
LOGDEB(("mh_exec: %d transcoding errors from [%s] to UTF-8\n",
|
|
|
177 |
ecnt, charset.c_str()));
|
|
|
178 |
}
|
|
|
179 |
output = transcoded;
|
|
|
180 |
charset = "utf-8";
|
|
|
181 |
}
|
|
|
182 |
}
|
164 |
}
|
183 |
|
|
|
184 |
// Success. Store some external metadata
|
|
|
185 |
|
|
|
186 |
// Original charset. Can't be too sure about this actually. It's
|
|
|
187 |
// just a hint anyway
|
|
|
188 |
m_metaData["origcharset"] = m_dfltInputCharset;
|
|
|
189 |
|
|
|
190 |
// Supposed contents charset encoding. This could still be
|
|
|
191 |
// overridden by the content-type meta tag for html, but this is
|
|
|
192 |
// wasteful so we hope it's correct
|
|
|
193 |
m_metaData[cstr_charset] = charset;
|
|
|
194 |
m_metaData[cstr_mimetype] = mt;
|
|
|
195 |
|
165 |
|
196 |
string md5, xmd5, reason;
|
166 |
string md5, xmd5, reason;
|
197 |
if (MD5File(m_fn, md5, &reason)) {
|
167 |
if (MD5File(m_fn, md5, &reason)) {
|
198 |
m_metaData["md5"] = MD5HexPrint(md5, xmd5);
|
168 |
m_metaData["md5"] = MD5HexPrint(md5, xmd5);
|
199 |
} else {
|
169 |
} else {
|