|
a/src/internfile/mh_exec.cpp |
|
b/src/internfile/mh_exec.cpp |
|
... |
|
... |
145 |
|
145 |
|
146 |
void MimeHandlerExec::finaldetails()
|
146 |
void MimeHandlerExec::finaldetails()
|
147 |
{
|
147 |
{
|
148 |
string& output = m_metaData["content"];
|
148 |
string& output = m_metaData["content"];
|
149 |
|
149 |
|
150 |
// if output is text/plain (not text/html), we may have to convert
|
150 |
// If output is text/plain (not text/html), we may have to convert
|
|
|
151 |
// it to utf-8, because this is the last point where it can be done.
|
151 |
// it to utf-8. cfgCharset comes from the mimeconf filter definition line
|
152 |
// cfgFilterOutputCharset comes from the mimeconf filter definition line
|
152 |
string charset = cfgCharset.empty() ? "utf-8" : cfgCharset;
|
153 |
string charset = cfgFilterOutputCharset.empty() ? "utf-8" :
|
|
|
154 |
cfgFilterOutputCharset;
|
|
|
155 |
if (!stringlowercmp("default", charset)) {
|
|
|
156 |
charset = m_dfltInputCharset;
|
|
|
157 |
}
|
153 |
string mt = cfgMtype.empty() ? "text/html" : cfgMtype;
|
158 |
string mt = cfgFilterOutputMtype.empty() ? "text/html" :
|
|
|
159 |
cfgFilterOutputMtype;
|
154 |
if (!mt.compare("text/plain") && charset.compare("utf-8")) {
|
160 |
if (!mt.compare("text/plain") && stringlowercmp("utf-8", charset)) {
|
155 |
string transcoded;
|
161 |
string transcoded;
|
156 |
int ecnt;
|
162 |
int ecnt;
|
157 |
if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) {
|
163 |
if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) {
|
158 |
LOGERR(("mh_exec: transcode failed from [%s] to UTF-8\n",
|
164 |
LOGERR(("mh_exec: transcode failed from [%s] to UTF-8\n",
|
159 |
charset.c_str()));
|
165 |
charset.c_str()));
|
|
... |
|
... |
161 |
if (ecnt) {
|
167 |
if (ecnt) {
|
162 |
LOGDEB(("mh_exec: %d transcoding errors from [%s] to UTF-8\n",
|
168 |
LOGDEB(("mh_exec: %d transcoding errors from [%s] to UTF-8\n",
|
163 |
ecnt, charset.c_str()));
|
169 |
ecnt, charset.c_str()));
|
164 |
}
|
170 |
}
|
165 |
output = transcoded;
|
171 |
output = transcoded;
|
|
|
172 |
charset = "utf-8";
|
166 |
}
|
173 |
}
|
167 |
}
|
174 |
}
|
|
|
175 |
|
168 |
// Success. Store some external metadata
|
176 |
// Success. Store some external metadata
|
|
|
177 |
|
|
|
178 |
// Original charset. Can't be too sure about this actually. It's
|
|
|
179 |
// just a hint anyway
|
169 |
m_metaData["origcharset"] = m_defcharset;
|
180 |
m_metaData["origcharset"] = m_dfltInputCharset;
|
170 |
// Default charset: all recoll filters output utf-8, but this
|
181 |
|
|
|
182 |
// Supposed contents charset encoding. This could still be
|
171 |
// could still be overridden by the content-type meta tag for html
|
183 |
// overridden by the content-type meta tag for html, but this is
|
|
|
184 |
// wasteful so we hope it's correct
|
172 |
m_metaData["charset"] = charset;
|
185 |
m_metaData["charset"] = charset;
|
173 |
m_metaData["mimetype"] = mt;
|
186 |
m_metaData["mimetype"] = mt;
|
174 |
|
187 |
|
175 |
string md5, xmd5, reason;
|
188 |
string md5, xmd5, reason;
|
176 |
if (MD5File(m_fn, md5, &reason)) {
|
189 |
if (MD5File(m_fn, md5, &reason)) {
|