Switch to unified view

a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp
...
...
145
145
146
void MimeHandlerExec::finaldetails()
146
void MimeHandlerExec::finaldetails()
147
{
147
{
148
    string& output = m_metaData["content"];
148
    string& output = m_metaData["content"];
149
149
150
    // if output is text/plain (not text/html), we may have to convert
150
    // If output is text/plain (not text/html), we may have to convert
151
    // it to utf-8, because this is the last point where it can be done.
151
    // it to utf-8. cfgCharset comes from the mimeconf filter definition line
152
    // cfgFilterOutputCharset comes from the mimeconf filter definition line
152
    string charset = cfgCharset.empty() ? "utf-8" : cfgCharset;
153
    string charset = cfgFilterOutputCharset.empty() ? "utf-8" : 
154
  cfgFilterOutputCharset;
155
    if (!stringlowercmp("default", charset)) {
156
  charset = m_dfltInputCharset;
157
    }
153
    string mt = cfgMtype.empty() ? "text/html" : cfgMtype;
158
    string mt = cfgFilterOutputMtype.empty() ? "text/html" : 
159
  cfgFilterOutputMtype;
154
    if (!mt.compare("text/plain") && charset.compare("utf-8")) {
160
    if (!mt.compare("text/plain") && stringlowercmp("utf-8", charset)) {
155
    string transcoded;
161
    string transcoded;
156
    int ecnt;
162
    int ecnt;
157
    if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) {
163
    if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) {
158
        LOGERR(("mh_exec: transcode failed from [%s] to UTF-8\n",
164
        LOGERR(("mh_exec: transcode failed from [%s] to UTF-8\n",
159
            charset.c_str()));
165
            charset.c_str()));
...
...
161
        if (ecnt) {
167
        if (ecnt) {
162
        LOGDEB(("mh_exec: %d transcoding errors  from [%s] to UTF-8\n",
168
        LOGDEB(("mh_exec: %d transcoding errors  from [%s] to UTF-8\n",
163
            ecnt, charset.c_str()));
169
            ecnt, charset.c_str()));
164
        }
170
        }
165
        output = transcoded;
171
        output = transcoded;
172
      charset = "utf-8";
166
    }
173
    }
167
    }
174
    }
175
168
    // Success. Store some external metadata
176
    // Success. Store some external metadata
177
178
    // Original charset. Can't be too sure about this actually. It's
179
    // just a hint anyway
169
    m_metaData["origcharset"] = m_defcharset;
180
    m_metaData["origcharset"] = m_dfltInputCharset;
170
    // Default charset: all recoll filters output utf-8, but this
181
182
    // Supposed contents charset encoding. This could still be
171
    // could still be overridden by the content-type meta tag for html
183
    // overridden by the content-type meta tag for html, but this is
184
    // wasteful so we hope it's correct
172
    m_metaData["charset"] = charset;
185
    m_metaData["charset"] = charset;
173
    m_metaData["mimetype"] = mt;
186
    m_metaData["mimetype"] = mt;
174
187
175
    string md5, xmd5, reason;
188
    string md5, xmd5, reason;
176
    if (MD5File(m_fn, md5, &reason)) {
189
    if (MD5File(m_fn, md5, &reason)) {