Switch to unified view

a/src/internfile/mh_exec.cpp b/src/internfile/mh_exec.cpp
...
...
19
#include "mh_exec.h"
19
#include "mh_exec.h"
20
#include "mh_html.h"
20
#include "mh_html.h"
21
#include "debuglog.h"
21
#include "debuglog.h"
22
#include "cancelcheck.h"
22
#include "cancelcheck.h"
23
#include "smallut.h"
23
#include "smallut.h"
24
#include "transcode.h"
25
#include "md5.h"
24
#include "md5.h"
26
#include "rclconfig.h"
25
#include "rclconfig.h"
27
26
28
#include <sys/types.h>
27
#include <sys/types.h>
29
#include <sys/wait.h>
28
#include <sys/wait.h>
...
...
144
    return true;
143
    return true;
145
}
144
}
146
145
147
void MimeHandlerExec::finaldetails()
146
void MimeHandlerExec::finaldetails()
148
{
147
{
149
    string& output = m_metaData[cstr_content];
148
    m_metaData[cstr_origcharset] = m_dfltInputCharset;
150
149
151
    // If output is text/plain (not text/html), we may have to convert
152
    // it to utf-8, because this is the last point where it can be done.
153
    // cfgFilterOutputCharset comes from the mimeconf filter definition line
150
    // cfgFilterOutputCharset comes from the mimeconf filter definition line
151
    string& charset = m_metaData[cstr_charset];
154
    string charset = cfgFilterOutputCharset.empty() ? "utf-8" : 
152
    charset = cfgFilterOutputCharset.empty() ? "UTF-8" : cfgFilterOutputCharset;
155
  cfgFilterOutputCharset;
156
    bool trustcharset = true;
157
    if (!stringlowercmp("default", charset)) {
153
    if (!stringlowercmp("default", charset)) {
158
    charset = m_dfltInputCharset;
154
    charset = m_dfltInputCharset;
159
  trustcharset = false;
160
    }
155
    }
156
157
    string& mt = m_metaData[cstr_mimetype];
161
    string mt = cfgFilterOutputMtype.empty() ? "text/html" : 
158
    mt = cfgFilterOutputMtype.empty() ? "text/html" : 
162
    cfgFilterOutputMtype;
159
    cfgFilterOutputMtype;
163
160
164
    // If this is text/plain and not utf-8 or untrusted, transcode to utf-8.
161
    // If this is text/plain, transcode to UTF-8 (or check that it already is UTF-8)
165
    if (!mt.compare(cstr_textplain) && 
162
    if (!mt.compare(cstr_textplain)) {
166
  (!trustcharset || stringlowercmp("utf-8", charset))) {
163
  (void)txtdcode("mh_exec");
167
  string transcoded;
168
  int ecnt;
169
  if (!transcode(output, transcoded, charset, "UTF-8", &ecnt)) {
170
      LOGERR(("mh_exec: transcode failed from [%s] to UTF-8\n",
171
          charset.c_str()));
172
      // Erase text in this case: it's garbage
173
      output.clear();
174
  } else {
175
      if (ecnt) {
176
      LOGDEB(("mh_exec: %d transcoding errors  from [%s] to UTF-8\n",
177
          ecnt, charset.c_str()));
178
      }
179
      output = transcoded;
180
      charset = "utf-8";
181
  }
182
    }
164
    }
183
184
    // Success. Store some external metadata
185
186
    // Original charset. Can't be too sure about this actually. It's
187
    // just a hint anyway
188
    m_metaData["origcharset"] = m_dfltInputCharset;
189
190
    // Supposed contents charset encoding. This could still be
191
    // overridden by the content-type meta tag for html, but this is
192
    // wasteful so we hope it's correct
193
    m_metaData[cstr_charset] = charset;
194
    m_metaData[cstr_mimetype] = mt;
195
165
196
    string md5, xmd5, reason;
166
    string md5, xmd5, reason;
197
    if (MD5File(m_fn, md5, &reason)) {
167
    if (MD5File(m_fn, md5, &reason)) {
198
    m_metaData["md5"] = MD5HexPrint(md5, xmd5);
168
    m_metaData["md5"] = MD5HexPrint(md5, xmd5);
199
    } else {
169
    } else {