Switch to unified view

a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp
...
...
30
#include "cstr.h"
30
#include "cstr.h"
31
#include "mh_text.h"
31
#include "mh_text.h"
32
#include "csguess.h"
32
#include "csguess.h"
33
#include "debuglog.h"
33
#include "debuglog.h"
34
#include "readfile.h"
34
#include "readfile.h"
35
#include "transcode.h"
36
#include "md5.h"
35
#include "md5.h"
37
#include "rclconfig.h"
36
#include "rclconfig.h"
38
37
39
const int MB = 1024*1024;
38
const int MB = 1024*1024;
40
const int KB = 1024;
39
const int KB = 1024;
...
...
115
    LOGDEB(("MimeHandlerText::next_document: m_havedoc %d\n", int(m_havedoc)));
114
    LOGDEB(("MimeHandlerText::next_document: m_havedoc %d\n", int(m_havedoc)));
116
115
117
    if (m_havedoc == false)
116
    if (m_havedoc == false)
118
    return false;
117
    return false;
119
118
120
    // We transcode even if defcharset is already utf-8: 
119
    // We transcode even if defcharset is supposedly already utf-8:
121
    // this validates the encoding.
120
    // this validates the encoding.
122
    LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n", 
123
       m_dfltInputCharset.c_str()));
124
    int ecnt;
125
    bool ret;
126
    string& itext = m_metaData[cstr_content];
127
    if (!(ret=transcode(m_text, itext, m_dfltInputCharset, "UTF-8", &ecnt)) || 
128
  ecnt > int(itext.size() / 4)) {
129
  LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
130
      "for input charset [%s] ret %d ecnt %d\n", 
131
      m_dfltInputCharset.c_str(), ret, ecnt));
132
  itext.erase();
133
  return false;
134
    }
135
    m_metaData["origcharset"] = m_dfltInputCharset;
121
    m_metaData[cstr_origcharset] = m_dfltInputCharset;
136
    m_metaData[cstr_charset] = "utf-8";
137
    m_metaData[cstr_mimetype] = cstr_textplain;
122
    m_metaData[cstr_mimetype] = cstr_textplain;
138
123
124
    size_t srclen = m_text.length();
125
    m_metaData[cstr_content].swap(m_text);
126
127
    // txtdcode() truncates the text if transcoding fails
128
    (void)txtdcode("mh_text");
129
130
139
    // If text length is 0 (the file is empty or oversize), or we have
131
    // If the text length is 0 (the file is empty or oversize), or we are 
140
    // read all at once, we're done
132
    // not paging, we're done
141
    if (m_text.length() == 0 || !m_paging) {
133
    if (srclen == 0 || !m_paging) {
142
        m_havedoc = false;
134
        m_havedoc = false;
143
        return true;
135
        return true;
144
    } else {
136
    } else {
145
        // Paging: set ipath then read next chunk. 
137
        // Paging: set ipath then read next chunk. 
146
138
...
...
148
        // records for small files (one for the file, one for the
140
        // records for small files (one for the file, one for the
149
        // first chunk). This is a hack. The right thing to do would
141
        // first chunk). This is a hack. The right thing to do would
150
        // be to use a different mtype for files over the page size,
142
        // be to use a different mtype for files over the page size,
151
        // and keep text/plain only for smaller files.
143
        // and keep text/plain only for smaller files.
152
        char buf[30];
144
        char buf[30];
153
        sprintf(buf, "%lld", (long long)(m_offs - m_text.length()));
145
        sprintf(buf, "%lld", (long long)(m_offs - srclen));
154
        if (m_offs - m_text.length() != 0)
146
        if (m_offs - srclen != 0)
155
            m_metaData[cstr_ipath] = buf;
147
            m_metaData[cstr_ipath] = buf;
156
        readnext();
148
        readnext();
157
        return true;
149
        return true;
158
    }
150
    }
159
}
151
}
160
152
161
bool MimeHandlerText::readnext()
153
bool MimeHandlerText::readnext()
162
{
154
{
163
    string reason;
155
    string reason;
164
    m_text.erase();
156
    m_text.clear();
165
    if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
157
    if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
166
        LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str()));
158
        LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str()));
167
        m_havedoc = false;
159
        m_havedoc = false;
168
        return false;
160
        return false;
169
    }
161
    }