|
a/src/internfile/mh_text.cpp |
|
b/src/internfile/mh_text.cpp |
|
... |
|
... |
30 |
#include "cstr.h"
|
30 |
#include "cstr.h"
|
31 |
#include "mh_text.h"
|
31 |
#include "mh_text.h"
|
32 |
#include "csguess.h"
|
32 |
#include "csguess.h"
|
33 |
#include "debuglog.h"
|
33 |
#include "debuglog.h"
|
34 |
#include "readfile.h"
|
34 |
#include "readfile.h"
|
35 |
#include "transcode.h"
|
|
|
36 |
#include "md5.h"
|
35 |
#include "md5.h"
|
37 |
#include "rclconfig.h"
|
36 |
#include "rclconfig.h"
|
38 |
|
37 |
|
39 |
// 1024*1024: one mebibyte when used as a byte count.
// NOTE(review): the uses of MB are not visible in this excerpt — confirm it is a byte-size unit.
const int MB = 1024*1024;
|
38 |
// 1024*1024: one mebibyte when used as a byte count.
// NOTE(review): the uses of MB are not visible in this excerpt — confirm it is a byte-size unit.
const int MB = 1024*1024;
|
40 |
// 1024: one kibibyte when used as a byte count.
// NOTE(review): the uses of KB are not visible in this excerpt — confirm it is a byte-size unit.
const int KB = 1024;
|
39 |
// 1024: one kibibyte when used as a byte count.
// NOTE(review): the uses of KB are not visible in this excerpt — confirm it is a byte-size unit.
const int KB = 1024;
|
|
... |
|
... |
115 |
LOGDEB(("MimeHandlerText::next_document: m_havedoc %d\n", int(m_havedoc)));
|
114 |
LOGDEB(("MimeHandlerText::next_document: m_havedoc %d\n", int(m_havedoc)));
|
116 |
|
115 |
|
117 |
if (m_havedoc == false)
|
116 |
if (m_havedoc == false)
|
118 |
return false;
|
117 |
return false;
|
119 |
|
118 |
|
120 |
// We transcode even if defcharset is already utf-8:
|
119 |
// We transcode even if defcharset is supposedly already utf-8:
|
121 |
// this validates the encoding.
|
120 |
// this validates the encoding.
|
122 |
LOGDEB1(("MimeHandlerText::mkDoc: transcod from %s to utf-8\n",
|
|
|
123 |
m_dfltInputCharset.c_str()));
|
|
|
124 |
int ecnt;
|
|
|
125 |
bool ret;
|
|
|
126 |
string& itext = m_metaData[cstr_content];
|
|
|
127 |
if (!(ret=transcode(m_text, itext, m_dfltInputCharset, "UTF-8", &ecnt)) ||
|
|
|
128 |
ecnt > int(itext.size() / 4)) {
|
|
|
129 |
LOGERR(("MimeHandlerText::mkDoc: transcode to utf-8 failed "
|
|
|
130 |
"for input charset [%s] ret %d ecnt %d\n",
|
|
|
131 |
m_dfltInputCharset.c_str(), ret, ecnt));
|
|
|
132 |
itext.erase();
|
|
|
133 |
return false;
|
|
|
134 |
}
|
|
|
135 |
m_metaData["origcharset"] = m_dfltInputCharset;
|
121 |
m_metaData[cstr_origcharset] = m_dfltInputCharset;
|
136 |
m_metaData[cstr_charset] = "utf-8";
|
|
|
137 |
m_metaData[cstr_mimetype] = cstr_textplain;
|
122 |
m_metaData[cstr_mimetype] = cstr_textplain;
|
138 |
|
123 |
|
|
|
124 |
size_t srclen = m_text.length();
|
|
|
125 |
m_metaData[cstr_content].swap(m_text);
|
|
|
126 |
|
|
|
127 |
// txtdcode() truncates the text if transcoding fails
|
|
|
128 |
(void)txtdcode("mh_text");
|
|
|
129 |
|
|
|
130 |
|
139 |
// If text length is 0 (the file is empty or oversize), or we have
|
131 |
// If the text length is 0 (the file is empty or oversize), or we are
|
140 |
// read all at once, we're done
|
132 |
// not paging, we're done
|
141 |
if (m_text.length() == 0 || !m_paging) {
|
133 |
if (srclen == 0 || !m_paging) {
|
142 |
m_havedoc = false;
|
134 |
m_havedoc = false;
|
143 |
return true;
|
135 |
return true;
|
144 |
} else {
|
136 |
} else {
|
145 |
// Paging: set ipath then read next chunk.
|
137 |
// Paging: set ipath then read next chunk.
|
146 |
|
138 |
|
|
... |
|
... |
148 |
// records for small files (one for the file, one for the
|
140 |
// records for small files (one for the file, one for the
|
149 |
// first chunk). This is a hack. The right thing to do would
|
141 |
// first chunk). This is a hack. The right thing to do would
|
150 |
// be to use a different mtype for files over the page size,
|
142 |
// be to use a different mtype for files over the page size,
|
151 |
// and keep text/plain only for smaller files.
|
143 |
// and keep text/plain only for smaller files.
|
152 |
char buf[30];
|
144 |
char buf[30];
|
153 |
sprintf(buf, "%lld", (long long)(m_offs - m_text.length()));
|
145 |
sprintf(buf, "%lld", (long long)(m_offs - srclen));
|
154 |
if (m_offs - m_text.length() != 0)
|
146 |
if (m_offs - srclen != 0)
|
155 |
m_metaData[cstr_ipath] = buf;
|
147 |
m_metaData[cstr_ipath] = buf;
|
156 |
readnext();
|
148 |
readnext();
|
157 |
return true;
|
149 |
return true;
|
158 |
}
|
150 |
}
|
159 |
}
|
151 |
}
|
160 |
|
152 |
|
161 |
bool MimeHandlerText::readnext()
|
153 |
bool MimeHandlerText::readnext()
|
162 |
{
|
154 |
{
|
163 |
string reason;
|
155 |
string reason;
|
164 |
m_text.erase();
|
156 |
m_text.clear();
|
165 |
if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
|
157 |
if (!file_to_string(m_fn, m_text, m_offs, m_pagesz, &reason)) {
|
166 |
LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str()));
|
158 |
LOGERR(("MimeHandlerText: can't read file: %s\n", reason.c_str()));
|
167 |
m_havedoc = false;
|
159 |
m_havedoc = false;
|
168 |
return false;
|
160 |
return false;
|
169 |
}
|
161 |
}
|