|
a/src/internfile/mh_text.cpp |
|
b/src/internfile/mh_text.cpp |
|
... |
|
... |
31 |
#include "mh_text.h"
|
31 |
#include "mh_text.h"
|
32 |
#include "debuglog.h"
|
32 |
#include "debuglog.h"
|
33 |
#include "readfile.h"
|
33 |
#include "readfile.h"
|
34 |
#include "md5.h"
|
34 |
#include "md5.h"
|
35 |
#include "rclconfig.h"
|
35 |
#include "rclconfig.h"
|
|
|
36 |
#include "pxattr.h"
|
36 |
|
37 |
|
37 |
const int MB = 1024*1024;
|
38 |
const int MB = 1024*1024;
|
38 |
const int KB = 1024;
|
39 |
const int KB = 1024;
|
39 |
|
40 |
|
40 |
// Process a plain text file
|
41 |
// Process a plain text file
|
|
... |
|
... |
50 |
if (stat(m_fn.c_str(), &st) < 0) {
|
51 |
if (stat(m_fn.c_str(), &st) < 0) {
|
51 |
LOGERR(("MimeHandlerText::set_document_file: stat(%s) errno %d\n",
|
52 |
LOGERR(("MimeHandlerText::set_document_file: stat(%s) errno %d\n",
|
52 |
m_fn.c_str(), errno));
|
53 |
m_fn.c_str(), errno));
|
53 |
return false;
|
54 |
return false;
|
54 |
}
|
55 |
}
|
|
|
56 |
|
|
|
57 |
// Check for charset defined in extended attribute as per:
|
|
|
58 |
// http://freedesktop.org/wiki/CommonExtendedAttributes
|
|
|
59 |
pxattr::get(m_fn, "charset", &m_charsetfromxattr);
|
55 |
|
60 |
|
56 |
// Max file size parameter: texts over this size are not indexed
|
61 |
// Max file size parameter: texts over this size are not indexed
|
57 |
int maxmbs = 20;
|
62 |
int maxmbs = 20;
|
58 |
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
63 |
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
59 |
|
64 |
|
|
... |
|
... |
113 |
LOGDEB(("MimeHandlerText::next_document: m_havedoc %d\n", int(m_havedoc)));
|
118 |
LOGDEB(("MimeHandlerText::next_document: m_havedoc %d\n", int(m_havedoc)));
|
114 |
|
119 |
|
115 |
if (m_havedoc == false)
|
120 |
if (m_havedoc == false)
|
116 |
return false;
|
121 |
return false;
|
117 |
|
122 |
|
118 |
// We transcode even if defcharset is supposedly already utf-8:
|
123 |
if (m_charsetfromxattr.empty())
|
119 |
// this validates the encoding.
|
|
|
120 |
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
124 |
m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
|
|
|
125 |
else
|
|
|
126 |
m_metaData[cstr_dj_keyorigcharset] = m_charsetfromxattr;
|
|
|
127 |
|
121 |
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
128 |
m_metaData[cstr_dj_keymt] = cstr_textplain;
|
122 |
|
129 |
|
123 |
size_t srclen = m_text.length();
|
130 |
size_t srclen = m_text.length();
|
124 |
m_metaData[cstr_dj_keycontent].swap(m_text);
|
131 |
m_metaData[cstr_dj_keycontent].swap(m_text);
|
125 |
|
132 |
|
|
|
133 |
// We transcode even if defcharset is supposedly already utf-8:
|
|
|
134 |
// this validates the encoding.
|
126 |
// txtdcode() truncates the text if transcoding fails
|
135 |
// txtdcode() truncates the text if transcoding fails
|
127 |
(void)txtdcode("mh_text");
|
136 |
(void)txtdcode("mh_text");
|
128 |
|
137 |
|
129 |
|
138 |
|
130 |
// If the text length is 0 (the file is empty or oversize), or we are
|
139 |
// If the text length is 0 (the file is empty or oversize), or we are
|