Switch to unified view

a/src/internfile/mh_text.cpp b/src/internfile/mh_text.cpp
...
...
31
#include "mh_text.h"
31
#include "mh_text.h"
32
#include "debuglog.h"
32
#include "debuglog.h"
33
#include "readfile.h"
33
#include "readfile.h"
34
#include "md5.h"
34
#include "md5.h"
35
#include "rclconfig.h"
35
#include "rclconfig.h"
36
#include "pxattr.h"
36
37
37
const int MB = 1024*1024;
38
const int MB = 1024*1024;
38
const int KB = 1024;
39
const int KB = 1024;
39
40
40
// Process a plain text file
41
// Process a plain text file
...
...
50
    if (stat(m_fn.c_str(), &st) < 0) {
51
    if (stat(m_fn.c_str(), &st) < 0) {
51
        LOGERR(("MimeHandlerText::set_document_file: stat(%s) errno %d\n",
52
        LOGERR(("MimeHandlerText::set_document_file: stat(%s) errno %d\n",
52
                m_fn.c_str(), errno));
53
                m_fn.c_str(), errno));
53
        return false;
54
        return false;
54
    }
55
    }
56
57
    // Check for charset defined in extended attribute as per:
58
    // http://freedesktop.org/wiki/CommonExtendedAttributes
59
    pxattr::get(m_fn, "charset", &m_charsetfromxattr);
55
60
56
    // Max file size parameter: texts over this size are not indexed
61
    // Max file size parameter: texts over this size are not indexed
57
    int maxmbs = 20;
62
    int maxmbs = 20;
58
    m_config->getConfParam("textfilemaxmbs", &maxmbs);
63
    m_config->getConfParam("textfilemaxmbs", &maxmbs);
59
64
...
...
113
    LOGDEB(("MimeHandlerText::next_document: m_havedoc %d\n", int(m_havedoc)));
118
    LOGDEB(("MimeHandlerText::next_document: m_havedoc %d\n", int(m_havedoc)));
114
119
115
    if (m_havedoc == false)
120
    if (m_havedoc == false)
116
    return false;
121
    return false;
117
122
118
    // We transcode even if defcharset is supposedly already utf-8:
123
    if (m_charsetfromxattr.empty())
119
    // this validates the encoding.
120
    m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
124
  m_metaData[cstr_dj_keyorigcharset] = m_dfltInputCharset;
125
    else 
126
  m_metaData[cstr_dj_keyorigcharset] = m_charsetfromxattr;
127
121
    m_metaData[cstr_dj_keymt] = cstr_textplain;
128
    m_metaData[cstr_dj_keymt] = cstr_textplain;
122
129
123
    size_t srclen = m_text.length();
130
    size_t srclen = m_text.length();
124
    m_metaData[cstr_dj_keycontent].swap(m_text);
131
    m_metaData[cstr_dj_keycontent].swap(m_text);
125
132
133
    // We transcode even if defcharset is supposedly already utf-8:
134
    // this validates the encoding.
126
    // txtdcode() truncates the text if transcoding fails
135
    // txtdcode() truncates the text if transcoding fails
127
    (void)txtdcode("mh_text");
136
    (void)txtdcode("mh_text");
128
137
129
138
130
    // If the text length is 0 (the file is empty or oversize), or we are 
139
    // If the text length is 0 (the file is empty or oversize), or we are