|
a/src/internfile/mh_text.cpp |
|
b/src/internfile/mh_text.cpp |
|
... |
|
... |
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
16 |
*/
|
16 |
*/
|
17 |
#include "autoconfig.h"
|
17 |
#include "autoconfig.h"
|
18 |
|
18 |
|
19 |
#include <stdio.h>
|
19 |
#include <stdio.h>
|
20 |
#include <unistd.h>
|
|
|
21 |
#include <sys/stat.h>
|
|
|
22 |
#include <errno.h>
|
20 |
#include <errno.h>
|
23 |
|
21 |
|
24 |
#include <iostream>
|
22 |
#include <iostream>
|
25 |
#include <string>
|
23 |
#include <string>
|
26 |
#ifndef NO_NAMESPACES
|
|
|
27 |
using namespace std;
|
24 |
using namespace std;
|
28 |
#endif /* NO_NAMESPACES */
|
|
|
29 |
|
25 |
|
30 |
#include "cstr.h"
|
26 |
#include "cstr.h"
|
31 |
#include "mh_text.h"
|
27 |
#include "mh_text.h"
|
32 |
#include "debuglog.h"
|
28 |
#include "debuglog.h"
|
33 |
#include "readfile.h"
|
29 |
#include "readfile.h"
|
34 |
#include "md5ut.h"
|
30 |
#include "md5ut.h"
|
35 |
#include "rclconfig.h"
|
31 |
#include "rclconfig.h"
|
36 |
#include "pxattr.h"
|
32 |
#include "pxattr.h"
|
|
|
33 |
#include "pathut.h"
|
37 |
|
34 |
|
38 |
const int MB = 1024*1024;
|
35 |
const int MB = 1024*1024;
|
39 |
const int KB = 1024;
|
36 |
const int KB = 1024;
|
40 |
|
37 |
|
41 |
// Process a plain text file
|
38 |
// Process a plain text file
|
|
... |
|
... |
45 |
|
42 |
|
46 |
RecollFilter::set_document_file(mt, fn);
|
43 |
RecollFilter::set_document_file(mt, fn);
|
47 |
m_fn = fn;
|
44 |
m_fn = fn;
|
48 |
|
45 |
|
49 |
// file size for oversize check
|
46 |
// file size for oversize check
|
50 |
struct stat st;
|
47 |
long long fsize = path_filesize(m_fn);
|
51 |
if (stat(m_fn.c_str(), &st) < 0) {
|
48 |
if (fsize < 0) {
|
52 |
LOGERR(("MimeHandlerText::set_document_file: stat(%s) errno %d\n",
|
49 |
LOGERR(("MimeHandlerText::set_document_file: stat(%s) errno %d\n",
|
53 |
m_fn.c_str(), errno));
|
50 |
m_fn.c_str(), errno));
|
54 |
return false;
|
51 |
return false;
|
55 |
}
|
52 |
}
|
56 |
|
53 |
|
|
... |
|
... |
60 |
|
57 |
|
61 |
// Max file size parameter: texts over this size are not indexed
|
58 |
// Max file size parameter: texts over this size are not indexed
|
62 |
int maxmbs = 20;
|
59 |
int maxmbs = 20;
|
63 |
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
60 |
m_config->getConfParam("textfilemaxmbs", &maxmbs);
|
64 |
|
61 |
|
65 |
if (maxmbs == -1 || st.st_size / MB <= maxmbs) {
|
62 |
if (maxmbs == -1 || fsize / MB <= maxmbs) {
|
66 |
// Text file page size: if set, we split text files into
|
63 |
// Text file page size: if set, we split text files into
|
67 |
// multiple documents
|
64 |
// multiple documents
|
68 |
int ps = 1000;
|
65 |
int ps = 1000;
|
69 |
m_config->getConfParam("textfilepagekbs", &ps);
|
66 |
m_config->getConfParam("textfilepagekbs", &ps);
|
70 |
if (ps != -1) {
|
67 |
if (ps != -1) {
|