|
a/src/internfile/internfile.cpp |
|
b/src/internfile/internfile.cpp |
|
... |
|
... |
42 |
#include "execmd.h"
|
42 |
#include "execmd.h"
|
43 |
#include "pathut.h"
|
43 |
#include "pathut.h"
|
44 |
#include "wipedir.h"
|
44 |
#include "wipedir.h"
|
45 |
#include "rclconfig.h"
|
45 |
#include "rclconfig.h"
|
46 |
#include "mh_html.h"
|
46 |
#include "mh_html.h"
|
|
|
47 |
#ifdef RCL_USE_XATTR
|
|
|
48 |
#include "pxattr.h"
|
|
|
49 |
#endif // RCL_USE_XATTR
|
47 |
|
50 |
|
48 |
// The internal path element separator. This can't be the same as the rcldb
|
51 |
// The internal path element separator. This can't be the same as the rcldb
|
49 |
// file to ipath separator : "|"
|
52 |
// file to ipath separator : "|"
|
50 |
static const string isep(":");
|
53 |
static const string isep(":");
|
51 |
static const string stxtplain("text/plain");
|
54 |
static const string stxtplain("text/plain");
|
52 |
|
55 |
|
53 |
set<string> FileInterner::o_missingExternal;
|
56 |
set<string> FileInterner::o_missingExternal;
|
54 |
map<string, set<string> > FileInterner::o_typesForMissing;
|
57 |
map<string, set<string> > FileInterner::o_typesForMissing;
|
|
|
58 |
|
|
|
59 |
#ifdef RCL_USE_XATTR
|
|
|
60 |
void FileInterner::reapXAttrs(const string& path)
|
|
|
61 |
{
|
|
|
62 |
vector<string> xnames;
|
|
|
63 |
if (!pxattr::list(path, &xnames)) {
|
|
|
64 |
LOGERR(("FileInterner::reapXattrs: pxattr::list: errno %d\n", errno));
|
|
|
65 |
return;
|
|
|
66 |
}
|
|
|
67 |
const map<string, string>& xtof = m_cfg->getXattrToField();
|
|
|
68 |
for (vector<string>::const_iterator it = xnames.begin();
|
|
|
69 |
it != xnames.end(); it++) {
|
|
|
70 |
map<string, string>::const_iterator mit;
|
|
|
71 |
if ((mit = xtof.find(*it)) != xtof.end()) {
|
|
|
72 |
string value;
|
|
|
73 |
if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) {
|
|
|
74 |
LOGERR(("FileInterner::reapXattrs: pxattr::get failed"
|
|
|
75 |
"for %s, errno %d\n", (*it).c_str(), errno));
|
|
|
76 |
continue;
|
|
|
77 |
}
|
|
|
78 |
// Encode should we ?
|
|
|
79 |
m_XAttrsFields[mit->second] = value;
|
|
|
80 |
}
|
|
|
81 |
}
|
|
|
82 |
}
|
|
|
83 |
#endif // RCL_USE_XATTR
|
55 |
|
84 |
|
56 |
// This is used when the user wants to retrieve a search result doc's parent
|
85 |
// This is used when the user wants to retrieve a search result doc's parent
|
57 |
// (ie message having a given attachment)
|
86 |
// (ie message having a given attachment)
|
58 |
bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
87 |
bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
59 |
string &eurl, string &eipath)
|
88 |
string &eurl, string &eipath)
|
|
... |
|
... |
191 |
Dijon::Filter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
|
220 |
Dijon::Filter *df = getMimeHandler(l_mime, m_cfg, !m_forPreview);
|
192 |
|
221 |
|
193 |
if (!df) {
|
222 |
if (!df) {
|
194 |
// No handler for this type, for now :( if indexallfilenames
|
223 |
// No handler for this type, for now :( if indexallfilenames
|
195 |
// is set in the config, this normally won't happen (we get mh_unknown)
|
224 |
// is set in the config, this normally won't happen (we get mh_unknown)
|
196 |
LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n", f.c_str(), l_mime.c_str()));
|
225 |
LOGERR(("FileInterner:: ignored: [%s] mime [%s]\n",
|
|
|
226 |
f.c_str(), l_mime.c_str()));
|
197 |
return;
|
227 |
return;
|
198 |
}
|
228 |
}
|
199 |
df->set_property(Dijon::Filter::OPERATING_MODE,
|
229 |
df->set_property(Dijon::Filter::OPERATING_MODE,
|
200 |
m_forPreview ? "view" : "index");
|
230 |
m_forPreview ? "view" : "index");
|
201 |
|
231 |
|
202 |
string charset = m_cfg->getDefCharset();
|
232 |
string charset = m_cfg->getDefCharset();
|
203 |
df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
233 |
df->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
|
|
234 |
|
|
|
235 |
#ifdef RCL_USE_XATTR
|
|
|
236 |
// Get fields computed from extended attributes. We use the
|
|
|
237 |
// original file, not the m_fn which may be the uncompressed temp
|
|
|
238 |
// file
|
|
|
239 |
reapXAttrs(f);
|
|
|
240 |
#endif //RCL_USE_XATTR
|
|
|
241 |
|
204 |
if (!df->set_document_file(m_fn)) {
|
242 |
if (!df->set_document_file(m_fn)) {
|
205 |
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
|
243 |
LOGERR(("FileInterner:: error parsing %s\n", m_fn.c_str()));
|
206 |
return;
|
244 |
return;
|
207 |
}
|
245 |
}
|
208 |
m_handlers.reserve(MAXHANDLERS);
|
246 |
m_handlers.reserve(MAXHANDLERS);
|
|
... |
|
... |
361 |
}
|
399 |
}
|
362 |
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[keyds].empty()) {
|
400 |
if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[keyds].empty()) {
|
363 |
doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds];
|
401 |
doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds];
|
364 |
doc.meta.erase(keyds);
|
402 |
doc.meta.erase(keyds);
|
365 |
}
|
403 |
}
|
366 |
#ifdef RCL_USE_XATTR
|
|
|
367 |
// Finally set any data possibly coming out of the extended file attributes
|
|
|
368 |
// these override any values from inside the file.
|
|
|
369 |
RecollFilter *rf = dynamic_cast<RecollFilter*>(df);
|
|
|
370 |
if (rf != 0) {
|
|
|
371 |
const map<string, string>& ffa = rf->getFieldsFromAttrs();
|
|
|
372 |
for (map<string,string>::const_iterator it = ffa.begin();
|
|
|
373 |
it != ffa.end(); it++) {
|
|
|
374 |
doc.meta[it->first] = it->second;
|
|
|
375 |
}
|
|
|
376 |
}
|
|
|
377 |
#endif //RCL_USE_XATTR
|
|
|
378 |
return true;
|
404 |
return true;
|
379 |
}
|
405 |
}
|
380 |
|
406 |
|
381 |
// Collect the ipath from the current path in the document tree.
|
407 |
// Collect the ipath from the current path in the document tree.
|
382 |
// While we're at it, we also set the mimetype and filename, which are special
|
408 |
// While we're at it, we also set the mimetype and filename, which are special
|
|
... |
|
... |
385 |
// We also set the author and modification time from the last doc
|
411 |
// We also set the author and modification time from the last doc
|
386 |
// which has them.
|
412 |
// which has them.
|
387 |
void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
|
413 |
void FileInterner::collectIpathAndMT(Rcl::Doc& doc, string& ipath) const
|
388 |
{
|
414 |
{
|
389 |
bool hasipath = false;
|
415 |
bool hasipath = false;
|
|
|
416 |
|
|
|
417 |
#ifdef RCL_USE_XATTR
|
|
|
418 |
// Set fields from extended file attributes.
|
|
|
419 |
// These can be overridden by values from inside the file
|
|
|
420 |
for (map<string,string>::const_iterator it = m_XAttrsFields.begin();
|
|
|
421 |
it != m_XAttrsFields.end(); it++) {
|
|
|
422 |
doc.meta[it->first] = it->second;
|
|
|
423 |
}
|
|
|
424 |
#endif //RCL_USE_XATTR
|
390 |
|
425 |
|
391 |
// If there is no ipath stack, the mimetype is the one from the file
|
426 |
// If there is no ipath stack, the mimetype is the one from the file
|
392 |
doc.mimetype = m_mimetype;
|
427 |
doc.mimetype = m_mimetype;
|
393 |
|
428 |
|
394 |
string ipathel;
|
429 |
string ipathel;
|
|
... |
|
... |
608 |
LOGERR(("FileInterner::internfile: can't skip\n"));
|
643 |
LOGERR(("FileInterner::internfile: can't skip\n"));
|
609 |
return FIError;
|
644 |
return FIError;
|
610 |
}
|
645 |
}
|
611 |
}
|
646 |
}
|
612 |
breakloop:
|
647 |
breakloop:
|
613 |
|
|
|
614 |
if (m_handlers.empty()) {
|
648 |
if (m_handlers.empty()) {
|
615 |
LOGDEB(("FileInterner::internfile: conversion ended with no doc\n"));
|
649 |
LOGDEB(("FileInterner::internfile: conversion ended with no doc\n"));
|
616 |
return FIError;
|
650 |
return FIError;
|
617 |
}
|
651 |
}
|
618 |
|
652 |
|
619 |
// If indexing compute ipath and significant mimetype.
|
653 |
// If indexing compute ipath and significant mimetype. ipath is
|
620 |
// ipath is returned through the parameter not doc.ipath We also
|
654 |
// returned through the parameter not doc.ipath We also retrieve
|
621 |
// retrieve some metadata fields from the ancestors (like date or
|
655 |
// some metadata fields from the ancestors (like date or
|
622 |
// author). This is useful for email attachments. The values will
|
656 |
// author). This is useful for email attachments. The values will
|
623 |
// be replaced by those found by dijontorcl if any, so the order
|
657 |
// be replaced by those internal to the document (by dijontorcl())
|
624 |
// of calls is important.
|
658 |
// if any, so the order of calls is important.
|
625 |
if (!m_forPreview)
|
659 |
if (!m_forPreview)
|
626 |
collectIpathAndMT(doc, ipath);
|
660 |
collectIpathAndMT(doc, ipath);
|
627 |
else
|
661 |
else
|
628 |
doc.mimetype = m_reachedMType;
|
662 |
doc.mimetype = m_reachedMType;
|
629 |
|
663 |
|