|
a/src/internfile/internfile.cpp |
|
b/src/internfile/internfile.cpp |
|
... |
|
... |
44 |
#include "mh_html.h"
|
44 |
#include "mh_html.h"
|
45 |
#include "fileudi.h"
|
45 |
#include "fileudi.h"
|
46 |
#include "cancelcheck.h"
|
46 |
#include "cancelcheck.h"
|
47 |
#include "copyfile.h"
|
47 |
#include "copyfile.h"
|
48 |
#include "fetcher.h"
|
48 |
#include "fetcher.h"
|
49 |
|
|
|
50 |
#ifdef RCL_USE_XATTR
|
|
|
51 |
#include "pxattr.h"
|
49 |
#include "extrameta.h"
|
52 |
#endif // RCL_USE_XATTR
|
|
|
53 |
|
|
|
54 |
|
50 |
|
55 |
// The internal path element separator. This can't be the same as the rcldb
|
51 |
// The internal path element separator. This can't be the same as the rcldb
|
56 |
// file to ipath separator : "|"
|
52 |
// file to ipath separator : "|"
|
57 |
// We replace it with a control char if it comes out of a filter (ie:
|
53 |
// We replace it with a control char if it comes out of a filter (ie:
|
58 |
// rclzip or rclchm can do this). If you want the SOH control char
|
54 |
// rclzip or rclchm can do this). If you want the SOH control char
|
|
... |
|
... |
73 |
string out;
|
69 |
string out;
|
74 |
for (string::const_iterator it = in.begin(); it != in.end(); it++) {
|
70 |
for (string::const_iterator it = in.begin(); it != in.end(); it++) {
|
75 |
out += *it == cchar_colon_repl ? ':' : *it;
|
71 |
out += *it == cchar_colon_repl ? ':' : *it;
|
76 |
}
|
72 |
}
|
77 |
return out;
|
73 |
return out;
|
78 |
}
|
|
|
79 |
|
|
|
80 |
#ifdef RCL_USE_XATTR
|
|
|
81 |
void FileInterner::reapXAttrs(const string& path)
|
|
|
82 |
{
|
|
|
83 |
LOGDEB2(("FileInterner::reapXAttrs: [%s]\n", path.c_str()));
|
|
|
84 |
|
|
|
85 |
// Retrieve xattrs names from files and mapping table from config
|
|
|
86 |
vector<string> xnames;
|
|
|
87 |
if (!pxattr::list(path, &xnames)) {
|
|
|
88 |
LOGERR(("FileInterner::reapXattrs: pxattr::list: errno %d\n", errno));
|
|
|
89 |
return;
|
|
|
90 |
}
|
|
|
91 |
const map<string, string>& xtof = m_cfg->getXattrToField();
|
|
|
92 |
|
|
|
93 |
// Record the xattrs: names found in the config are either skipped
|
|
|
94 |
// or mapped depending if the translation is empty. Other names
|
|
|
95 |
// are recorded as-is
|
|
|
96 |
for (vector<string>::const_iterator it = xnames.begin();
|
|
|
97 |
it != xnames.end(); it++) {
|
|
|
98 |
string key = *it;
|
|
|
99 |
map<string, string>::const_iterator mit = xtof.find(*it);
|
|
|
100 |
if (mit != xtof.end()) {
|
|
|
101 |
if (mit->second.empty()) {
|
|
|
102 |
continue;
|
|
|
103 |
} else {
|
|
|
104 |
key = mit->second;
|
|
|
105 |
}
|
|
|
106 |
}
|
|
|
107 |
string value;
|
|
|
108 |
if (!pxattr::get(path, *it, &value, pxattr::PXATTR_NOFOLLOW)) {
|
|
|
109 |
LOGERR(("FileInterner::reapXattrs: pxattr::get failed"
|
|
|
110 |
"for %s, errno %d\n", (*it).c_str(), errno));
|
|
|
111 |
continue;
|
|
|
112 |
}
|
|
|
113 |
// Encode should we ?
|
|
|
114 |
m_XAttrsFields[key] = value;
|
|
|
115 |
LOGDEB2(("FileInterner::reapXAttrs: [%s] -> [%s]\n",
|
|
|
116 |
key.c_str(), value.c_str()));
|
|
|
117 |
}
|
|
|
118 |
}
|
|
|
119 |
#endif // RCL_USE_XATTR
|
|
|
120 |
|
|
|
121 |
void FileInterner::reapCmdMetadata(const string& fn)
|
|
|
122 |
{
|
|
|
123 |
const vector<MDReaper>& reapers = m_cfg->getMDReapers();
|
|
|
124 |
if (reapers.empty())
|
|
|
125 |
return;
|
|
|
126 |
map<char,string> smap = create_map<char, string>('f', fn);
|
|
|
127 |
for (vector<MDReaper>::const_iterator rp = reapers.begin();
|
|
|
128 |
rp != reapers.end(); rp++) {
|
|
|
129 |
vector<string> cmd;
|
|
|
130 |
for (vector<string>::const_iterator it = rp->cmdv.begin();
|
|
|
131 |
it != rp->cmdv.end(); it++) {
|
|
|
132 |
string s;
|
|
|
133 |
pcSubst(*it, s, smap);
|
|
|
134 |
cmd.push_back(s);
|
|
|
135 |
}
|
|
|
136 |
string output;
|
|
|
137 |
if (ExecCmd::backtick(cmd, output)) {
|
|
|
138 |
m_cmdFields[rp->fieldname] = output;
|
|
|
139 |
}
|
|
|
140 |
}
|
|
|
141 |
}
|
74 |
}
|
142 |
|
75 |
|
143 |
// This is used when the user wants to retrieve a search result doc's parent
|
76 |
// This is used when the user wants to retrieve a search result doc's parent
|
144 |
// (ie message having a given attachment)
|
77 |
// (ie message having a given attachment)
|
145 |
bool FileInterner::getEnclosingUDI(const Rcl::Doc &doc, string& udi)
|
78 |
bool FileInterner::getEnclosingUDI(const Rcl::Doc &doc, string& udi)
|
|
... |
|
... |
298 |
#ifdef RCL_USE_XATTR
|
231 |
#ifdef RCL_USE_XATTR
|
299 |
// Get fields computed from extended attributes. We use the
|
232 |
// Get fields computed from extended attributes. We use the
|
300 |
// original file, not the m_fn which may be the uncompressed temp
|
233 |
// original file, not the m_fn which may be the uncompressed temp
|
301 |
// file
|
234 |
// file
|
302 |
if (!m_noxattrs)
|
235 |
if (!m_noxattrs)
|
303 |
reapXAttrs(f);
|
236 |
reapXAttrs(m_cfg, f, m_XAttrsFields);
|
304 |
#endif //RCL_USE_XATTR
|
237 |
#endif //RCL_USE_XATTR
|
305 |
reapCmdMetadata(f);
|
238 |
|
|
|
239 |
// Gather metadata from external commands as configured.
|
|
|
240 |
reapMetaCmds(m_cfg, f, m_cmdFields);
|
306 |
|
241 |
|
307 |
df->set_docsize(docsize);
|
242 |
df->set_docsize(docsize);
|
308 |
if (!df->set_document_file(l_mime, m_fn)) {
|
243 |
if (!df->set_document_file(l_mime, m_fn)) {
|
309 |
delete df;
|
244 |
delete df;
|
310 |
LOGERR(("FileInterner:: error converting %s\n", m_fn.c_str()));
|
245 |
LOGERR(("FileInterner:: error converting %s\n", m_fn.c_str()));
|
|
... |
|
... |
617 |
doc.meta.erase(cstr_dj_keyds);
|
552 |
doc.meta.erase(cstr_dj_keyds);
|
618 |
}
|
553 |
}
|
619 |
return true;
|
554 |
return true;
|
620 |
}
|
555 |
}
|
621 |
|
556 |
|
622 |
static void docfieldfrommeta(RclConfig* cfg, const string& name,
|
|
|
623 |
const string &value, Rcl::Doc& doc)
|
|
|
624 |
{
|
|
|
625 |
string fieldname = cfg->fieldCanon(name);
|
|
|
626 |
LOGDEB0(("Internfile:: setting [%s] from cmd value [%s]\n",
|
|
|
627 |
fieldname.c_str(), value.c_str()));
|
|
|
628 |
if (fieldname == cstr_dj_keymd) {
|
|
|
629 |
doc.dmtime = value;
|
|
|
630 |
} else {
|
|
|
631 |
doc.meta[fieldname] = value;
|
|
|
632 |
}
|
|
|
633 |
}
|
|
|
634 |
|
|
|
635 |
// Collect the ipath from the current path in the document tree.
|
557 |
// Collect the ipath from the current path in the document tree.
|
636 |
// While we're at it, we also set the mimetype and filename,
|
558 |
// While we're at it, we also set the mimetype and filename,
|
637 |
// which are special properties: we want to get them from the topmost
|
559 |
// which are special properties: we want to get them from the topmost
|
638 |
// doc with an ipath, not the last one which is usually text/plain We
|
560 |
// doc with an ipath, not the last one which is usually text/plain We
|
639 |
// also set the author and modification time from the last doc which
|
561 |
// also set the author and modification time from the last doc which
|
|
... |
|
... |
652 |
LOGDEB2(("FileInterner::collectIpathAndMT\n"));
|
574 |
LOGDEB2(("FileInterner::collectIpathAndMT\n"));
|
653 |
bool hasipath = false;
|
575 |
bool hasipath = false;
|
654 |
|
576 |
|
655 |
#ifdef RCL_USE_XATTR
|
577 |
#ifdef RCL_USE_XATTR
|
656 |
if (!m_noxattrs) {
|
578 |
if (!m_noxattrs) {
|
657 |
// Set fields from extended file attributes.
|
579 |
docFieldsFromXattrs(m_cfg, m_XAttrsFields, doc);
|
658 |
// These can be later augmented by values from inside the file
|
|
|
659 |
for (map<string,string>::const_iterator it = m_XAttrsFields.begin();
|
|
|
660 |
it != m_XAttrsFields.end(); it++) {
|
|
|
661 |
LOGDEB1(("Internfile:: setting [%s] from xattrs value [%s]\n",
|
|
|
662 |
m_cfg->fieldCanon(it->first).c_str(), it->second.c_str()));
|
|
|
663 |
doc.meta[m_cfg->fieldCanon(it->first)] = it->second;
|
|
|
664 |
}
|
|
|
665 |
}
|
580 |
}
|
666 |
#endif //RCL_USE_XATTR
|
581 |
#endif //RCL_USE_XATTR
|
667 |
|
582 |
|
668 |
// Set fields from external commands
|
583 |
docFieldsFromMetaCmds(m_cfg, m_cmdFields, doc);
|
669 |
// These override those from xattrs and can be later augmented by
|
|
|
670 |
// values from inside the file.
|
|
|
671 |
//
|
|
|
672 |
// This is a bit atrocious because some entry names are special:
|
|
|
673 |
// "modificationdate" will set mtime instead of an ordinary field,
|
|
|
674 |
// and the output from anything beginning with "rclmulti" will be
|
|
|
675 |
// interpreted as multiple fields in configuration file format...
|
|
|
676 |
for (map<string,string>::const_iterator it = m_cmdFields.begin();
|
|
|
677 |
it != m_cmdFields.end(); it++) {
|
|
|
678 |
if (!it->first.compare(0, 8, "rclmulti")) {
|
|
|
679 |
ConfSimple simple(it->second);
|
|
|
680 |
if (simple.ok()) {
|
|
|
681 |
vector<string> names = simple.getNames("");
|
|
|
682 |
for (vector<string>::const_iterator nm = names.begin();
|
|
|
683 |
nm != names.end(); nm++) {
|
|
|
684 |
string value;
|
|
|
685 |
if (simple.get(*nm, value)) {
|
|
|
686 |
docfieldfrommeta(m_cfg, *nm, value, doc);
|
|
|
687 |
}
|
|
|
688 |
}
|
|
|
689 |
}
|
|
|
690 |
} else {
|
|
|
691 |
docfieldfrommeta(m_cfg, it->first, it->second, doc);
|
|
|
692 |
}
|
|
|
693 |
}
|
|
|
694 |
|
584 |
|
695 |
// If there is no ipath stack, the mimetype is the one from the file
|
585 |
// If there is no ipath stack, the mimetype is the one from the file
|
696 |
doc.mimetype = m_mimetype;
|
586 |
doc.mimetype = m_mimetype;
|
697 |
|
587 |
|
698 |
string ipathel;
|
588 |
string ipathel;
|