|
a/src/internfile/internfile.cpp |
|
b/src/internfile/internfile.cpp |
|
... |
|
... |
30 |
#include <map>
|
30 |
#include <map>
|
31 |
#ifndef NO_NAMESPACES
|
31 |
#ifndef NO_NAMESPACES
|
32 |
using namespace std;
|
32 |
using namespace std;
|
33 |
#endif /* NO_NAMESPACES */
|
33 |
#endif /* NO_NAMESPACES */
|
34 |
|
34 |
|
|
|
35 |
#include "cstr.h"
|
35 |
#include "internfile.h"
|
36 |
#include "internfile.h"
|
36 |
#include "rcldoc.h"
|
37 |
#include "rcldoc.h"
|
37 |
#include "mimetype.h"
|
38 |
#include "mimetype.h"
|
38 |
#include "debuglog.h"
|
39 |
#include "debuglog.h"
|
39 |
#include "mimehandler.h"
|
40 |
#include "mimehandler.h"
|
|
... |
|
... |
49 |
|
50 |
|
50 |
#ifdef RCL_USE_XATTR
|
51 |
#ifdef RCL_USE_XATTR
|
51 |
#include "pxattr.h"
|
52 |
#include "pxattr.h"
|
52 |
#endif // RCL_USE_XATTR
|
53 |
#endif // RCL_USE_XATTR
|
53 |
|
54 |
|
54 |
static const string cstr_stxtplain("text/plain");
|
|
|
55 |
|
55 |
|
56 |
// The internal path element separator. This can't be the same as the rcldb
|
56 |
// The internal path element separator. This can't be the same as the rcldb
|
57 |
// file to ipath separator : "|"
|
57 |
// file to ipath separator : "|"
|
58 |
// We replace it with a control char if it comes out of a filter (ie:
|
58 |
// We replace it with a control char if it comes out of a filter (ie:
|
59 |
// rclzip or rclchm can do this). If you want the SOH control char
|
59 |
// rclzip or rclchm can do this). If you want the SOH control char
|
|
... |
|
... |
204 |
|
204 |
|
205 |
// This is used by filters which manage some kind of cache.
|
205 |
// This is used by filters which manage some kind of cache.
|
206 |
// Indexing by udi makes things easier (because they sometimes get a temp
|
206 |
// Indexing by udi makes things easier (because they sometimes get a temp
|
207 |
// as input)
|
207 |
// as input)
|
208 |
string udi;
|
208 |
string udi;
|
209 |
make_udi(f, "", udi);
|
209 |
make_udi(f, cstr_null, udi);
|
210 |
|
210 |
|
211 |
cnf->setKeyDir(path_getfather(m_fn));
|
211 |
cnf->setKeyDir(path_getfather(m_fn));
|
212 |
|
212 |
|
213 |
string l_mime;
|
213 |
string l_mime;
|
214 |
bool usfci = false;
|
214 |
bool usfci = false;
|
|
... |
|
... |
364 |
m_forPreview = ((flags & FIF_forPreview) != 0);
|
364 |
m_forPreview = ((flags & FIF_forPreview) != 0);
|
365 |
// Initialize handler stack.
|
365 |
// Initialize handler stack.
|
366 |
m_handlers.reserve(MAXHANDLERS);
|
366 |
m_handlers.reserve(MAXHANDLERS);
|
367 |
for (unsigned int i = 0; i < MAXHANDLERS; i++)
|
367 |
for (unsigned int i = 0; i < MAXHANDLERS; i++)
|
368 |
m_tmpflgs[i] = false;
|
368 |
m_tmpflgs[i] = false;
|
369 |
m_targetMType = cstr_stxtplain;
|
369 |
m_targetMType = cstr_textplain;
|
370 |
}
|
370 |
}
|
371 |
|
371 |
|
372 |
// We use a single beagle cache object to access beagle data. We protect it
|
372 |
// We use a single beagle cache object to access beagle data. We protect it
|
373 |
// against multiple thread access.
|
373 |
// against multiple thread access.
|
374 |
static PTMutexInit o_beagler_mutex;
|
374 |
static PTMutexInit o_beagler_mutex;
|
|
... |
|
... |
396 |
backend = it->second;
|
396 |
backend = it->second;
|
397 |
|
397 |
|
398 |
if (backend.empty() || !backend.compare("FS")) {
|
398 |
if (backend.empty() || !backend.compare("FS")) {
|
399 |
// Filesystem document. Intern from file.
|
399 |
// Filesystem document. Intern from file.
|
400 |
// The url has to be like file://
|
400 |
// The url has to be like file://
|
401 |
if (idoc.url.find("file://") != 0) {
|
401 |
if (idoc.url.find(cstr_fileu) != 0) {
|
402 |
LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
|
402 |
LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
|
403 |
idoc.url.c_str()));
|
403 |
idoc.url.c_str()));
|
404 |
return;
|
404 |
return;
|
405 |
}
|
405 |
}
|
406 |
string fn = idoc.url.substr(7, string::npos);
|
406 |
string fn = idoc.url.substr(7, string::npos);
|
|
... |
|
... |
563 |
}
|
563 |
}
|
564 |
|
564 |
|
565 |
// These defs are for the Dijon meta array. Rcl::Doc predefined field
|
565 |
// These defs are for the Dijon meta array. Rcl::Doc predefined field
|
566 |
// names are used where appropriate. In some cases, Rcl::Doc names are
|
566 |
// names are used where appropriate. In some cases, Rcl::Doc names are
|
567 |
// used inside the Dijon metadata (ex: origcharset)
|
567 |
// used inside the Dijon metadata (ex: origcharset)
|
568 |
static const string cstr_keyau("author");
|
|
|
569 |
static const string cstr_keycs("charset");
|
|
|
570 |
static const string cstr_keyct("content");
|
|
|
571 |
static const string cstr_keyds("description");
|
568 |
static const string cstr_keyds("description");
|
572 |
static const string cstr_keyfn("filename");
|
569 |
static const string cstr_keyfn("filename");
|
573 |
static const string cstr_keymd("modificationdate");
|
570 |
static const string cstr_keymd("modificationdate");
|
574 |
static const string cstr_keymt("mimetype");
|
|
|
575 |
static const string cstr_keytt("title");
|
571 |
static const string cstr_keytt("title");
|
576 |
|
572 |
|
577 |
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
573 |
bool FileInterner::dijontorcl(Rcl::Doc& doc)
|
578 |
{
|
574 |
{
|
579 |
Dijon::Filter *df = m_handlers.back();
|
575 |
Dijon::Filter *df = m_handlers.back();
|
|
... |
|
... |
584 |
}
|
580 |
}
|
585 |
const map<string, string>& docdata = df->get_meta_data();
|
581 |
const map<string, string>& docdata = df->get_meta_data();
|
586 |
|
582 |
|
587 |
for (map<string,string>::const_iterator it = docdata.begin();
|
583 |
for (map<string,string>::const_iterator it = docdata.begin();
|
588 |
it != docdata.end(); it++) {
|
584 |
it != docdata.end(); it++) {
|
589 |
if (it->first == cstr_keyct) {
|
585 |
if (it->first == cstr_content) {
|
590 |
doc.text = it->second;
|
586 |
doc.text = it->second;
|
591 |
} else if (it->first == cstr_keymd) {
|
587 |
} else if (it->first == cstr_keymd) {
|
592 |
doc.dmtime = it->second;
|
588 |
doc.dmtime = it->second;
|
593 |
} else if (it->first == Rcl::Doc::keyoc) {
|
589 |
} else if (it->first == Rcl::Doc::keyoc) {
|
594 |
doc.origcharset = it->second;
|
590 |
doc.origcharset = it->second;
|
595 |
} else if (it->first == cstr_keymt || it->first == cstr_keycs) {
|
591 |
} else if (it->first == cstr_mimetype || it->first == cstr_charset) {
|
596 |
// don't need/want these.
|
592 |
// don't need/want these.
|
597 |
} else {
|
593 |
} else {
|
598 |
doc.meta[it->first] = it->second;
|
594 |
doc.meta[it->first] = it->second;
|
599 |
}
|
595 |
}
|
600 |
}
|
596 |
}
|
|
... |
|
... |
630 |
|
626 |
|
631 |
string ipathel;
|
627 |
string ipathel;
|
632 |
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
628 |
for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
|
633 |
hit != m_handlers.end(); hit++) {
|
629 |
hit != m_handlers.end(); hit++) {
|
634 |
const map<string, string>& docdata = (*hit)->get_meta_data();
|
630 |
const map<string, string>& docdata = (*hit)->get_meta_data();
|
635 |
if (getKeyValue(docdata, "ipath", ipathel)) {
|
631 |
if (getKeyValue(docdata, cstr_ipath, ipathel)) {
|
636 |
if (!ipathel.empty()) {
|
632 |
if (!ipathel.empty()) {
|
637 |
// We have a non-empty ipath
|
633 |
// We have a non-empty ipath
|
638 |
hasipath = true;
|
634 |
hasipath = true;
|
639 |
getKeyValue(docdata, cstr_keymt, doc.mimetype);
|
635 |
getKeyValue(docdata, cstr_mimetype, doc.mimetype);
|
640 |
getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
|
636 |
getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
|
641 |
}
|
637 |
}
|
642 |
doc.ipath += colon_hide(ipathel) + cstr_isep;
|
638 |
doc.ipath += colon_hide(ipathel) + cstr_isep;
|
643 |
} else {
|
639 |
} else {
|
644 |
doc.ipath += cstr_isep;
|
640 |
doc.ipath += cstr_isep;
|
645 |
}
|
641 |
}
|
646 |
getKeyValue(docdata, cstr_keyau, doc.meta[Rcl::Doc::keyau]);
|
642 |
getKeyValue(docdata, cstr_author, doc.meta[Rcl::Doc::keyau]);
|
647 |
getKeyValue(docdata, cstr_keymd, doc.dmtime);
|
643 |
getKeyValue(docdata, cstr_keymd, doc.dmtime);
|
648 |
}
|
644 |
}
|
649 |
|
645 |
|
650 |
// Trim empty tail elements in ipath.
|
646 |
// Trim empty tail elements in ipath.
|
651 |
if (hasipath) {
|
647 |
if (hasipath) {
|
|
... |
|
... |
680 |
// and possibly add a filter/handler to the stack
|
676 |
// and possibly add a filter/handler to the stack
|
681 |
int FileInterner::addHandler()
|
677 |
int FileInterner::addHandler()
|
682 |
{
|
678 |
{
|
683 |
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
679 |
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
684 |
string charset, mimetype;
|
680 |
string charset, mimetype;
|
685 |
getKeyValue(docdata, cstr_keycs, charset);
|
681 |
getKeyValue(docdata, cstr_charset, charset);
|
686 |
getKeyValue(docdata, cstr_keymt, mimetype);
|
682 |
getKeyValue(docdata, cstr_mimetype, mimetype);
|
687 |
|
683 |
|
688 |
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
684 |
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
689 |
|
685 |
|
690 |
// If we find a document of the target type (text/plain in
|
686 |
// If we find a document of the target type (text/plain in
|
691 |
// general), we're done decoding. If we hit text/plain, we're done
|
687 |
// general), we're done decoding. If we hit text/plain, we're done
|
692 |
// in any case
|
688 |
// in any case
|
693 |
if (!stringicmp(mimetype, m_targetMType) ||
|
689 |
if (!stringicmp(mimetype, m_targetMType) ||
|
694 |
!stringicmp(mimetype, cstr_stxtplain)) {
|
690 |
!stringicmp(mimetype, cstr_textplain)) {
|
695 |
m_reachedMType = mimetype;
|
691 |
m_reachedMType = mimetype;
|
696 |
LOGDEB1(("FileInterner::addHandler: target reached\n"));
|
692 |
LOGDEB1(("FileInterner::addHandler: target reached\n"));
|
697 |
return ADD_BREAK;
|
693 |
return ADD_BREAK;
|
698 |
}
|
694 |
}
|
699 |
|
695 |
|
|
... |
|
... |
722 |
// copying the text, which may be big.
|
718 |
// copying the text, which may be big.
|
723 |
string ns;
|
719 |
string ns;
|
724 |
const string *txt = &ns;
|
720 |
const string *txt = &ns;
|
725 |
{
|
721 |
{
|
726 |
map<string,string>::const_iterator it;
|
722 |
map<string,string>::const_iterator it;
|
727 |
it = docdata.find(cstr_keyct);
|
723 |
it = docdata.find(cstr_content);
|
728 |
if (it != docdata.end())
|
724 |
if (it != docdata.end())
|
729 |
txt = &it->second;
|
725 |
txt = &it->second;
|
730 |
}
|
726 |
}
|
731 |
bool setres = false;
|
727 |
bool setres = false;
|
732 |
if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
728 |
if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|