a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp
...
...
30
#include <map>
30
#include <map>
31
#ifndef NO_NAMESPACES
31
#ifndef NO_NAMESPACES
32
using namespace std;
32
using namespace std;
33
#endif /* NO_NAMESPACES */
33
#endif /* NO_NAMESPACES */
34
34
35
#include "cstr.h"
35
#include "internfile.h"
36
#include "internfile.h"
36
#include "rcldoc.h"
37
#include "rcldoc.h"
37
#include "mimetype.h"
38
#include "mimetype.h"
38
#include "debuglog.h"
39
#include "debuglog.h"
39
#include "mimehandler.h"
40
#include "mimehandler.h"
...
...
49
50
50
#ifdef RCL_USE_XATTR
51
#ifdef RCL_USE_XATTR
51
#include "pxattr.h"
52
#include "pxattr.h"
52
#endif // RCL_USE_XATTR
53
#endif // RCL_USE_XATTR
53
54
54
static const string cstr_stxtplain("text/plain");
55
55
56
// The internal path element separator. This can't be the same as the rcldb 
56
// The internal path element separator. This can't be the same as the rcldb 
57
// file to ipath separator : "|"
57
// file to ipath separator : "|"
58
// We replace it with a control char if it comes out of a filter (ie:
58
// We replace it with a control char if it comes out of a filter (ie:
59
// rclzip or rclchm can do this). If you want the SOH control char
59
// rclzip or rclchm can do this). If you want the SOH control char
...
...
204
204
205
    // This is used by filters which manage some kind of cache.
205
    // This is used by filters which manage some kind of cache.
206
    // Indexing by udi makes things easier (because they sometimes get a temp 
206
    // Indexing by udi makes things easier (because they sometimes get a temp 
207
    // as input)
207
    // as input)
208
    string udi;
208
    string udi;
209
    make_udi(f, "", udi);
209
    make_udi(f, cstr_null, udi);
210
210
211
    cnf->setKeyDir(path_getfather(m_fn));
211
    cnf->setKeyDir(path_getfather(m_fn));
212
212
213
    string l_mime;
213
    string l_mime;
214
    bool usfci = false;
214
    bool usfci = false;
...
...
364
    m_forPreview = ((flags & FIF_forPreview) != 0);
364
    m_forPreview = ((flags & FIF_forPreview) != 0);
365
    // Initialize handler stack.
365
    // Initialize handler stack.
366
    m_handlers.reserve(MAXHANDLERS);
366
    m_handlers.reserve(MAXHANDLERS);
367
    for (unsigned int i = 0; i < MAXHANDLERS; i++)
367
    for (unsigned int i = 0; i < MAXHANDLERS; i++)
368
    m_tmpflgs[i] = false;
368
    m_tmpflgs[i] = false;
369
    m_targetMType = cstr_stxtplain;
369
    m_targetMType = cstr_textplain;
370
}
370
}
371
371
372
// We use a single beagle cache object to access beagle data. We protect it 
372
// We use a single beagle cache object to access beagle data. We protect it 
373
// against multiple thread access.
373
// against multiple thread access.
374
static PTMutexInit o_beagler_mutex;
374
static PTMutexInit o_beagler_mutex;
...
...
396
        backend = it->second;
396
        backend = it->second;
397
    
397
    
398
    if (backend.empty() || !backend.compare("FS")) {
398
    if (backend.empty() || !backend.compare("FS")) {
399
        // Filesystem document. Intern from file.
399
        // Filesystem document. Intern from file.
400
        // The url has to be like file://
400
        // The url has to be like file://
401
        if (idoc.url.find("file://") != 0) {
401
        if (idoc.url.find(cstr_fileu) != 0) {
402
            LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
402
            LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
403
                    idoc.url.c_str()));
403
                    idoc.url.c_str()));
404
            return;
404
            return;
405
        }
405
        }
406
        string fn = idoc.url.substr(7, string::npos);
406
        string fn = idoc.url.substr(7, string::npos);
...
...
563
}
563
}
564
564
565
// These defs are for the Dijon meta array. Rcl::Doc predefined field
565
// These defs are for the Dijon meta array. Rcl::Doc predefined field
566
// names are used where appropriate. In some cases, Rcl::Doc names are
566
// names are used where appropriate. In some cases, Rcl::Doc names are
567
// used inside the Dijon metadata (ex: origcharset)
567
// used inside the Dijon metadata (ex: origcharset)
568
static const string cstr_keyau("author");
569
static const string cstr_keycs("charset");
570
static const string cstr_keyct("content");
571
static const string cstr_keyds("description");
568
static const string cstr_keyds("description");
572
static const string cstr_keyfn("filename");
569
static const string cstr_keyfn("filename");
573
static const string cstr_keymd("modificationdate");
570
static const string cstr_keymd("modificationdate");
574
static const string cstr_keymt("mimetype");
575
static const string cstr_keytt("title");
571
static const string cstr_keytt("title");
576
572
577
bool FileInterner::dijontorcl(Rcl::Doc& doc)
573
bool FileInterner::dijontorcl(Rcl::Doc& doc)
578
{
574
{
579
    Dijon::Filter *df = m_handlers.back();
575
    Dijon::Filter *df = m_handlers.back();
...
...
584
    }
580
    }
585
    const map<string, string>& docdata = df->get_meta_data();
581
    const map<string, string>& docdata = df->get_meta_data();
586
582
587
    for (map<string,string>::const_iterator it = docdata.begin(); 
583
    for (map<string,string>::const_iterator it = docdata.begin(); 
588
     it != docdata.end(); it++) {
584
     it != docdata.end(); it++) {
589
    if (it->first == cstr_keyct) {
585
    if (it->first == cstr_content) {
590
        doc.text = it->second;
586
        doc.text = it->second;
591
    } else if (it->first == cstr_keymd) {
587
    } else if (it->first == cstr_keymd) {
592
        doc.dmtime = it->second;
588
        doc.dmtime = it->second;
593
    } else if (it->first == Rcl::Doc::keyoc) {
589
    } else if (it->first == Rcl::Doc::keyoc) {
594
        doc.origcharset = it->second;
590
        doc.origcharset = it->second;
595
    } else if (it->first == cstr_keymt || it->first == cstr_keycs) {
591
    } else if (it->first == cstr_mimetype || it->first == cstr_charset) {
596
        // don't need/want these.
592
        // don't need/want these.
597
    } else {
593
    } else {
598
        doc.meta[it->first] = it->second;
594
        doc.meta[it->first] = it->second;
599
    }
595
    }
600
    }
596
    }
...
...
630
626
631
    string ipathel;
627
    string ipathel;
632
    for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
628
    for (vector<Dijon::Filter*>::const_iterator hit = m_handlers.begin();
633
     hit != m_handlers.end(); hit++) {
629
     hit != m_handlers.end(); hit++) {
634
    const map<string, string>& docdata = (*hit)->get_meta_data();
630
    const map<string, string>& docdata = (*hit)->get_meta_data();
635
    if (getKeyValue(docdata, "ipath", ipathel)) {
631
    if (getKeyValue(docdata, cstr_ipath, ipathel)) {
636
        if (!ipathel.empty()) {
632
        if (!ipathel.empty()) {
637
        // We have a non-empty ipath
633
        // We have a non-empty ipath
638
        hasipath = true;
634
        hasipath = true;
639
        getKeyValue(docdata, cstr_keymt, doc.mimetype);
635
        getKeyValue(docdata, cstr_mimetype, doc.mimetype);
640
        getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
636
        getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
641
        }
637
        }
642
        doc.ipath += colon_hide(ipathel) + cstr_isep;
638
        doc.ipath += colon_hide(ipathel) + cstr_isep;
643
    } else {
639
    } else {
644
        doc.ipath += cstr_isep;
640
        doc.ipath += cstr_isep;
645
    }
641
    }
646
    getKeyValue(docdata, cstr_keyau, doc.meta[Rcl::Doc::keyau]);
642
    getKeyValue(docdata, cstr_author, doc.meta[Rcl::Doc::keyau]);
647
    getKeyValue(docdata, cstr_keymd, doc.dmtime);
643
    getKeyValue(docdata, cstr_keymd, doc.dmtime);
648
    }
644
    }
649
645
650
    // Trim empty tail elements in ipath.
646
    // Trim empty tail elements in ipath.
651
    if (hasipath) {
647
    if (hasipath) {
...
...
680
// and possibly add a filter/handler to the stack
676
// and possibly add a filter/handler to the stack
681
int FileInterner::addHandler()
677
int FileInterner::addHandler()
682
{
678
{
683
    const map<string, string>& docdata = m_handlers.back()->get_meta_data();
679
    const map<string, string>& docdata = m_handlers.back()->get_meta_data();
684
    string charset, mimetype;
680
    string charset, mimetype;
685
    getKeyValue(docdata, cstr_keycs, charset);
681
    getKeyValue(docdata, cstr_charset, charset);
686
    getKeyValue(docdata, cstr_keymt, mimetype);
682
    getKeyValue(docdata, cstr_mimetype, mimetype);
687
683
688
    LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
684
    LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
689
685
690
    // If we find a document of the target type (text/plain in
686
    // If we find a document of the target type (text/plain in
691
    // general), we're done decoding. If we hit text/plain, we're done
687
    // general), we're done decoding. If we hit text/plain, we're done
692
    // in any case
688
    // in any case
693
    if (!stringicmp(mimetype, m_targetMType) || 
689
    if (!stringicmp(mimetype, m_targetMType) || 
694
    !stringicmp(mimetype, cstr_stxtplain)) {
690
    !stringicmp(mimetype, cstr_textplain)) {
695
    m_reachedMType = mimetype;
691
    m_reachedMType = mimetype;
696
    LOGDEB1(("FileInterner::addHandler: target reached\n"));
692
    LOGDEB1(("FileInterner::addHandler: target reached\n"));
697
    return ADD_BREAK;
693
    return ADD_BREAK;
698
    }
694
    }
699
695
...
...
722
    // copying the text, which may be big.
718
    // copying the text, which may be big.
723
    string ns;
719
    string ns;
724
    const string *txt = &ns;
720
    const string *txt = &ns;
725
    {
721
    {
726
    map<string,string>::const_iterator it;
722
    map<string,string>::const_iterator it;
727
    it = docdata.find(cstr_keyct);
723
    it = docdata.find(cstr_content);
728
    if (it != docdata.end())
724
    if (it != docdata.end())
729
        txt = &it->second;
725
        txt = &it->second;
730
    }
726
    }
731
    bool setres = false;
727
    bool setres = false;
732
    if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
728
    if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {