a/src/internfile/internfile.cpp b/src/internfile/internfile.cpp
...
...
49
49
50
#ifdef RCL_USE_XATTR
50
#ifdef RCL_USE_XATTR
51
#include "pxattr.h"
51
#include "pxattr.h"
52
#endif // RCL_USE_XATTR
52
#endif // RCL_USE_XATTR
53
53
54
// MIME type of plain text. It is the initial value of m_targetMType, and
// addHandler() also compares the current document's MIME type against it
// (case-insensitively) to decide that decoding is finished.
// This listing shows the constant under both its old name (stxtplain) and
// its new name (cstr_stxtplain); the value is the same.
static const string stxtplain("text/plain");
54
static const string cstr_stxtplain("text/plain");
55
55
56
// The internal path element separator. This can't be the same as the rcldb 
56
// The internal path element separator. This can't be the same as the rcldb 
57
// file to ipath separator : "|"
57
// file to ipath separator : "|"
58
// We replace it with a control char if it comes out of a filter (ie:
58
// We replace it with a control char if it comes out of a filter (ie:
59
// rclzip or rclchm can do this). If you want the SOH control char
59
// rclzip or rclchm can do this). If you want the SOH control char
60
// inside an ipath, you're out of luck (and a bit weird).
60
// inside an ipath, you're out of luck (and a bit weird).
61
// Separator appended after each element when an ipath is built, and used
// to split an ipath back into its elements (old name: isep, new name:
// cstr_isep).
static const string isep(":");
61
static const string cstr_isep(":");
62
62
// Control character (SOH, 0x01) that colon_hide() substitutes for ':'
// inside a single ipath element and that colon_restore() turns back into
// ':' (old name: colon_repl, new name: cchar_colon_repl).
static const char colon_repl = '\x01';
63
static const char cchar_colon_repl = '\x01';
63
// Return a copy of 'in' in which every ':' character is replaced by the
// replacement control character ('\x01'). All other characters are copied
// unchanged. The result is what gets appended to doc.ipath, followed by the
// separator, so a ':' coming from a filter's ipath element cannot be
// mistaken for an element boundary.
// The function appears twice in this listing: the two copies differ only in
// the name of the replacement constant (colon_repl vs cchar_colon_repl).
static string colon_hide(const string& in)
64
static string colon_hide(const string& in)
64
{
65
{
65
    string out;
66
    string out;
66
    for (string::const_iterator it = in.begin(); it != in.end(); it++) {
67
    for (string::const_iterator it = in.begin(); it != in.end(); it++) {
67
    // Character-by-character copy, swapping ':' for the stand-in.
    out += *it == ':' ? colon_repl : *it;
68
    out += *it == ':' ? cchar_colon_repl : *it;
68
    }
69
    }
69
    return out;
70
    return out;
70
}
71
}
71
// Inverse of colon_hide(): return a copy of 'in' in which every occurrence
// of the replacement control character ('\x01') is turned back into ':'.
// It is applied to each token obtained by splitting an ipath on the
// separator. Note that a '\x01' that was already present in the original
// element is also converted to ':' (the two cases cannot be told apart).
// The function appears twice in this listing: the two copies differ only in
// the name of the replacement constant (colon_repl vs cchar_colon_repl).
static string colon_restore(const string& in)
72
static string colon_restore(const string& in)
72
{
73
{
73
    string out;
74
    string out;
74
    for (string::const_iterator it = in.begin(); it != in.end(); it++) {
75
    for (string::const_iterator it = in.begin(); it != in.end(); it++) {
75
    // Character-by-character copy, swapping the stand-in back to ':'.
    out += *it == colon_repl ? ':' : *it;
76
    out += *it == cchar_colon_repl ? ':' : *it;
76
    }
77
    }
77
    return out;
78
    return out;
78
}
79
}
79
80
80
#ifdef RCL_USE_XATTR
81
#ifdef RCL_USE_XATTR
...
...
113
    string::size_type colon;
114
    string::size_type colon;
114
    LOGDEB(("FileInterner::getEnclosing(): url [%s] ipath [%s]\n", 
115
    LOGDEB(("FileInterner::getEnclosing(): url [%s] ipath [%s]\n", 
115
        url.c_str(), eipath.c_str()));
116
        url.c_str(), eipath.c_str()));
116
    if (eipath.empty())
117
    if (eipath.empty())
117
    return false;
118
    return false;
118
    if ((colon =  eipath.find_last_of(isep)) != string::npos) {
119
    if ((colon =  eipath.find_last_of(cstr_isep)) != string::npos) {
119
    eipath.erase(colon);
120
    eipath.erase(colon);
120
    } else {
121
    } else {
121
    eipath.erase();
122
    eipath.erase();
122
    }
123
    }
123
    make_udi(url_gpath(eurl), eipath, udi);
124
    make_udi(url_gpath(eurl), eipath, udi);
...
...
363
    m_forPreview = ((flags & FIF_forPreview) != 0);
364
    m_forPreview = ((flags & FIF_forPreview) != 0);
364
    // Initialize handler stack.
365
    // Initialize handler stack.
365
    m_handlers.reserve(MAXHANDLERS);
366
    m_handlers.reserve(MAXHANDLERS);
366
    for (unsigned int i = 0; i < MAXHANDLERS; i++)
367
    for (unsigned int i = 0; i < MAXHANDLERS; i++)
367
    m_tmpflgs[i] = false;
368
    m_tmpflgs[i] = false;
368
    m_targetMType = stxtplain;
369
    m_targetMType = cstr_stxtplain;
369
}
370
}
370
371
371
// We used a single beagle cache object to access beagle data. We protect it 
372
// We used a single beagle cache object to access beagle data. We protect it 
372
// against multiple thread access.
373
// against multiple thread access.
373
static PTMutexInit o_lock;
374
static PTMutexInit o_beagler_mutex;
374
375
375
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, 
376
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, 
376
                           TempDir& td, int flags)
377
                           TempDir& td, int flags)
377
    : m_tdir(td), m_ok(false), m_missingdatap(0)
378
    : m_tdir(td), m_ok(false), m_missingdatap(0)
378
{
379
{
...
...
420
            return;
421
            return;
421
        }
422
        }
422
        string udi = it->second;
423
        string udi = it->second;
423
424
424
    {
425
    {
425
        PTMutexLocker locker(o_lock);
426
        PTMutexLocker locker(o_beagler_mutex);
426
        // Retrieve from our webcache (beagle data). The beagler
427
        // Retrieve from our webcache (beagle data). The beagler
427
        // object is created at the first call of this routine and
428
        // object is created at the first call of this routine and
428
        // deleted when the program exits.
429
        // deleted when the program exits.
429
        static BeagleQueueCache beagler(cnf);
430
        static BeagleQueueCache o_beagler(cnf);
430
        if (!beagler.getFromCache(udi, dotdoc, data)) {
431
        if (!o_beagler.getFromCache(udi, dotdoc, data)) {
431
        LOGINFO(("FileInterner:: failed fetch from Beagle cache for [%s]\n",
432
        LOGINFO(("FileInterner:: failed fetch from Beagle cache for [%s]\n",
432
             udi.c_str()));
433
             udi.c_str()));
433
        return;
434
        return;
434
        }
435
        }
435
    }
436
    }
...
...
562
}
563
}
563
564
564
// These defs are for the Dijon meta array. Rcl::Doc predefined field
565
// These defs are for the Dijon meta array. Rcl::Doc predefined field
565
// names are used where appropriate. In some cases, Rcl::Doc names are
566
// names are used where appropriate. In some cases, Rcl::Doc names are
566
// used inside the Dijon metadata (ex: origcharset)
567
// used inside the Dijon metadata (ex: origcharset)
567
static const string keyau("author");
568
static const string cstr_keyau("author");
568
static const string keycs("charset");
569
static const string cstr_keycs("charset");
569
static const string keyct("content");
570
static const string cstr_keyct("content");
570
static const string keyds("description");
571
static const string cstr_keyds("description");
571
static const string keyfn("filename");
572
static const string cstr_keyfn("filename");
572
static const string keymd("modificationdate");
573
static const string cstr_keymd("modificationdate");
573
static const string keymt("mimetype");
574
static const string cstr_keymt("mimetype");
574
static const string keytt("title");
575
static const string cstr_keytt("title");
575
576
576
bool FileInterner::dijontorcl(Rcl::Doc& doc)
577
bool FileInterner::dijontorcl(Rcl::Doc& doc)
577
{
578
{
578
    Dijon::Filter *df = m_handlers.back();
579
    Dijon::Filter *df = m_handlers.back();
579
    if (df == 0) {
580
    if (df == 0) {
...
...
583
    }
584
    }
584
    const map<string, string>& docdata = df->get_meta_data();
585
    const map<string, string>& docdata = df->get_meta_data();
585
586
586
    for (map<string,string>::const_iterator it = docdata.begin(); 
587
    for (map<string,string>::const_iterator it = docdata.begin(); 
587
     it != docdata.end(); it++) {
588
     it != docdata.end(); it++) {
588
    if (it->first == keyct) {
589
    if (it->first == cstr_keyct) {
589
        doc.text = it->second;
590
        doc.text = it->second;
590
    } else if (it->first == keymd) {
591
    } else if (it->first == cstr_keymd) {
591
        doc.dmtime = it->second;
592
        doc.dmtime = it->second;
592
    } else if (it->first == Rcl::Doc::keyoc) {
593
    } else if (it->first == Rcl::Doc::keyoc) {
593
        doc.origcharset = it->second;
594
        doc.origcharset = it->second;
594
    } else if (it->first == keymt || it->first == keycs) {
595
    } else if (it->first == cstr_keymt || it->first == cstr_keycs) {
595
        // don't need/want these.
596
        // don't need/want these.
596
    } else {
597
    } else {
597
        doc.meta[it->first] = it->second;
598
        doc.meta[it->first] = it->second;
598
    }
599
    }
599
    }
600
    }
600
    if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[keyds].empty()) {
601
    if (doc.meta[Rcl::Doc::keyabs].empty() && !doc.meta[cstr_keyds].empty()) {
601
    doc.meta[Rcl::Doc::keyabs] = doc.meta[keyds];
602
    doc.meta[Rcl::Doc::keyabs] = doc.meta[cstr_keyds];
602
    doc.meta.erase(keyds);
603
    doc.meta.erase(cstr_keyds);
603
    }
604
    }
604
    return true;
605
    return true;
605
}
606
}
606
607
607
// Collect the ipath from the current path in the document tree.
608
// Collect the ipath from the current path in the document tree.
...
...
633
    const map<string, string>& docdata = (*hit)->get_meta_data();
634
    const map<string, string>& docdata = (*hit)->get_meta_data();
634
    if (getKeyValue(docdata, "ipath", ipathel)) {
635
    if (getKeyValue(docdata, "ipath", ipathel)) {
635
        if (!ipathel.empty()) {
636
        if (!ipathel.empty()) {
636
        // We have a non-empty ipath
637
        // We have a non-empty ipath
637
        hasipath = true;
638
        hasipath = true;
638
        getKeyValue(docdata, keymt, doc.mimetype);
639
        getKeyValue(docdata, cstr_keymt, doc.mimetype);
639
        getKeyValue(docdata, keyfn, doc.utf8fn);
640
        getKeyValue(docdata, cstr_keyfn, doc.utf8fn);
640
        }
641
        }
641
        doc.ipath += colon_hide(ipathel) + isep;
642
        doc.ipath += colon_hide(ipathel) + cstr_isep;
642
    } else {
643
    } else {
643
        doc.ipath += isep;
644
        doc.ipath += cstr_isep;
644
    }
645
    }
645
    getKeyValue(docdata, keyau, doc.meta[Rcl::Doc::keyau]);
646
    getKeyValue(docdata, cstr_keyau, doc.meta[Rcl::Doc::keyau]);
646
    getKeyValue(docdata, keymd, doc.dmtime);
647
    getKeyValue(docdata, cstr_keymd, doc.dmtime);
647
    }
648
    }
648
649
649
    // Trim empty tail elements in ipath.
650
    // Trim empty tail elements in ipath.
650
    if (hasipath) {
651
    if (hasipath) {
651
    LOGDEB2(("IPATH [%s]\n", doc.ipath.c_str()));
652
    LOGDEB2(("IPATH [%s]\n", doc.ipath.c_str()));
652
    string::size_type sit = doc.ipath.find_last_not_of(isep);
653
    string::size_type sit = doc.ipath.find_last_not_of(cstr_isep);
653
    if (sit == string::npos)
654
    if (sit == string::npos)
654
        doc.ipath.erase();
655
        doc.ipath.erase();
655
    else if (sit < doc.ipath.length() -1)
656
    else if (sit < doc.ipath.length() -1)
656
        doc.ipath.erase(sit+1);
657
        doc.ipath.erase(sit+1);
657
    } else {
658
    } else {
...
...
679
// and possibly add a filter/handler to the stack
680
// and possibly add a filter/handler to the stack
680
int FileInterner::addHandler()
681
int FileInterner::addHandler()
681
{
682
{
682
    const map<string, string>& docdata = m_handlers.back()->get_meta_data();
683
    const map<string, string>& docdata = m_handlers.back()->get_meta_data();
683
    string charset, mimetype;
684
    string charset, mimetype;
684
    getKeyValue(docdata, keycs, charset);
685
    getKeyValue(docdata, cstr_keycs, charset);
685
    getKeyValue(docdata, keymt, mimetype);
686
    getKeyValue(docdata, cstr_keymt, mimetype);
686
687
687
    LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
688
    LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
688
689
689
    // If we find a document of the target type (text/plain in
690
    // If we find a document of the target type (text/plain in
690
    // general), we're done decoding. If we hit text/plain, we're done
691
    // general), we're done decoding. If we hit text/plain, we're done
691
    // in any case
692
    // in any case
692
    if (!stringicmp(mimetype, m_targetMType) || 
693
    if (!stringicmp(mimetype, m_targetMType) || 
693
    !stringicmp(mimetype, stxtplain)) {
694
    !stringicmp(mimetype, cstr_stxtplain)) {
694
    m_reachedMType = mimetype;
695
    m_reachedMType = mimetype;
695
    LOGDEB1(("FileInterner::addHandler: target reached\n"));
696
    LOGDEB1(("FileInterner::addHandler: target reached\n"));
696
    return ADD_BREAK;
697
    return ADD_BREAK;
697
    }
698
    }
698
699
...
...
721
    // copying the text, which may be big.
722
    // copying the text, which may be big.
722
    string ns;
723
    string ns;
723
    const string *txt = &ns;
724
    const string *txt = &ns;
724
    {
725
    {
725
    map<string,string>::const_iterator it;
726
    map<string,string>::const_iterator it;
726
    it = docdata.find(keyct);
727
    it = docdata.find(cstr_keyct);
727
    if (it != docdata.end())
728
    if (it != docdata.end())
728
        txt = &it->second;
729
        txt = &it->second;
729
    }
730
    }
730
    bool setres = false;
731
    bool setres = false;
731
    if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
732
    if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
...
...
793
    // when they're pushed on the stack
794
    // when they're pushed on the stack
794
    vector<string> vipath;
795
    vector<string> vipath;
795
    int vipathidx = 0;
796
    int vipathidx = 0;
796
    if (!ipath.empty()) {
797
    if (!ipath.empty()) {
797
    vector<string> lipath;
798
    vector<string> lipath;
798
    stringToTokens(ipath, lipath, isep, true);
799
    stringToTokens(ipath, lipath, cstr_isep, true);
799
    for (vector<string>::iterator it = lipath.begin();
800
    for (vector<string>::iterator it = lipath.begin();
800
         it != lipath.end(); it++) {
801
         it != lipath.end(); it++) {
801
        *it = colon_restore(*it);
802
        *it = colon_restore(*it);
802
    }
803
    }
803
    vipath.insert(vipath.begin(), lipath.begin(), lipath.end());
804
    vipath.insert(vipath.begin(), lipath.begin(), lipath.end());