Switch to unified view

a/src/common/rclconfig.cpp b/src/common/rclconfig.cpp
...
...
52
#include "cpuconf.h"
52
#include "cpuconf.h"
53
#include "execmd.h"
53
#include "execmd.h"
54
54
55
using namespace std;
55
using namespace std;
56
56
57
// Static, logically const, RclConfig members are initialized once from the
57
// Static, logically const, RclConfig members or module static
58
// first object build during process initialization.
58
// variables are initialized once from the first object build during
59
// process initialization.
59
60
60
// We default to a case- and diacritics-less index for now
61
// We default to a case- and diacritics-less index for now
61
bool o_index_stripchars = true;
62
bool o_index_stripchars = true;
63
// Default to storing the text contents for generating snippets. This
64
// only makes the index approximately 10% bigger and produces nicer
65
// snippets.
66
bool o_index_storedoctext = true;
62
67
63
bool o_uptodate_test_use_mtime = false;
68
bool o_uptodate_test_use_mtime = false;
64
69
65
string RclConfig::o_localecharset; 
70
string RclConfig::o_localecharset; 
66
string RclConfig::o_origcwd; 
71
string RclConfig::o_origcwd; 
...
...
389
    }
394
    }
390
395
391
    static int m_index_stripchars_init = 0;
396
    static int m_index_stripchars_init = 0;
392
    if (!m_index_stripchars_init) {
397
    if (!m_index_stripchars_init) {
393
    getConfParam("indexStripChars", &o_index_stripchars);
398
    getConfParam("indexStripChars", &o_index_stripchars);
399
        getConfParam("indexStoreDocText", &o_index_storedoctext);
394
        getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
400
        getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
395
    m_index_stripchars_init = 1;
401
    m_index_stripchars_init = 1;
396
    }
402
    }
397
403
398
    if (getConfParam("cachedir", m_cachedir)) {
404
    if (getConfParam("cachedir", m_cachedir)) {
...
...
924
    }
930
    }
925
931
926
    // Build a direct map avoiding all indirections for field to
932
    // Build a direct map avoiding all indirections for field to
927
    // prefix translation
933
    // prefix translation
928
    // Add direct prefixes from the [prefixes] section
934
    // Add direct prefixes from the [prefixes] section
929
    vector<string>tps = m_fields->getNames("prefixes");
935
    vector<string> tps = m_fields->getNames("prefixes");
930
    for (vector<string>::const_iterator it = tps.begin(); 
936
    for (const auto& fieldname : tps) {
931
   it != tps.end(); it++) {
932
    string val;
937
    string val;
933
    m_fields->get(*it, val, "prefixes");
938
    m_fields->get(fieldname, val, "prefixes");
934
    ConfSimple attrs;
939
    ConfSimple attrs;
935
    FieldTraits ft;
940
    FieldTraits ft;
941
        // fieldname = prefix ; attr1=val;attr2=val...
936
    if (!valueSplitAttributes(val, ft.pfx, attrs)) {
942
    if (!valueSplitAttributes(val, ft.pfx, attrs)) {
937
        LOGERR("readFieldsConfig: bad config line for ["  << *it <<
943
        LOGERR("readFieldsConfig: bad config line for ["  << fieldname <<
938
                   "]: [" << val << "]\n");
944
                   "]: [" << val << "]\n");
939
        return 0;
945
        return 0;
940
    }
946
    }
941
    string tval;
947
    string tval;
942
    if (attrs.get("wdfinc", tval))
948
    if (attrs.get("wdfinc", tval))
...
...
945
        ft.boost = atof(tval.c_str());
951
        ft.boost = atof(tval.c_str());
946
    if (attrs.get("pfxonly", tval))
952
    if (attrs.get("pfxonly", tval))
947
        ft.pfxonly = stringToBool(tval);
953
        ft.pfxonly = stringToBool(tval);
948
    if (attrs.get("noterms", tval))
954
    if (attrs.get("noterms", tval))
949
        ft.noterms = stringToBool(tval);
955
        ft.noterms = stringToBool(tval);
950
    m_fldtotraits[stringtolower(*it)] = ft;
956
    m_fldtotraits[stringtolower(fieldname)] = ft;
951
    LOGDEB2("readFieldsConfig: ["  << *it << "] -> ["  << ft.pfx <<
957
    LOGDEB2("readFieldsConfig: ["  << fieldname << "] -> ["  << ft.pfx <<
952
                "] " << ft.wdfinc << " " << ft.boost << "\n");
958
                "] " << ft.wdfinc << " " << ft.boost << "\n");
953
    }
959
    }
954
960
961
    // Values section
962
    tps = m_fields->getNames("values");
963
    for (const auto& fieldname : tps) {
964
  string canonic = stringtolower(fieldname); // canonic name
965
  string val;
966
  m_fields->get(fieldname, val, "values");
967
  ConfSimple attrs;
968
        string svslot;
969
        // fieldname = valueslot ; attr1=val;attr2=val...
970
  if (!valueSplitAttributes(val, svslot, attrs)) {
971
      LOGERR("readFieldsConfig: bad value line for ["  << fieldname <<
972
                   "]: [" << val << "]\n");
973
      return 0;
974
  }
975
        uint32_t valueslot = uint32_t(atoi(svslot.c_str()));
976
        if (valueslot == 0) {
977
            LOGERR("readFieldsConfig: found 0 value slot for [" << fieldname <<
978
                   "]: [" << val << "]\n");
979
            continue;
980
        }
981
982
        string tval;
983
        FieldTraits::ValueType valuetype{FieldTraits::STR};
984
        if (attrs.get("type", tval)) {
985
            if (tval == "string") {
986
                valuetype = FieldTraits::STR;
987
            } else if (tval == "int") {
988
                valuetype = FieldTraits::INT;
989
            } else {
990
                LOGERR("readFieldsConfig: bad type for value for " <<
991
                       fieldname << " : " << tval << endl);
992
                return 0;
993
            }
994
        }
995
        int valuelen{0};
996
        if (attrs.get("len", tval)) {
997
            valuelen = atoi(tval.c_str());
998
        }
999
        
1000
        // Find or insert traits entry
1001
  const auto pit =
1002
      m_fldtotraits.insert(
1003
                pair<string, FieldTraits>(canonic, FieldTraits())).first;
1004
        pit->second.valueslot = valueslot;
1005
        pit->second.valuetype = valuetype;
1006
        pit->second.valuelen = valuelen;
1007
    }
1008
    
955
    // Add prefixes for aliases and build alias-to-canonic map while
1009
    // Add prefixes for aliases and build alias-to-canonic map while
956
    // we're at it. Having the aliases in the prefix map avoids an
1010
    // we're at it. Having the aliases in the prefix map avoids an
957
    // additional indirection at index time.
1011
    // additional indirection at index time.
958
    tps = m_fields->getNames("aliases");
1012
    tps = m_fields->getNames("aliases");
959
    for (vector<string>::const_iterator it = tps.begin(); 
1013
    for (const auto& fieldname : tps) {
960
         it != tps.end(); it++){
961
    string canonic = stringtolower(*it); // canonic name
1014
    string canonic = stringtolower(fieldname); // canonic name
962
    FieldTraits ft;
1015
    FieldTraits ft;
963
  map<string, FieldTraits>::const_iterator pit = 
964
        m_fldtotraits.find(canonic);
1016
    const auto pit = m_fldtotraits.find(canonic);
965
    if (pit != m_fldtotraits.end()) {
1017
    if (pit != m_fldtotraits.end()) {
966
        ft = pit->second;
1018
        ft = pit->second;
967
    }
1019
    }
968
    string aliases;
1020
    string aliases;
969
    m_fields->get(canonic, aliases, "aliases");
1021
    m_fields->get(canonic, aliases, "aliases");
970
    vector<string> l;
1022
    vector<string> l;
971
    stringToStrings(aliases, l);
1023
    stringToStrings(aliases, l);
972
  for (vector<string>::const_iterator ait = l.begin();
1024
  for (const auto& alias : l) {
973
       ait != l.end(); ait++) {
974
        if (pit != m_fldtotraits.end())
1025
        if (pit != m_fldtotraits.end())
975
        m_fldtotraits[stringtolower(*ait)] = ft;
1026
        m_fldtotraits[stringtolower(alias)] = ft;
976
        m_aliastocanon[stringtolower(*ait)] = canonic;
1027
        m_aliastocanon[stringtolower(alias)] = canonic;
977
    }
1028
    }
978
    }
1029
    }
979
1030
980
    // Query aliases map
1031
    // Query aliases map
981
    tps = m_fields->getNames("queryaliases");
1032
    tps = m_fields->getNames("queryaliases");
982
    for (vector<string>::const_iterator it = tps.begin(); 
1033
    for (const auto& entry: tps) {
983
         it != tps.end(); it++){
984
    string canonic = stringtolower(*it); // canonic name
1034
    string canonic = stringtolower(entry); // canonic name
985
    string aliases;
1035
    string aliases;
986
    m_fields->get(canonic, aliases, "queryaliases");
1036
    m_fields->get(canonic, aliases, "queryaliases");
987
    vector<string> l;
1037
    vector<string> l;
988
    stringToStrings(aliases, l);
1038
    stringToStrings(aliases, l);
989
  for (vector<string>::const_iterator ait = l.begin();
1039
  for (const auto& alias : l) {
990
       ait != l.end(); ait++) {
991
        m_aliastoqcanon[stringtolower(*ait)] = canonic;
1040
        m_aliastoqcanon[stringtolower(alias)] = canonic;
992
    }
1041
    }
993
    }
1042
    }
994
1043
995
#if 0
1044
#if 0
996
    for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin();
1045
    for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin();
997
     it != m_fldtotraits.end(); it++) {
1046
     it != m_fldtotraits.end(); it++) {
998
    LOGDEB("readFieldsConfig: ["  << *it << "] -> ["  << it->second.pfx <<
1047
    LOGDEB("readFieldsConfig: ["  << entry << "] -> ["  << it->second.pfx <<
999
               "] " << it->second.wdfinc << " " << it->second.boost << "\n");
1048
               "] " << it->second.wdfinc << " " << it->second.boost << "\n");
1000
    }
1049
    }
1001
#endif
1050
#endif
1002
1051
1003
    vector<string> sl = m_fields->getNames("stored");
1052
    vector<string> sl = m_fields->getNames("stored");
1004
    if (!sl.empty()) {
1053
    for (const auto& fieldname : sl) {
1005
  for (vector<string>::const_iterator it = sl.begin(); 
1054
        m_storedFields.insert(fieldCanon(stringtolower(fieldname)));
1006
       it != sl.end(); it++) {
1007
      string fld = fieldCanon(stringtolower(*it));
1008
      m_storedFields.insert(fld);
1009
  }
1010
    }
1055
    }
1011
1056
1012
    // Extended file attribute to field translations
1057
    // Extended file attribute to field translations
1013
    vector<string>xattrs = m_fields->getNames("xattrtofields");
1058
    vector<string>xattrs = m_fields->getNames("xattrtofields");
1014
    for (vector<string>::const_iterator it = xattrs.begin(); 
1059
    for (const auto& xattr : xattrs) {
1015
   it != xattrs.end(); it++) {
1016
    string val;
1060
    string val;
1017
    m_fields->get(*it, val, "xattrtofields");
1061
    m_fields->get(xattr, val, "xattrtofields");
1018
    m_xattrtofld[*it] = val;
1062
    m_xattrtofld[xattr] = val;
1019
    }
1063
    }
1020
1064
1021
    return true;
1065
    return true;
1022
}
1066
}
1023
1067
...
...
1316
// Build the path of the "index.pid" file located in the cache directory.
string RclConfig::getPidfile() const
{
    const string& cachedir = getCacheDir();
    return path_cat(cachedir, "index.pid");
}
1320
1364
1365
/* Eliminate the common leaf part of file paths p1 and p2. Example: 
1366
 * /mnt1/common/part /mnt2/common/part -> /mnt1 /mnt2. This is used
1367
 * for computing translations for paths when the dataset has been
1368
 * moved. Of course this could be done more efficiently than by splitting 
1369
 * into vectors, but we don't care.*/
1370
static string path_diffstems(const string& p1, const string& p2,
1371
                            string& r1, string& r2)
1372
{
1373
    string reason;
1374
    r1.clear();
1375
    r2.clear();
1376
    vector<string> v1, v2;
1377
    stringToTokens(p1, v1, "/");
1378
    stringToTokens(p2, v2, "/");
1379
    unsigned int l1 = v1.size();
1380
    unsigned int l2 = v2.size();
1381
        
1382
    // Search for common leaf part
1383
    unsigned int cl = 0;
1384
    for (; cl < MIN(l1, l2); cl++) {
1385
        if (v1[l1-cl-1] != v2[l2-cl-1]) {
1386
            break;
1387
        }
1388
    }
1389
    //cerr << "Common length = " << cl << endl;
1390
    if (cl == 0) {
1391
        reason = "Input paths are empty or have no common part";
1392
        return reason;
1393
    }
1394
    for (unsigned i = 0; i < l1 - cl; i++) {
1395
        r1 += "/" + v1[i];
1396
    }
1397
    for (unsigned i = 0; i < l2 - cl; i++) {
1398
        r2 += "/" + v2[i];
1399
    }
1400
        
1401
    return reason;
1402
}
1403
1321
void RclConfig::urlrewrite(const string& dbdir, string& url) const
1404
void RclConfig::urlrewrite(const string& dbdir, string& url) const
1322
{
1405
{
1323
    LOGDEB2("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
1406
    LOGDEB1("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
1324
            "]\n");
1407
            "]\n");
1325
1408
1409
    // If orgidxconfdir is set, we assume that this index is for a
1410
    // movable dataset, with the configuration directory stored inside
1411
    // the dataset tree. This allows computing automatic path
1412
    // translations if the dataset has been moved.
1413
    string orig_confdir;
1414
    string cur_confdir;
1415
    string confstemorg, confstemrep;
1416
    if (m_conf->get("orgidxconfdir", orig_confdir, "")) {
1417
        if (!m_conf->get("curidxconfdir", cur_confdir, "")) {
1418
            cur_confdir = m_confdir;
1419
        }
1420
        LOGDEB1("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
1421
               " cur_confdir " << cur_confdir << endl);
1422
        string reason = path_diffstems(orig_confdir, cur_confdir,
1423
                                       confstemorg, confstemrep);
1424
        if (!reason.empty()) {
1425
            LOGERR("urlrewrite: path_diffstems failed: " << reason <<
1426
                   " : orig_confdir [" << orig_confdir <<
1427
                   "] cur_confdir [" << cur_confdir << endl);
1428
            confstemorg = confstemrep = "";
1429
        }
1430
    }
1431
    
1326
    // Do path translations exist for this index ?
1432
    // Do path translations exist for this index ?
1433
    bool needptrans = true;
1327
    if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
1434
    if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
1328
    LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
1435
    LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
1329
                m_ptrans << ")\n");
1436
                m_ptrans << ")\n");
1330
  return;
1437
        needptrans = false;
1438
    }
1439
1440
    if (!needptrans && confstemorg.empty()) {
1441
        return;
1442
    }
1443
    bool computeurl = false;
1331
    }
1444
    
1332
1333
    string path = fileurltolocalpath(url);
1445
    string path = fileurltolocalpath(url);
1334
    if (path.empty()) {
1446
    if (path.empty()) {
1335
    LOGDEB2("RclConfig::urlrewrite: not file url\n");
1447
    LOGDEB2("RclConfig::urlrewrite: not file url\n");
1336
    return;
1448
    return;
1337
    }
1449
    }
1450
    
1451
    // Do the movable volume thing.
1452
    if (!confstemorg.empty() && confstemorg.size() <= path.size() &&
1453
        !path.compare(0, confstemorg.size(), confstemorg)) {
1454
        path = path.replace(0, confstemorg.size(), confstemrep);
1455
        computeurl = true;
1456
    }
1338
1457
1458
    if (needptrans) {
1339
    // For each translation check if the prefix matches the input path,
1459
        // For each translation check if the prefix matches the input path,
1340
    // replace and return the result if it does.
1460
        // replace and return the result if it does.
1341
    vector<string> opaths = m_ptrans->getNames(dbdir);
1461
        vector<string> opaths = m_ptrans->getNames(dbdir);
1342
    for (vector<string>::const_iterator it = opaths.begin(); 
1462
        for (const auto& opath: opaths) {
1343
   it != opaths.end(); it++) {
1463
            if (opath.size() <= path.size() &&
1344
  if (it->size() <= path.size() && !path.compare(0, it->size(), *it)) {
1464
                !path.compare(0, opath.size(), opath)) {
1345
      string npath;
1465
                string npath;
1346
      // This call always succeeds because the key comes from getNames()
1466
                // Key comes from getNames()=> call must succeed
1347
      if (m_ptrans->get(*it, npath, dbdir)) { 
1467
                if (m_ptrans->get(opath, npath, dbdir)) { 
1348
      path = path.replace(0, it->size(), npath);
1468
                    path = path.replace(0, opath.size(), npath);
1349
      url = path_pathtofileurl(path);
1469
                    computeurl = true;
1470
                }
1471
                break;
1472
            }
1473
        }
1350
      }
1474
    }
1351
      break;
1475
    if (computeurl) {
1352
  }
1476
        url = path_pathtofileurl(path);
1353
    }
1477
    }
1354
}
1478
}
1355
1479
1356
bool RclConfig::sourceChanged() const
1480
bool RclConfig::sourceChanged() const
1357
{
1481
{