|
a/src/common/rclconfig.cpp |
|
b/src/common/rclconfig.cpp |
|
... |
|
... |
52 |
#include "cpuconf.h"
|
52 |
#include "cpuconf.h"
|
53 |
#include "execmd.h"
|
53 |
#include "execmd.h"
|
54 |
|
54 |
|
55 |
using namespace std;
|
55 |
using namespace std;
|
56 |
|
56 |
|
57 |
// Static, logically const, RclConfig members are initialized once from the
|
57 |
// Static, logically const, RclConfig members or module static
|
58 |
// first object build during process initialization.
|
58 |
// variables are initialized once from the first object build during
|
|
|
59 |
// process initialization.
|
59 |
|
60 |
|
60 |
// We default to a case- and diacritics-less index for now
|
61 |
// We default to a case- and diacritics-less index for now
|
61 |
bool o_index_stripchars = true;
|
62 |
bool o_index_stripchars = true;
|
|
|
63 |
// Default to storing the text contents for generating snippets. This
|
|
|
64 |
// only makes the index approximately 10% bigger and produces nicer
|
|
|
65 |
// snippets.
|
|
|
66 |
bool o_index_storedoctext = true;
|
62 |
|
67 |
|
63 |
bool o_uptodate_test_use_mtime = false;
|
68 |
bool o_uptodate_test_use_mtime = false;
|
64 |
|
69 |
|
65 |
string RclConfig::o_localecharset;
|
70 |
string RclConfig::o_localecharset;
|
66 |
string RclConfig::o_origcwd;
|
71 |
string RclConfig::o_origcwd;
|
|
... |
|
... |
389 |
}
|
394 |
}
|
390 |
|
395 |
|
391 |
static int m_index_stripchars_init = 0;
|
396 |
static int m_index_stripchars_init = 0;
|
392 |
if (!m_index_stripchars_init) {
|
397 |
if (!m_index_stripchars_init) {
|
393 |
getConfParam("indexStripChars", &o_index_stripchars);
|
398 |
getConfParam("indexStripChars", &o_index_stripchars);
|
|
|
399 |
getConfParam("indexStoreDocText", &o_index_storedoctext);
|
394 |
getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
|
400 |
getConfParam("testmodifusemtime", &o_uptodate_test_use_mtime);
|
395 |
m_index_stripchars_init = 1;
|
401 |
m_index_stripchars_init = 1;
|
396 |
}
|
402 |
}
|
397 |
|
403 |
|
398 |
if (getConfParam("cachedir", m_cachedir)) {
|
404 |
if (getConfParam("cachedir", m_cachedir)) {
|
|
... |
|
... |
924 |
}
|
930 |
}
|
925 |
|
931 |
|
926 |
// Build a direct map avoiding all indirections for field to
|
932 |
// Build a direct map avoiding all indirections for field to
|
927 |
// prefix translation
|
933 |
// prefix translation
|
928 |
// Add direct prefixes from the [prefixes] section
|
934 |
// Add direct prefixes from the [prefixes] section
|
929 |
vector<string>tps = m_fields->getNames("prefixes");
|
935 |
vector<string> tps = m_fields->getNames("prefixes");
|
930 |
for (vector<string>::const_iterator it = tps.begin();
|
936 |
for (const auto& fieldname : tps) {
|
931 |
it != tps.end(); it++) {
|
|
|
932 |
string val;
|
937 |
string val;
|
933 |
m_fields->get(*it, val, "prefixes");
|
938 |
m_fields->get(fieldname, val, "prefixes");
|
934 |
ConfSimple attrs;
|
939 |
ConfSimple attrs;
|
935 |
FieldTraits ft;
|
940 |
FieldTraits ft;
|
|
|
941 |
// fieldname = prefix ; attr1=val;attr2=val...
|
936 |
if (!valueSplitAttributes(val, ft.pfx, attrs)) {
|
942 |
if (!valueSplitAttributes(val, ft.pfx, attrs)) {
|
937 |
LOGERR("readFieldsConfig: bad config line for [" << *it <<
|
943 |
LOGERR("readFieldsConfig: bad config line for [" << fieldname <<
|
938 |
"]: [" << val << "]\n");
|
944 |
"]: [" << val << "]\n");
|
939 |
return 0;
|
945 |
return 0;
|
940 |
}
|
946 |
}
|
941 |
string tval;
|
947 |
string tval;
|
942 |
if (attrs.get("wdfinc", tval))
|
948 |
if (attrs.get("wdfinc", tval))
|
|
... |
|
... |
945 |
ft.boost = atof(tval.c_str());
|
951 |
ft.boost = atof(tval.c_str());
|
946 |
if (attrs.get("pfxonly", tval))
|
952 |
if (attrs.get("pfxonly", tval))
|
947 |
ft.pfxonly = stringToBool(tval);
|
953 |
ft.pfxonly = stringToBool(tval);
|
948 |
if (attrs.get("noterms", tval))
|
954 |
if (attrs.get("noterms", tval))
|
949 |
ft.noterms = stringToBool(tval);
|
955 |
ft.noterms = stringToBool(tval);
|
950 |
m_fldtotraits[stringtolower(*it)] = ft;
|
956 |
m_fldtotraits[stringtolower(fieldname)] = ft;
|
951 |
LOGDEB2("readFieldsConfig: [" << *it << "] -> [" << ft.pfx <<
|
957 |
LOGDEB2("readFieldsConfig: [" << fieldname << "] -> [" << ft.pfx <<
|
952 |
"] " << ft.wdfinc << " " << ft.boost << "\n");
|
958 |
"] " << ft.wdfinc << " " << ft.boost << "\n");
|
953 |
}
|
959 |
}
|
954 |
|
960 |
|
|
|
961 |
// Values section
|
|
|
962 |
tps = m_fields->getNames("values");
|
|
|
963 |
for (const auto& fieldname : tps) {
|
|
|
964 |
string canonic = stringtolower(fieldname); // canonic name
|
|
|
965 |
string val;
|
|
|
966 |
m_fields->get(fieldname, val, "values");
|
|
|
967 |
ConfSimple attrs;
|
|
|
968 |
string svslot;
|
|
|
969 |
// fieldname = valueslot ; attr1=val;attr2=val...
|
|
|
970 |
if (!valueSplitAttributes(val, svslot, attrs)) {
|
|
|
971 |
LOGERR("readFieldsConfig: bad value line for [" << fieldname <<
|
|
|
972 |
"]: [" << val << "]\n");
|
|
|
973 |
return 0;
|
|
|
974 |
}
|
|
|
975 |
uint32_t valueslot = uint32_t(atoi(svslot.c_str()));
|
|
|
976 |
if (valueslot == 0) {
|
|
|
977 |
LOGERR("readFieldsConfig: found 0 value slot for [" << fieldname <<
|
|
|
978 |
"]: [" << val << "]\n");
|
|
|
979 |
continue;
|
|
|
980 |
}
|
|
|
981 |
|
|
|
982 |
string tval;
|
|
|
983 |
FieldTraits::ValueType valuetype{FieldTraits::STR};
|
|
|
984 |
if (attrs.get("type", tval)) {
|
|
|
985 |
if (tval == "string") {
|
|
|
986 |
valuetype = FieldTraits::STR;
|
|
|
987 |
} else if (tval == "int") {
|
|
|
988 |
valuetype = FieldTraits::INT;
|
|
|
989 |
} else {
|
|
|
990 |
LOGERR("readFieldsConfig: bad type for value for " <<
|
|
|
991 |
fieldname << " : " << tval << endl);
|
|
|
992 |
return 0;
|
|
|
993 |
}
|
|
|
994 |
}
|
|
|
995 |
int valuelen{0};
|
|
|
996 |
if (attrs.get("len", tval)) {
|
|
|
997 |
valuelen = atoi(tval.c_str());
|
|
|
998 |
}
|
|
|
999 |
|
|
|
1000 |
// Find or insert traits entry
|
|
|
1001 |
const auto pit =
|
|
|
1002 |
m_fldtotraits.insert(
|
|
|
1003 |
pair<string, FieldTraits>(canonic, FieldTraits())).first;
|
|
|
1004 |
pit->second.valueslot = valueslot;
|
|
|
1005 |
pit->second.valuetype = valuetype;
|
|
|
1006 |
pit->second.valuelen = valuelen;
|
|
|
1007 |
}
|
|
|
1008 |
|
955 |
// Add prefixes for aliases and build alias-to-canonic map while
|
1009 |
// Add prefixes for aliases and build alias-to-canonic map while
|
956 |
// we're at it. Having the aliases in the prefix map avoids an
|
1010 |
// we're at it. Having the aliases in the prefix map avoids an
|
957 |
// additional indirection at index time.
|
1011 |
// additional indirection at index time.
|
958 |
tps = m_fields->getNames("aliases");
|
1012 |
tps = m_fields->getNames("aliases");
|
959 |
for (vector<string>::const_iterator it = tps.begin();
|
1013 |
for (const auto& fieldname : tps) {
|
960 |
it != tps.end(); it++){
|
|
|
961 |
string canonic = stringtolower(*it); // canonic name
|
1014 |
string canonic = stringtolower(fieldname); // canonic name
|
962 |
FieldTraits ft;
|
1015 |
FieldTraits ft;
|
963 |
map<string, FieldTraits>::const_iterator pit =
|
|
|
964 |
m_fldtotraits.find(canonic);
|
1016 |
const auto pit = m_fldtotraits.find(canonic);
|
965 |
if (pit != m_fldtotraits.end()) {
|
1017 |
if (pit != m_fldtotraits.end()) {
|
966 |
ft = pit->second;
|
1018 |
ft = pit->second;
|
967 |
}
|
1019 |
}
|
968 |
string aliases;
|
1020 |
string aliases;
|
969 |
m_fields->get(canonic, aliases, "aliases");
|
1021 |
m_fields->get(canonic, aliases, "aliases");
|
970 |
vector<string> l;
|
1022 |
vector<string> l;
|
971 |
stringToStrings(aliases, l);
|
1023 |
stringToStrings(aliases, l);
|
972 |
for (vector<string>::const_iterator ait = l.begin();
|
1024 |
for (const auto& alias : l) {
|
973 |
ait != l.end(); ait++) {
|
|
|
974 |
if (pit != m_fldtotraits.end())
|
1025 |
if (pit != m_fldtotraits.end())
|
975 |
m_fldtotraits[stringtolower(*ait)] = ft;
|
1026 |
m_fldtotraits[stringtolower(alias)] = ft;
|
976 |
m_aliastocanon[stringtolower(*ait)] = canonic;
|
1027 |
m_aliastocanon[stringtolower(alias)] = canonic;
|
977 |
}
|
1028 |
}
|
978 |
}
|
1029 |
}
|
979 |
|
1030 |
|
980 |
// Query aliases map
|
1031 |
// Query aliases map
|
981 |
tps = m_fields->getNames("queryaliases");
|
1032 |
tps = m_fields->getNames("queryaliases");
|
982 |
for (vector<string>::const_iterator it = tps.begin();
|
1033 |
for (const auto& entry: tps) {
|
983 |
it != tps.end(); it++){
|
|
|
984 |
string canonic = stringtolower(*it); // canonic name
|
1034 |
string canonic = stringtolower(entry); // canonic name
|
985 |
string aliases;
|
1035 |
string aliases;
|
986 |
m_fields->get(canonic, aliases, "queryaliases");
|
1036 |
m_fields->get(canonic, aliases, "queryaliases");
|
987 |
vector<string> l;
|
1037 |
vector<string> l;
|
988 |
stringToStrings(aliases, l);
|
1038 |
stringToStrings(aliases, l);
|
989 |
for (vector<string>::const_iterator ait = l.begin();
|
1039 |
for (const auto& alias : l) {
|
990 |
ait != l.end(); ait++) {
|
|
|
991 |
m_aliastoqcanon[stringtolower(*ait)] = canonic;
|
1040 |
m_aliastoqcanon[stringtolower(alias)] = canonic;
|
992 |
}
|
1041 |
}
|
993 |
}
|
1042 |
}
|
994 |
|
1043 |
|
995 |
#if 0
|
1044 |
#if 0
|
996 |
for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin();
|
1045 |
for (map<string, FieldTraits>::const_iterator it = m_fldtotraits.begin();
|
997 |
it != m_fldtotraits.end(); it++) {
|
1046 |
it != m_fldtotraits.end(); it++) {
|
998 |
LOGDEB("readFieldsConfig: [" << *it << "] -> [" << it->second.pfx <<
|
1047 |
LOGDEB("readFieldsConfig: [" << entry << "] -> [" << it->second.pfx <<
|
999 |
"] " << it->second.wdfinc << " " << it->second.boost << "\n");
|
1048 |
"] " << it->second.wdfinc << " " << it->second.boost << "\n");
|
1000 |
}
|
1049 |
}
|
1001 |
#endif
|
1050 |
#endif
|
1002 |
|
1051 |
|
1003 |
vector<string> sl = m_fields->getNames("stored");
|
1052 |
vector<string> sl = m_fields->getNames("stored");
|
1004 |
if (!sl.empty()) {
|
1053 |
for (const auto& fieldname : sl) {
|
1005 |
for (vector<string>::const_iterator it = sl.begin();
|
1054 |
m_storedFields.insert(fieldCanon(stringtolower(fieldname)));
|
1006 |
it != sl.end(); it++) {
|
|
|
1007 |
string fld = fieldCanon(stringtolower(*it));
|
|
|
1008 |
m_storedFields.insert(fld);
|
|
|
1009 |
}
|
|
|
1010 |
}
|
1055 |
}
|
1011 |
|
1056 |
|
1012 |
// Extended file attribute to field translations
|
1057 |
// Extended file attribute to field translations
|
1013 |
vector<string>xattrs = m_fields->getNames("xattrtofields");
|
1058 |
vector<string>xattrs = m_fields->getNames("xattrtofields");
|
1014 |
for (vector<string>::const_iterator it = xattrs.begin();
|
1059 |
for (const auto& xattr : xattrs) {
|
1015 |
it != xattrs.end(); it++) {
|
|
|
1016 |
string val;
|
1060 |
string val;
|
1017 |
m_fields->get(*it, val, "xattrtofields");
|
1061 |
m_fields->get(xattr, val, "xattrtofields");
|
1018 |
m_xattrtofld[*it] = val;
|
1062 |
m_xattrtofld[xattr] = val;
|
1019 |
}
|
1063 |
}
|
1020 |
|
1064 |
|
1021 |
return true;
|
1065 |
return true;
|
1022 |
}
|
1066 |
}
|
1023 |
|
1067 |
|
|
... |
|
... |
1316 |
// Return the path of the "index.pid" file, which is located directly
// inside the directory returned by getCacheDir().
string RclConfig::getPidfile() const
{
    const string cachedir = getCacheDir();
    return path_cat(cachedir, "index.pid");
}
|
1320 |
|
1364 |
|
|
|
1365 |
/* Eliminate the common leaf part of file paths p1 and p2. Example:
|
|
|
1366 |
* /mnt1/common/part /mnt2/common/part -> /mnt1 /mnt2. This is used
|
|
|
1367 |
* for computing translations for paths when the dataset has been
|
|
|
1368 |
* moved. Of course this could be done more efficiently than by splitting
|
|
|
1369 |
* into vectors, but we don't care.*/
|
|
|
1370 |
static string path_diffstems(const string& p1, const string& p2,
|
|
|
1371 |
string& r1, string& r2)
|
|
|
1372 |
{
|
|
|
1373 |
string reason;
|
|
|
1374 |
r1.clear();
|
|
|
1375 |
r2.clear();
|
|
|
1376 |
vector<string> v1, v2;
|
|
|
1377 |
stringToTokens(p1, v1, "/");
|
|
|
1378 |
stringToTokens(p2, v2, "/");
|
|
|
1379 |
unsigned int l1 = v1.size();
|
|
|
1380 |
unsigned int l2 = v2.size();
|
|
|
1381 |
|
|
|
1382 |
// Search for common leaf part
|
|
|
1383 |
unsigned int cl = 0;
|
|
|
1384 |
for (; cl < MIN(l1, l2); cl++) {
|
|
|
1385 |
if (v1[l1-cl-1] != v2[l2-cl-1]) {
|
|
|
1386 |
break;
|
|
|
1387 |
}
|
|
|
1388 |
}
|
|
|
1389 |
//cerr << "Common length = " << cl << endl;
|
|
|
1390 |
if (cl == 0) {
|
|
|
1391 |
reason = "Input paths are empty or have no common part";
|
|
|
1392 |
return reason;
|
|
|
1393 |
}
|
|
|
1394 |
for (unsigned i = 0; i < l1 - cl; i++) {
|
|
|
1395 |
r1 += "/" + v1[i];
|
|
|
1396 |
}
|
|
|
1397 |
for (unsigned i = 0; i < l2 - cl; i++) {
|
|
|
1398 |
r2 += "/" + v2[i];
|
|
|
1399 |
}
|
|
|
1400 |
|
|
|
1401 |
return reason;
|
|
|
1402 |
}
|
|
|
1403 |
|
1321 |
void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
1404 |
void RclConfig::urlrewrite(const string& dbdir, string& url) const
|
1322 |
{
|
1405 |
{
|
1323 |
LOGDEB2("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
|
1406 |
LOGDEB1("RclConfig::urlrewrite: dbdir [" << dbdir << "] url [" << url <<
|
1324 |
"]\n");
|
1407 |
"]\n");
|
1325 |
|
1408 |
|
|
|
1409 |
// If orgidxconfdir is set, we assume that this index is for a
|
|
|
1410 |
// movable dataset, with the configuration directory stored inside
|
|
|
1411 |
// the dataset tree. This allows computing automatic path
|
|
|
1412 |
// translations if the dataset has been moved.
|
|
|
1413 |
string orig_confdir;
|
|
|
1414 |
string cur_confdir;
|
|
|
1415 |
string confstemorg, confstemrep;
|
|
|
1416 |
if (m_conf->get("orgidxconfdir", orig_confdir, "")) {
|
|
|
1417 |
if (!m_conf->get("curidxconfdir", cur_confdir, "")) {
|
|
|
1418 |
cur_confdir = m_confdir;
|
|
|
1419 |
}
|
|
|
1420 |
LOGDEB1("RclConfig::urlrewrite: orgidxconfdir: " << orig_confdir <<
|
|
|
1421 |
" cur_confdir " << cur_confdir << endl);
|
|
|
1422 |
string reason = path_diffstems(orig_confdir, cur_confdir,
|
|
|
1423 |
confstemorg, confstemrep);
|
|
|
1424 |
if (!reason.empty()) {
|
|
|
1425 |
LOGERR("urlrewrite: path_diffstems failed: " << reason <<
|
|
|
1426 |
" : orig_confdir [" << orig_confdir <<
|
|
|
1427 |
"] cur_confdir [" << cur_confdir << endl);
|
|
|
1428 |
confstemorg = confstemrep = "";
|
|
|
1429 |
}
|
|
|
1430 |
}
|
|
|
1431 |
|
1326 |
// Do path translations exist for this index ?
|
1432 |
// Do path translations exist for this index ?
|
|
|
1433 |
bool needptrans = true;
|
1327 |
if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
|
1434 |
if (m_ptrans == 0 || !m_ptrans->hasSubKey(dbdir)) {
|
1328 |
LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
|
1435 |
LOGDEB2("RclConfig::urlrewrite: no paths translations (m_ptrans " <<
|
1329 |
m_ptrans << ")\n");
|
1436 |
m_ptrans << ")\n");
|
1330 |
return;
|
1437 |
needptrans = false;
|
|
|
1438 |
}
|
|
|
1439 |
|
|
|
1440 |
if (!needptrans && confstemorg.empty()) {
|
|
|
1441 |
return;
|
|
|
1442 |
}
|
|
|
1443 |
bool computeurl = false;
|
1331 |
}
|
1444 |
|
1332 |
|
|
|
1333 |
string path = fileurltolocalpath(url);
|
1445 |
string path = fileurltolocalpath(url);
|
1334 |
if (path.empty()) {
|
1446 |
if (path.empty()) {
|
1335 |
LOGDEB2("RclConfig::urlrewrite: not file url\n");
|
1447 |
LOGDEB2("RclConfig::urlrewrite: not file url\n");
|
1336 |
return;
|
1448 |
return;
|
1337 |
}
|
1449 |
}
|
|
|
1450 |
|
|
|
1451 |
// Do the movable volume thing.
|
|
|
1452 |
if (!confstemorg.empty() && confstemorg.size() <= path.size() &&
|
|
|
1453 |
!path.compare(0, confstemorg.size(), confstemorg)) {
|
|
|
1454 |
path = path.replace(0, confstemorg.size(), confstemrep);
|
|
|
1455 |
computeurl = true;
|
|
|
1456 |
}
|
1338 |
|
1457 |
|
|
|
1458 |
if (needptrans) {
|
1339 |
// For each translation check if the prefix matches the input path,
|
1459 |
// For each translation check if the prefix matches the input path,
|
1340 |
// replace and return the result if it does.
|
1460 |
// replace and return the result if it does.
|
1341 |
vector<string> opaths = m_ptrans->getNames(dbdir);
|
1461 |
vector<string> opaths = m_ptrans->getNames(dbdir);
|
1342 |
for (vector<string>::const_iterator it = opaths.begin();
|
1462 |
for (const auto& opath: opaths) {
|
1343 |
it != opaths.end(); it++) {
|
1463 |
if (opath.size() <= path.size() &&
|
1344 |
if (it->size() <= path.size() && !path.compare(0, it->size(), *it)) {
|
1464 |
!path.compare(0, opath.size(), opath)) {
|
1345 |
string npath;
|
1465 |
string npath;
|
1346 |
// This call always succeeds because the key comes from getNames()
|
1466 |
// Key comes from getNames()=> call must succeed
|
1347 |
if (m_ptrans->get(*it, npath, dbdir)) {
|
1467 |
if (m_ptrans->get(opath, npath, dbdir)) {
|
1348 |
path = path.replace(0, it->size(), npath);
|
1468 |
path = path.replace(0, opath.size(), npath);
|
1349 |
url = path_pathtofileurl(path);
|
1469 |
computeurl = true;
|
|
|
1470 |
}
|
|
|
1471 |
break;
|
|
|
1472 |
}
|
|
|
1473 |
}
|
1350 |
}
|
1474 |
}
|
1351 |
break;
|
1475 |
if (computeurl) {
|
1352 |
}
|
1476 |
url = path_pathtofileurl(path);
|
1353 |
}
|
1477 |
}
|
1354 |
}
|
1478 |
}
|
1355 |
|
1479 |
|
1356 |
bool RclConfig::sourceChanged() const
|
1480 |
bool RclConfig::sourceChanged() const
|
1357 |
{
|
1481 |
{
|