|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
973 |
if (!splitter.text_to_words(doc.text))
|
973 |
if (!splitter.text_to_words(doc.text))
|
974 |
LOGDEB(("Db::addOrUpdate: split failed for main text\n"));
|
974 |
LOGDEB(("Db::addOrUpdate: split failed for main text\n"));
|
975 |
|
975 |
|
976 |
////// Special terms for other metadata. No positions for these.
|
976 |
////// Special terms for other metadata. No positions for these.
|
977 |
// Mime type
|
977 |
// Mime type
|
978 |
newdocument.add_term(wrap_prefix(mimetype_prefix) + doc.mimetype);
|
978 |
newdocument.add_boolean_term(wrap_prefix(mimetype_prefix) + doc.mimetype);
|
979 |
|
979 |
|
980 |
// Simple file name indexed unsplit for specific "file name"
|
980 |
// Simple file name indexed unsplit for specific "file name"
|
981 |
// searches. This is not the same as a filename: clause inside the
|
981 |
// searches. This is not the same as a filename: clause inside the
|
982 |
// query language.
|
982 |
// query language.
|
983 |
// We also add a term for the filename extension if any.
|
983 |
// We also add a term for the filename extension if any.
|
|
... |
|
... |
989 |
// a pathological case anyway
|
989 |
// a pathological case anyway
|
990 |
if (fn.size() > 230)
|
990 |
if (fn.size() > 230)
|
991 |
utf8truncate(fn, 230);
|
991 |
utf8truncate(fn, 230);
|
992 |
string::size_type pos = fn.rfind('.');
|
992 |
string::size_type pos = fn.rfind('.');
|
993 |
if (pos != string::npos && pos != fn.length() - 1) {
|
993 |
if (pos != string::npos && pos != fn.length() - 1) {
|
994 |
newdocument.add_term(wrap_prefix(fileext_prefix) +
|
994 |
newdocument.add_boolean_term(wrap_prefix(fileext_prefix) +
|
995 |
fn.substr(pos + 1));
|
995 |
fn.substr(pos + 1));
|
996 |
}
|
996 |
}
|
997 |
newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn);
|
997 |
newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn, 0);
|
998 |
}
|
998 |
}
|
999 |
}
|
999 |
}
|
1000 |
|
1000 |
|
1001 |
// Udi unique term: this is used for file existence/uptodate
|
1001 |
// Udi unique term: this is used for file existence/uptodate
|
1002 |
// checks, and unique id for the replace_document() call.
|
1002 |
// checks, and unique id for the replace_document() call.
|
1003 |
string uniterm = make_uniterm(udi);
|
1003 |
string uniterm = make_uniterm(udi);
|
1004 |
newdocument.add_term(uniterm);
|
1004 |
newdocument.add_boolean_term(uniterm);
|
1005 |
// Parent term. This is used to find all descendents, mostly to delete them
|
1005 |
// Parent term. This is used to find all descendents, mostly to delete them
|
1006 |
// when the parent goes away
|
1006 |
// when the parent goes away
|
1007 |
if (!parent_udi.empty()) {
|
1007 |
if (!parent_udi.empty()) {
|
1008 |
newdocument.add_term(make_parentterm(parent_udi));
|
1008 |
newdocument.add_boolean_term(make_parentterm(parent_udi));
|
1009 |
}
|
1009 |
}
|
1010 |
// Dates etc.
|
1010 |
// Dates etc.
|
1011 |
time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
1011 |
time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
1012 |
doc.dmtime.c_str());
|
1012 |
doc.dmtime.c_str());
|
1013 |
struct tm *tm = localtime(&mtime);
|
1013 |
struct tm *tm = localtime(&mtime);
|
1014 |
char buf[9];
|
1014 |
char buf[9];
|
1015 |
snprintf(buf, 9, "%04d%02d%02d",
|
1015 |
snprintf(buf, 9, "%04d%02d%02d",
|
1016 |
tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
|
1016 |
tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
|
1017 |
// Date (YYYYMMDD)
|
1017 |
// Date (YYYYMMDD)
|
1018 |
newdocument.add_term(wrap_prefix(xapday_prefix) + string(buf));
|
1018 |
newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf));
|
1019 |
// Month (YYYYMM)
|
1019 |
// Month (YYYYMM)
|
1020 |
buf[6] = '\0';
|
1020 |
buf[6] = '\0';
|
1021 |
newdocument.add_term(wrap_prefix(xapmonth_prefix) + string(buf));
|
1021 |
newdocument.add_boolean_term(wrap_prefix(xapmonth_prefix) + string(buf));
|
1022 |
// Year (YYYY)
|
1022 |
// Year (YYYY)
|
1023 |
buf[4] = '\0';
|
1023 |
buf[4] = '\0';
|
1024 |
newdocument.add_term(wrap_prefix(xapyear_prefix) + string(buf));
|
1024 |
newdocument.add_boolean_term(wrap_prefix(xapyear_prefix) + string(buf));
|
1025 |
|
1025 |
|
1026 |
|
1026 |
|
1027 |
//////////////////////////////////////////////////////////////////
|
1027 |
//////////////////////////////////////////////////////////////////
|
1028 |
// Document data record. omindex has the following nl separated fields:
|
1028 |
// Document data record. omindex has the following nl separated fields:
|
1029 |
// - url
|
1029 |
// - url
|