|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
91 |
|
91 |
|
92 |
// Synthetic abstract marker (to discriminate from abstract actually
|
92 |
// Synthetic abstract marker (to discriminate from abstract actually
|
93 |
// found in document)
|
93 |
// found in document)
|
94 |
static const string cstr_syntAbs("?!#@");
|
94 |
static const string cstr_syntAbs("?!#@");
|
95 |
|
95 |
|
96 |
// Only ONE field name inside the index data record differs from the
|
|
|
97 |
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
|
|
|
98 |
// omega
|
|
|
99 |
|
|
|
100 |
// Static/Default table for field->prefix/weight translation.
|
|
|
101 |
// This is logically const after initialization. Can't use a
|
|
|
102 |
// static object to init this as the static std::string objects may
|
|
|
103 |
// not be ready.
|
|
|
104 |
//
|
|
|
105 |
// This map is searched if a match is not found in the dynamic
|
|
|
106 |
// "fields" configuration (cf: Db::fieldToTraits()), meaning that the
|
|
|
107 |
// entries can be overridden in the configuration, but not
|
|
|
108 |
// suppressed.
|
|
|
109 |
|
|
|
110 |
static map<string, FieldTraits> fldToTraits;
|
|
|
111 |
static PTMutexInit o_fldToTraits_mutex;
|
|
|
112 |
|
96 |
|
113 |
// A bogus fldToTraits key (bogus because not a real field) used to
|
97 |
// A bogus fldToTraits key (bogus because not a real field) used to
|
114 |
// retrieve the prefix used for specific filename searches (unsplit
|
98 |
// retrieve the prefix used for specific filename searches (unsplit
|
115 |
// filename, not "filename as 'filename:' field" searches)
|
99 |
// filename, not "filename as 'filename:' field" searches)
|
116 |
static const string keySysFilenamePrefix("rclUnsplitFN");
|
100 |
static const string keySysFilenamePrefix("rclUnsplitFN");
|
117 |
// The prefix for regular "filename:" field searches.
|
|
|
118 |
static const string cstr_fnAsFieldPrefix("XSFN");
|
|
|
119 |
// The prefix for unsplit filename terms used with specific -f or
|
|
|
120 |
// "File Name" GUI entries. There is a compile option to use the same prefix
|
|
|
121 |
// for both.
|
|
|
122 |
// #define UNSPLIT_FN_PREFIX_SAME_AS_SPLIT
|
|
|
123 |
#if defined(UNSPLIT_FN_PREFIX_SAME_AS_SPLIT)
|
|
|
124 |
static const string cstr_fnUnsplitPrefix(cstr_fnAsFieldPrefix);
|
|
|
125 |
#else
|
|
|
126 |
static const string cstr_fnUnsplitPrefix("XSFS");
|
101 |
static const string cstr_fnUnsplitPrefix("XSFS");
|
127 |
#endif
|
|
|
128 |
|
|
|
129 |
static void initFldToTraits()
|
|
|
130 |
{
|
|
|
131 |
PTMutexLocker locker(o_fldToTraits_mutex);
|
|
|
132 |
// As we perform non-locked testing of initialization, check again with
|
|
|
133 |
// the lock held
|
|
|
134 |
if (fldToTraits.size())
|
|
|
135 |
return;
|
|
|
136 |
|
|
|
137 |
// Can't remember why "abstract" is indexed without a prefix
|
|
|
138 |
// (result: it's indexed twice actually). Maybe I'll dare change
|
|
|
139 |
// this one day
|
|
|
140 |
fldToTraits[Doc::keyabs] = FieldTraits();
|
|
|
141 |
|
|
|
142 |
fldToTraits["ext"] = FieldTraits("XE");
|
|
|
143 |
|
|
|
144 |
fldToTraits[Doc::keyfn] = FieldTraits(cstr_fnAsFieldPrefix);
|
|
|
145 |
fldToTraits[keySysFilenamePrefix] = FieldTraits(cstr_fnUnsplitPrefix);
|
|
|
146 |
|
|
|
147 |
fldToTraits[cstr_caption] = FieldTraits("S");
|
|
|
148 |
fldToTraits[Doc::keytt] = FieldTraits("S");
|
|
|
149 |
fldToTraits["subject"] = FieldTraits("S");
|
|
|
150 |
|
|
|
151 |
fldToTraits[Doc::keyau] = FieldTraits("A");
|
|
|
152 |
fldToTraits["creator"] = FieldTraits("A");
|
|
|
153 |
fldToTraits["from"] = FieldTraits("A");
|
|
|
154 |
|
|
|
155 |
fldToTraits[Doc::keykw] = FieldTraits("K");
|
|
|
156 |
fldToTraits["keyword"] = FieldTraits("K");
|
|
|
157 |
fldToTraits["tag"] = FieldTraits("K");
|
|
|
158 |
fldToTraits["tags"] = FieldTraits("K");
|
|
|
159 |
|
|
|
160 |
fldToTraits["xapyear"] = FieldTraits("Y");
|
|
|
161 |
fldToTraits["xapyearmon"] = FieldTraits("M");
|
|
|
162 |
fldToTraits["xapdate"] = FieldTraits("D");
|
|
|
163 |
fldToTraits[Doc::keytp] = FieldTraits("T");
|
|
|
164 |
}
|
|
|
165 |
|
102 |
|
166 |
// Compute the unique term used to link documents to their origin.
|
103 |
// Compute the unique term used to link documents to their origin.
|
167 |
// "Q" + external udi
|
104 |
// "Q" + external udi
|
168 |
static inline string make_uniterm(const string& udi)
|
105 |
static inline string make_uniterm(const string& udi)
|
169 |
{
|
106 |
{
|
170 |
string uniterm("Q");
|
107 |
string uniterm("Q");
|
171 |
uniterm.append(udi);
|
108 |
uniterm.append(udi);
|
172 |
return uniterm;
|
109 |
return uniterm;
|
173 |
}
|
110 |
}
|
|
|
111 |
|
174 |
// Compute parent term used to link documents to their parent document (if any)
|
112 |
// Compute parent term used to link documents to their parent document (if any)
|
175 |
// "F" + parent external udi
|
113 |
// "F" + parent external udi
|
176 |
static inline string make_parentterm(const string& udi)
|
114 |
static inline string make_parentterm(const string& udi)
|
177 |
{
|
115 |
{
|
178 |
// I prefer to be in possible conflict with omega than with
|
116 |
// I prefer to be in possible conflict with omega than with
|
|
... |
|
... |
683 |
: m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
621 |
: m_ndb(0), m_config(cfp), m_idxAbsTruncLen(250), m_synthAbsLen(250),
|
684 |
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
622 |
m_synthAbsWordCtxLen(4), m_flushMb(-1),
|
685 |
m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
|
623 |
m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0), m_occFirstCheck(1),
|
686 |
m_maxFsOccupPc(0), m_mode(Db::DbRO)
|
624 |
m_maxFsOccupPc(0), m_mode(Db::DbRO)
|
687 |
{
|
625 |
{
|
688 |
if (!fldToTraits.size())
|
|
|
689 |
initFldToTraits();
|
|
|
690 |
|
|
|
691 |
m_ndb = new Native(this);
|
626 |
m_ndb = new Native(this);
|
692 |
if (m_config) {
|
627 |
if (m_config) {
|
693 |
m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
|
628 |
m_config->getConfParam("maxfsoccuppc", &m_maxFsOccupPc);
|
694 |
m_config->getConfParam("idxflushmb", &m_flushMb);
|
629 |
m_config->getConfParam("idxflushmb", &m_flushMb);
|
695 |
}
|
630 |
}
|
|
... |
|
... |
974 |
if (m_ndb == 0)
|
909 |
if (m_ndb == 0)
|
975 |
return false;
|
910 |
return false;
|
976 |
return m_ndb->m_isopen;
|
911 |
return m_ndb->m_isopen;
|
977 |
}
|
912 |
}
|
978 |
|
913 |
|
979 |
// Try to translate field specification into field prefix. We have a
|
914 |
// Try to translate field specification into field prefix.
|
980 |
// default table used if translations are not in the config for some
|
|
|
981 |
// reason (old config not updated ?). We use it only if the config
|
|
|
982 |
// translation fails. Also we add in there fields which should be
|
|
|
983 |
// indexed with no prefix (ie: abstract)
|
|
|
984 |
bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp)
|
915 |
bool Db::fieldToTraits(const string& fld, const FieldTraits **ftpp)
|
985 |
{
|
916 |
{
|
986 |
if (m_config && m_config->getFieldTraits(fld, ftpp))
|
917 |
if (m_config && m_config->getFieldTraits(fld, ftpp))
|
987 |
return true;
|
918 |
return true;
|
988 |
|
919 |
|
989 |
// No data in rclconfig? Check default values
|
|
|
990 |
map<string, FieldTraits>::const_iterator it = fldToTraits.find(fld);
|
|
|
991 |
if (it != fldToTraits.end()) {
|
|
|
992 |
*ftpp = &it->second;
|
|
|
993 |
return true;
|
|
|
994 |
}
|
|
|
995 |
*ftpp = 0;
|
920 |
*ftpp = 0;
|
996 |
return false;
|
921 |
return false;
|
997 |
}
|
922 |
}
|
998 |
|
923 |
|
999 |
|
924 |
|
|
... |
|
... |
1308 |
|
1233 |
|
1309 |
////// Special terms for other metadata. No positions for these.
|
1234 |
////// Special terms for other metadata. No positions for these.
|
1310 |
// Mime type
|
1235 |
// Mime type
|
1311 |
newdocument.add_term("T" + doc.mimetype);
|
1236 |
newdocument.add_term("T" + doc.mimetype);
|
1312 |
|
1237 |
|
1313 |
// Simple file name indexed unsplit for file name searches with a
|
1238 |
// Simple file name indexed unsplit for specific "file name"
|
1314 |
// term prefix. We also add a term for the filename extension if
|
1239 |
// searches. This is not the same as a filename: clause inside the
|
1315 |
// any.
|
1240 |
// query language.
|
|
|
1241 |
// We also add a term for the filename extension if any.
|
1316 |
string utf8fn;
|
1242 |
string utf8fn;
|
1317 |
if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
|
1243 |
if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
|
1318 |
string fn;
|
1244 |
string fn;
|
1319 |
if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) {
|
1245 |
if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) {
|
1320 |
// We should truncate after extracting the extension, but this is
|
1246 |
// We should truncate after extracting the extension, but this is
|