|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
49 |
#include "rclquery.h"
|
49 |
#include "rclquery.h"
|
50 |
#include "rclquery_p.h"
|
50 |
#include "rclquery_p.h"
|
51 |
#include "md5.h"
|
51 |
#include "md5.h"
|
52 |
#include "rclversion.h"
|
52 |
#include "rclversion.h"
|
53 |
#include "cancelcheck.h"
|
53 |
#include "cancelcheck.h"
|
|
|
54 |
#include "ptmutex.h"
|
54 |
|
55 |
|
55 |
#ifndef MAX
|
56 |
#ifndef MAX
|
56 |
#define MAX(A,B) (A>B?A:B)
|
57 |
#define MAX(A,B) (A>B?A:B)
|
57 |
#endif
|
58 |
#endif
|
58 |
#ifndef MIN
|
59 |
#ifndef MIN
|
59 |
#define MIN(A,B) (A<B?A:B)
|
60 |
#define MIN(A,B) (A<B?A:B)
|
60 |
#endif
|
61 |
#endif
|
61 |
|
62 |
|
62 |
// Recoll index format version is stored in user metadata. When this changes,
|
63 |
// Recoll index format version is stored in user metadata. When this changes,
|
63 |
// we can't open the db and will have to reindex.
|
64 |
// we can't open the db and will have to reindex.
|
64 |
static const string RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
|
65 |
static const string cstr_RCL_IDX_VERSION_KEY("RCL_IDX_VERSION_KEY");
|
65 |
static const string RCL_IDX_VERSION("1");
|
66 |
static const string cstr_RCL_IDX_VERSION("1");
|
66 |
|
67 |
|
67 |
// This is the word position offset at which we index the body text
|
68 |
// This is the word position offset at which we index the body text
|
68 |
// (abstract, keywords, etc.. are stored before this)
|
69 |
// (abstract, keywords, etc.. are stored before this)
|
69 |
static const unsigned int baseTextPosition = 100000;
|
70 |
static const unsigned int baseTextPosition = 100000;
|
70 |
|
71 |
|
|
... |
|
... |
77 |
const string end_of_field_term = "XXND";
|
78 |
const string end_of_field_term = "XXND";
|
78 |
|
79 |
|
79 |
// This is used as a marker inside the abstract frag lists, but
|
80 |
// This is used as a marker inside the abstract frag lists, but
|
80 |
// normally doesn't remain in final output (which is built with a
|
81 |
// normally doesn't remain in final output (which is built with a
|
81 |
// custom sep. by our caller).
|
82 |
// custom sep. by our caller).
|
82 |
static const string ellipsis("...");
|
83 |
static const string cstr_ellipsis("...");
|
83 |
|
84 |
|
84 |
// Build the version banner: "Recoll <rclversionstr> + Xapian <Xapian::version_string()>".
string version_string(){
|
85 |
// Build the version banner: "Recoll <rclversionstr> + Xapian <Xapian::version_string()>".
string version_string(){
|
85 |
return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") +
|
86 |
return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") +
|
86 |
string(Xapian::version_string());
|
87 |
string(Xapian::version_string());
|
87 |
}
|
88 |
}
|
88 |
|
89 |
|
89 |
// Synthetic abstract marker (to discriminate from abstract actually
|
90 |
// Synthetic abstract marker (to discriminate from abstract actually
|
90 |
// found in document)
|
91 |
// found in document)
|
91 |
static const string rclSyntAbs("?!#@");
|
92 |
static const string cstr_syntAbs("?!#@");
|
92 |
|
93 |
|
93 |
// Only ONE field name inside the index data record differs from the
|
94 |
// Only ONE field name inside the index data record differs from the
|
94 |
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
|
95 |
// Rcl::Doc ones: caption<->title, for a remnant of compatibility with
|
95 |
// omega
|
96 |
// omega
|
96 |
static const string keycap("caption");
|
97 |
static const string cstr_keycap("caption");
|
97 |
|
98 |
|
98 |
// Static/Default table for field->prefix/weight translation.
|
99 |
// Static/Default table for field->prefix/weight translation.
|
99 |
// This is logically const after initialization. Can't use a
|
100 |
// This is logically const after initialization. Can't use a
|
100 |
// static object to init this as the static std::string objects may
|
101 |
// static object to init this as the static std::string objects may
|
101 |
// not be ready.
|
102 |
// not be ready.
|
|
... |
|
... |
104 |
// "fields" configuration (cf: Db::fieldToTraits()), meaning that the
|
105 |
// "fields" configuration (cf: Db::fieldToTraits()), meaning that the
|
105 |
// entries can be overridden in the configuration, but not
|
106 |
// entries can be overridden in the configuration, but not
|
106 |
// suppressed.
|
107 |
// suppressed.
|
107 |
|
108 |
|
108 |
static map<string, FieldTraits> fldToTraits;
|
109 |
static map<string, FieldTraits> fldToTraits;
|
|
|
110 |
static PTMutexInit o_fldToTraits_mutex;
|
|
|
111 |
|
109 |
static void initFldToTraits()
|
112 |
static void initFldToTraits()
|
110 |
{
|
113 |
{
|
|
|
114 |
PTMutexLocker locker(o_fldToTraits_mutex);
|
|
|
115 |
// As we perform non-locked testing of initialization, check again with
|
|
|
116 |
// the lock held
|
|
|
117 |
if (fldToTraits.size())
|
|
|
118 |
return;
|
|
|
119 |
|
111 |
// Can't remember why "abstract" is indexed without a prefix
|
120 |
// Can't remember why "abstract" is indexed without a prefix
|
112 |
// (result: it's indexed twice actually). Maybe I'll dare change
|
121 |
// (result: it's indexed twice actually). Maybe I'll dare change
|
113 |
// this one day
|
122 |
// this one day
|
114 |
fldToTraits[Doc::keyabs] = FieldTraits();
|
123 |
fldToTraits[Doc::keyabs] = FieldTraits();
|
115 |
|
124 |
|
116 |
fldToTraits["ext"] = FieldTraits("XE");
|
125 |
fldToTraits["ext"] = FieldTraits("XE");
|
117 |
fldToTraits[Doc::keyfn] = FieldTraits("XSFN");
|
126 |
fldToTraits[Doc::keyfn] = FieldTraits("XSFN");
|
118 |
|
127 |
|
119 |
fldToTraits[keycap] = FieldTraits("S");
|
128 |
fldToTraits[cstr_keycap] = FieldTraits("S");
|
120 |
fldToTraits[Doc::keytt] = FieldTraits("S");
|
129 |
fldToTraits[Doc::keytt] = FieldTraits("S");
|
121 |
fldToTraits["subject"] = FieldTraits("S");
|
130 |
fldToTraits["subject"] = FieldTraits("S");
|
122 |
|
131 |
|
123 |
fldToTraits[Doc::keyau] = FieldTraits("A");
|
132 |
fldToTraits[Doc::keyau] = FieldTraits("A");
|
124 |
fldToTraits["creator"] = FieldTraits("A");
|
133 |
fldToTraits["creator"] = FieldTraits("A");
|
|
... |
|
... |
187 |
parms.get(Doc::keyurl, doc.url);
|
196 |
parms.get(Doc::keyurl, doc.url);
|
188 |
parms.get(Doc::keytp, doc.mimetype);
|
197 |
parms.get(Doc::keytp, doc.mimetype);
|
189 |
parms.get(Doc::keyfmt, doc.fmtime);
|
198 |
parms.get(Doc::keyfmt, doc.fmtime);
|
190 |
parms.get(Doc::keydmt, doc.dmtime);
|
199 |
parms.get(Doc::keydmt, doc.dmtime);
|
191 |
parms.get(Doc::keyoc, doc.origcharset);
|
200 |
parms.get(Doc::keyoc, doc.origcharset);
|
192 |
parms.get(keycap, doc.meta[Doc::keytt]);
|
201 |
parms.get(cstr_keycap, doc.meta[Doc::keytt]);
|
193 |
parms.get(Doc::keykw, doc.meta[Doc::keykw]);
|
202 |
parms.get(Doc::keykw, doc.meta[Doc::keykw]);
|
194 |
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
203 |
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
195 |
// Possibly remove synthetic abstract indicator (if it's there, we
|
204 |
// Possibly remove synthetic abstract indicator (if it's there, we
|
196 |
// used to index the beginning of the text as abstract).
|
205 |
// used to index the beginning of the text as abstract).
|
197 |
doc.syntabs = false;
|
206 |
doc.syntabs = false;
|
198 |
if (doc.meta[Doc::keyabs].find(rclSyntAbs) == 0) {
|
207 |
if (doc.meta[Doc::keyabs].find(cstr_syntAbs) == 0) {
|
199 |
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(rclSyntAbs.length());
|
208 |
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(cstr_syntAbs.length());
|
200 |
doc.syntabs = true;
|
209 |
doc.syntabs = true;
|
201 |
}
|
210 |
}
|
202 |
parms.get(Doc::keyipt, doc.ipath);
|
211 |
parms.get(Doc::keyipt, doc.ipath);
|
203 |
parms.get(Doc::keyfs, doc.fbytes);
|
212 |
parms.get(Doc::keyfs, doc.fbytes);
|
204 |
parms.get(Doc::keyds, doc.dbytes);
|
213 |
parms.get(Doc::keyds, doc.dbytes);
|
|
... |
|
... |
415 |
if (ii == (unsigned int)ipos) {
|
424 |
if (ii == (unsigned int)ipos) {
|
416 |
sparseDoc[ii] = qterm;
|
425 |
sparseDoc[ii] = qterm;
|
417 |
} else if (ii > (unsigned int)ipos &&
|
426 |
} else if (ii > (unsigned int)ipos &&
|
418 |
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
427 |
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
419 |
sparseDoc[ii] = occupiedmarker;
|
428 |
sparseDoc[ii] = occupiedmarker;
|
420 |
} else if (!sparseDoc[ii].compare(ellipsis)) {
|
429 |
} else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
|
421 |
// For an empty slot, the test has a side
|
430 |
// For an empty slot, the test has a side
|
422 |
// effect of inserting an empty string which
|
431 |
// effect of inserting an empty string which
|
423 |
// is what we want
|
432 |
// is what we want
|
424 |
sparseDoc[ii] = emptys;
|
433 |
sparseDoc[ii] = emptys;
|
425 |
}
|
434 |
}
|
|
... |
|
... |
427 |
// Add ellipsis at the end. This may be replaced later by
|
436 |
// Add ellipsis at the end. This may be replaced later by
|
428 |
// an overlapping extract. Take care not to replace an
|
437 |
// an overlapping extract. Take care not to replace an
|
429 |
// empty string here, we really want an empty slot,
|
438 |
// empty string here, we really want an empty slot,
|
430 |
// use find()
|
439 |
// use find()
|
431 |
if (sparseDoc.find(sto+1) == sparseDoc.end()) {
|
440 |
if (sparseDoc.find(sto+1) == sparseDoc.end()) {
|
432 |
sparseDoc[sto+1] = ellipsis;
|
441 |
sparseDoc[sto+1] = cstr_ellipsis;
|
433 |
}
|
442 |
}
|
434 |
|
443 |
|
435 |
// Limit to allocated occurrences and total size
|
444 |
// Limit to allocated occurrences and total size
|
436 |
if (++occurrences >= maxoccs ||
|
445 |
if (++occurrences >= maxoccs ||
|
437 |
qtermposs.size() >= maxtotaloccs)
|
446 |
qtermposs.size() >= maxtotaloccs)
|
|
... |
|
... |
529 |
if (TextSplit::isCJK(*uit))
|
538 |
if (TextSplit::isCJK(*uit))
|
530 |
newcjk = true;
|
539 |
newcjk = true;
|
531 |
if (!incjk || (incjk && !newcjk))
|
540 |
if (!incjk || (incjk && !newcjk))
|
532 |
chunk += " ";
|
541 |
chunk += " ";
|
533 |
incjk = newcjk;
|
542 |
incjk = newcjk;
|
534 |
if (it->second == ellipsis) {
|
543 |
if (it->second == cstr_ellipsis) {
|
535 |
vabs.push_back(chunk);
|
544 |
vabs.push_back(chunk);
|
536 |
chunk.clear();
|
545 |
chunk.clear();
|
537 |
} else {
|
546 |
} else {
|
538 |
chunk += it->second;
|
547 |
chunk += it->second;
|
539 |
}
|
548 |
}
|
|
... |
|
... |
610 |
Xapian::DB_CREATE_OR_OVERWRITE;
|
619 |
Xapian::DB_CREATE_OR_OVERWRITE;
|
611 |
m_ndb->xwdb = Xapian::WritableDatabase(dir, action);
|
620 |
m_ndb->xwdb = Xapian::WritableDatabase(dir, action);
|
612 |
// If db is empty, write the data format version at once
|
621 |
// If db is empty, write the data format version at once
|
613 |
// to avoid stupid error messages:
|
622 |
// to avoid stupid error messages:
|
614 |
if (m_ndb->xwdb.get_doccount() == 0)
|
623 |
if (m_ndb->xwdb.get_doccount() == 0)
|
615 |
m_ndb->xwdb.set_metadata(RCL_IDX_VERSION_KEY,
|
624 |
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,
|
616 |
RCL_IDX_VERSION);
|
625 |
cstr_RCL_IDX_VERSION);
|
617 |
m_ndb->m_iswritable = true;
|
626 |
m_ndb->m_iswritable = true;
|
618 |
// We open a readonly object in all cases (possibly in
|
627 |
// We open a readonly object in all cases (possibly in
|
619 |
// addition to the r/w one) because some operations
|
628 |
// addition to the r/w one) because some operations
|
620 |
// are faster when performed through a Database: no
|
629 |
// are faster when performed through a Database: no
|
621 |
// forced flushes on allterms_begin(), ie, used in
|
630 |
// forced flushes on allterms_begin(), ie, used in
|
|
... |
|
... |
648 |
*error = DbOpenMainDb;
|
657 |
*error = DbOpenMainDb;
|
649 |
|
658 |
|
650 |
// Check index format version. Must not try to check a just created or
|
659 |
// Check index format version. Must not try to check a just created or
|
651 |
// truncated db
|
660 |
// truncated db
|
652 |
if (mode != DbTrunc && m_ndb->xdb().get_doccount() > 0) {
|
661 |
if (mode != DbTrunc && m_ndb->xdb().get_doccount() > 0) {
|
653 |
string version = m_ndb->xdb().get_metadata(RCL_IDX_VERSION_KEY);
|
662 |
string version = m_ndb->xdb().get_metadata(cstr_RCL_IDX_VERSION_KEY);
|
654 |
if (version.compare(RCL_IDX_VERSION)) {
|
663 |
if (version.compare(cstr_RCL_IDX_VERSION)) {
|
655 |
m_ndb->m_noversionwrite = true;
|
664 |
m_ndb->m_noversionwrite = true;
|
656 |
LOGERR(("Rcl::Db::open: file index [%s], software [%s]\n",
|
665 |
LOGERR(("Rcl::Db::open: file index [%s], software [%s]\n",
|
657 |
version.c_str(), RCL_IDX_VERSION.c_str()));
|
666 |
version.c_str(), cstr_RCL_IDX_VERSION.c_str()));
|
658 |
throw Xapian::DatabaseError("Recoll index version mismatch",
|
667 |
throw Xapian::DatabaseError("Recoll index version mismatch",
|
659 |
"", "");
|
668 |
"", "");
|
660 |
}
|
669 |
}
|
661 |
}
|
670 |
}
|
662 |
m_mode = mode;
|
671 |
m_mode = mode;
|
|
... |
|
... |
691 |
string ermsg;
|
700 |
string ermsg;
|
692 |
try {
|
701 |
try {
|
693 |
bool w = m_ndb->m_iswritable;
|
702 |
bool w = m_ndb->m_iswritable;
|
694 |
if (w) {
|
703 |
if (w) {
|
695 |
if (!m_ndb->m_noversionwrite)
|
704 |
if (!m_ndb->m_noversionwrite)
|
696 |
m_ndb->xwdb.set_metadata(RCL_IDX_VERSION_KEY, RCL_IDX_VERSION);
|
705 |
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY, cstr_RCL_IDX_VERSION);
|
697 |
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
706 |
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
698 |
}
|
707 |
}
|
699 |
// Used to do a flush here. Can't see why it should be necessary.
|
708 |
// Used to do a flush here. Can't see why it should be necessary.
|
700 |
deleteZ(m_ndb);
|
709 |
deleteZ(m_ndb);
|
701 |
if (w)
|
710 |
if (w)
|
|
... |
|
... |
950 |
if (syntctxlen > 0)
|
959 |
if (syntctxlen > 0)
|
951 |
m_synthAbsWordCtxLen = syntctxlen;
|
960 |
m_synthAbsWordCtxLen = syntctxlen;
|
952 |
}
|
961 |
}
|
953 |
|
962 |
|
954 |
static const int MB = 1024 * 1024;
|
963 |
static const int MB = 1024 * 1024;
|
955 |
static const string nc("\n\r\x0c");
|
964 |
static const string cstr_nc("\n\r\x0c");
|
956 |
|
965 |
|
957 |
#define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";}
|
966 |
#define RECORD_APPEND(R, NM, VAL) {R += NM + "=" + VAL + "\n";}
|
958 |
|
967 |
|
959 |
// Add document in internal form to the database: index the terms in
|
968 |
// Add document in internal form to the database: index the terms in
|
960 |
// the title abstract and body and add special terms for file name,
|
969 |
// the title abstract and body and add special terms for file name,
|
|
... |
|
... |
1166 |
RECORD_APPEND(record, Doc::keyipt, doc.ipath);
|
1175 |
RECORD_APPEND(record, Doc::keyipt, doc.ipath);
|
1167 |
|
1176 |
|
1168 |
if (doc.meta[Doc::keytt].empty())
|
1177 |
if (doc.meta[Doc::keytt].empty())
|
1169 |
doc.meta[Doc::keytt] = doc.utf8fn;
|
1178 |
doc.meta[Doc::keytt] = doc.utf8fn;
|
1170 |
doc.meta[Doc::keytt] =
|
1179 |
doc.meta[Doc::keytt] =
|
1171 |
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), nc);
|
1180 |
neutchars(truncate_to_word(doc.meta[Doc::keytt], 150), cstr_nc);
|
1172 |
if (!doc.meta[Doc::keytt].empty())
|
1181 |
if (!doc.meta[Doc::keytt].empty())
|
1173 |
RECORD_APPEND(record, keycap, doc.meta[Doc::keytt]);
|
1182 |
RECORD_APPEND(record, cstr_keycap, doc.meta[Doc::keytt]);
|
1174 |
|
1183 |
|
1175 |
trimstring(doc.meta[Doc::keykw], " \t\r\n");
|
1184 |
trimstring(doc.meta[Doc::keykw], " \t\r\n");
|
1176 |
doc.meta[Doc::keykw] =
|
1185 |
doc.meta[Doc::keykw] =
|
1177 |
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), nc);
|
1186 |
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
|
1178 |
if (!doc.meta[Doc::keykw].empty())
|
1187 |
if (!doc.meta[Doc::keykw].empty())
|
1179 |
RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
|
1188 |
RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
|
1180 |
|
1189 |
|
1181 |
// If abstract is empty, we make up one with the beginning of the
|
1190 |
// If abstract is empty, we make up one with the beginning of the
|
1182 |
// document. This is then not indexed, but part of the doc data so
|
1191 |
// document. This is then not indexed, but part of the doc data so
|
|
... |
|
... |
1187 |
// don't exist yet.
|
1196 |
// don't exist yet.
|
1188 |
trimstring(doc.meta[Doc::keyabs], " \t\r\n");
|
1197 |
trimstring(doc.meta[Doc::keyabs], " \t\r\n");
|
1189 |
if (doc.meta[Doc::keyabs].empty()) {
|
1198 |
if (doc.meta[Doc::keyabs].empty()) {
|
1190 |
syntabs = true;
|
1199 |
syntabs = true;
|
1191 |
if (!doc.text.empty())
|
1200 |
if (!doc.text.empty())
|
1192 |
doc.meta[Doc::keyabs] = rclSyntAbs +
|
1201 |
doc.meta[Doc::keyabs] = cstr_syntAbs +
|
1193 |
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), nc);
|
1202 |
neutchars(truncate_to_word(doc.text, m_idxAbsTruncLen), cstr_nc);
|
1194 |
} else {
|
1203 |
} else {
|
1195 |
doc.meta[Doc::keyabs] =
|
1204 |
doc.meta[Doc::keyabs] =
|
1196 |
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
1205 |
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
1197 |
nc);
|
1206 |
cstr_nc);
|
1198 |
}
|
1207 |
}
|
1199 |
if (!doc.meta[Doc::keyabs].empty())
|
1208 |
if (!doc.meta[Doc::keyabs].empty())
|
1200 |
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
|
1209 |
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
|
1201 |
|
1210 |
|
1202 |
const set<string>& stored = m_config->getStoredFields();
|
1211 |
const set<string>& stored = m_config->getStoredFields();
|
1203 |
for (set<string>::const_iterator it = stored.begin();
|
1212 |
for (set<string>::const_iterator it = stored.begin();
|
1204 |
it != stored.end(); it++) {
|
1213 |
it != stored.end(); it++) {
|
1205 |
string nm = m_config->fieldCanon(*it);
|
1214 |
string nm = m_config->fieldCanon(*it);
|
1206 |
if (!doc.meta[*it].empty()) {
|
1215 |
if (!doc.meta[*it].empty()) {
|
1207 |
string value =
|
1216 |
string value =
|
1208 |
neutchars(truncate_to_word(doc.meta[*it], 150), nc);
|
1217 |
neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc);
|
1209 |
RECORD_APPEND(record, nm, value);
|
1218 |
RECORD_APPEND(record, nm, value);
|
1210 |
}
|
1219 |
}
|
1211 |
}
|
1220 |
}
|
1212 |
|
1221 |
|
1213 |
// If the file's md5 was computed, add value. This is optionally
|
1222 |
// If the file's md5 was computed, add value. This is optionally
|
|
... |
|
... |
1609 |
}
|
1618 |
}
|
1610 |
|
1619 |
|
1611 |
// Characters that can begin a wildcard or regexp expression. We use skipto
|
1620 |
// Characters that can begin a wildcard or regexp expression. We use skipto
|
1612 |
// to begin the allterms search with terms that begin with the portion of
|
1621 |
// to begin the allterms search with terms that begin with the portion of
|
1613 |
// the input string prior to these chars.
|
1622 |
// the input string prior to these chars.
|
1614 |
const string wildSpecChars = "*?[";
|
1623 |
const string cstr_wildSpecChars = "*?[";
|
1615 |
const string regSpecChars = "(.[{";
|
1624 |
const string cstr_regSpecChars = "(.[{";
|
1616 |
|
1625 |
|
1617 |
// Find all index terms that match a wildcard or regular expression
|
1626 |
// Find all index terms that match a wildcard or regular expression
|
1618 |
bool Db::termMatch(MatchType typ, const string &lang,
|
1627 |
bool Db::termMatch(MatchType typ, const string &lang,
|
1619 |
const string &root,
|
1628 |
const string &root,
|
1620 |
TermMatchResult& res,
|
1629 |
TermMatchResult& res,
|
|
... |
|
... |
1637 |
string droot;
|
1646 |
string droot;
|
1638 |
if (!unacmaybefold(root, droot, "UTF-8", true)) {
|
1647 |
if (!unacmaybefold(root, droot, "UTF-8", true)) {
|
1639 |
LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
|
1648 |
LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
|
1640 |
return false;
|
1649 |
return false;
|
1641 |
}
|
1650 |
}
|
1642 |
string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;
|
1651 |
string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;
|
1643 |
|
1652 |
|
1644 |
string prefix;
|
1653 |
string prefix;
|
1645 |
if (!field.empty()) {
|
1654 |
if (!field.empty()) {
|
1646 |
const FieldTraits *ftp = 0;
|
1655 |
const FieldTraits *ftp = 0;
|
1647 |
if (!fieldToTraits(field, &ftp) || ftp->pfx.empty()) {
|
1656 |
if (!fieldToTraits(field, &ftp) || ftp->pfx.empty()) {
|
|
... |
|
... |
1850 |
XAPTRY(vab = m_ndb->makeAbstract(doc.xdocid, query),
|
1859 |
XAPTRY(vab = m_ndb->makeAbstract(doc.xdocid, query),
|
1851 |
m_ndb->xrdb, m_reason);
|
1860 |
m_ndb->xrdb, m_reason);
|
1852 |
for (vector<string>::const_iterator it = vab.begin();
|
1861 |
for (vector<string>::const_iterator it = vab.begin();
|
1853 |
it != vab.end(); it++) {
|
1862 |
it != vab.end(); it++) {
|
1854 |
abstract.append(*it);
|
1863 |
abstract.append(*it);
|
1855 |
abstract.append(ellipsis);
|
1864 |
abstract.append(cstr_ellipsis);
|
1856 |
}
|
1865 |
}
|
1857 |
return m_reason.empty() ? true : false;
|
1866 |
return m_reason.empty() ? true : false;
|
1858 |
}
|
1867 |
}
|
1859 |
|
1868 |
|
1860 |
// Retrieve document defined by Unique doc identifier. This is mainly used
|
1869 |
// Retrieve document defined by Unique doc identifier. This is mainly used
|