|
a/src/rcldb/rclterms.cpp |
|
b/src/rcldb/rclterms.cpp |
|
... |
|
... |
166 |
stripped = true;
|
166 |
stripped = true;
|
167 |
#else
|
167 |
#else
|
168 |
stripped = o_index_stripchars;
|
168 |
stripped = o_index_stripchars;
|
169 |
#endif
|
169 |
#endif
|
170 |
|
170 |
|
171 |
LOGDEB(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s] "
|
171 |
LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
|
172 |
"max %d field [%s] stripped %d\n",
|
172 |
" max %d field [%s] stripped %d init res.size %u\n",
|
173 |
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
173 |
tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
|
174 |
_term.c_str(), max, field.c_str(), stripped));
|
174 |
_term.c_str(), max, field.c_str(), stripped, res.entries.size()));
|
175 |
|
175 |
|
176 |
// If index is stripped, no case or diac expansion can be needed:
|
176 |
// If index is stripped, no case or diac expansion can be needed:
|
177 |
// for the processing inside this routine, everything looks like
|
177 |
// for the processing inside this routine, everything looks like
|
178 |
// we're all-sensitive: no use of expansion db.
|
178 |
// we're all-sensitive: no use of expansion db.
|
179 |
// Also, convert input to lowercase and strip its accents.
|
179 |
// Also, convert input to lowercase and strip its accents.
|
|
... |
|
... |
222 |
// Retrieve additional info and filter against the index itself
|
222 |
// Retrieve additional info and filter against the index itself
|
223 |
for (vector<string>::const_iterator it = exp.begin();
|
223 |
for (vector<string>::const_iterator it = exp.begin();
|
224 |
it != exp.end(); it++) {
|
224 |
it != exp.end(); it++) {
|
225 |
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
225 |
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
226 |
}
|
226 |
}
|
227 |
// And also expand the original expresionn against the
|
227 |
// And also expand the original expression against the
|
228 |
// main index: for the common case where the expression
|
228 |
// main index: for the common case where the expression
|
229 |
// had no case/diac expansion (no entry in the exp db if
|
229 |
// had no case/diac expansion (no entry in the exp db if
|
230 |
// the original term is lowercase and without accents).
|
230 |
// the original term is lowercase and without accents).
|
231 |
idxTermMatch(typ_sens, lang, term, res, max, field);
|
231 |
idxTermMatch(typ_sens, lang, term, res, max, field);
|
232 |
} else {
|
232 |
} else {
|
|
... |
|
... |
318 |
// expansion: expand against main index terms
|
318 |
// expansion: expand against main index terms
|
319 |
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
319 |
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
|
320 |
TermMatchResult& res, int max, const string& field)
|
320 |
TermMatchResult& res, int max, const string& field)
|
321 |
{
|
321 |
{
|
322 |
int typ = matchTypeTp(typ_sens);
|
322 |
int typ = matchTypeTp(typ_sens);
|
|
|
323 |
LOGDEB1(("Db::idxTermMatch: typ %s lang [%s] term [%s] "
|
|
|
324 |
"max %d field [%s] init res.size %u\n",
|
|
|
325 |
tmtptostr(typ), lang.c_str(), root.c_str(),
|
|
|
326 |
max, field.c_str(), res.entries.size()));
|
323 |
|
327 |
|
324 |
#ifndef RCL_INDEX_STRIPCHARS
|
328 |
#ifndef RCL_INDEX_STRIPCHARS
|
325 |
if (typ == ET_STEM) {
|
329 |
if (typ == ET_STEM) {
|
326 |
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
330 |
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
327 |
abort();
|
331 |
abort();
|
|
... |
|
... |
378 |
// Find the initial section before any special char
|
382 |
// Find the initial section before any special char
|
379 |
string::size_type es = string::npos;
|
383 |
string::size_type es = string::npos;
|
380 |
if (matcher.isNotNull()) {
|
384 |
if (matcher.isNotNull()) {
|
381 |
es = matcher->baseprefixlen();
|
385 |
es = matcher->baseprefixlen();
|
382 |
}
|
386 |
}
|
|
|
387 |
|
|
|
388 |
// Initial section: the part of the prefix+expr before the
|
|
|
389 |
// first wildcard character. We only scan the part of the
|
|
|
390 |
// index where this matches
|
383 |
string is;
|
391 |
string is;
|
384 |
switch (es) {
|
392 |
switch (es) {
|
385 |
case string::npos: is = prefix + root; break;
|
393 |
case string::npos: is = prefix + root; break;
|
386 |
case 0: is = prefix; break;
|
394 |
case 0: is = prefix; break;
|
387 |
default: is = prefix + root.substr(0, es); break;
|
395 |
default: is = prefix + root.substr(0, es); break;
|
|
... |
|
... |
393 |
Xapian::TermIterator it = xdb.allterms_begin();
|
401 |
Xapian::TermIterator it = xdb.allterms_begin();
|
394 |
if (!is.empty())
|
402 |
if (!is.empty())
|
395 |
it.skip_to(is.c_str());
|
403 |
it.skip_to(is.c_str());
|
396 |
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
|
404 |
for (int rcnt = 0; it != xdb.allterms_end(); it++) {
|
397 |
// If we're beyond the terms matching the initial
|
405 |
// If we're beyond the terms matching the initial
|
398 |
// string, end
|
406 |
// section, end
|
399 |
if (!is.empty() && (*it).find(is) != 0)
|
407 |
if (!is.empty() && (*it).find(is) != 0)
|
400 |
break;
|
408 |
break;
|
|
|
409 |
|
|
|
410 |
// Else try to match the term. The matcher content
|
|
|
411 |
// is without prefix, so we remove this if any. We
|
|
|
412 |
// just checked that the index term did begin with
|
|
|
413 |
// the prefix.
|
401 |
string term;
|
414 |
string term;
|
402 |
if (!prefix.empty())
|
415 |
if (!prefix.empty()) {
|
403 |
term = (*it).substr(prefix.length());
|
416 |
term = (*it).substr(prefix.length());
|
404 |
else
|
417 |
} else {
|
|
|
418 |
if (has_prefix(*it)) {
|
|
|
419 |
continue;
|
|
|
420 |
}
|
405 |
term = *it;
|
421 |
term = *it;
|
|
|
422 |
}
|
406 |
|
423 |
|
407 |
if (matcher.isNotNull() && !matcher->match(term))
|
424 |
if (matcher.isNotNull() && !matcher->match(term))
|
408 |
continue;
|
425 |
continue;
|
409 |
|
426 |
|
410 |
res.entries.push_back(
|
427 |
res.entries.push_back(
|