recoll / Code / Diff of /src/rcldb/rclterms.cpp

Diff of /src/rcldb/rclterms.cpp [2e2eeb] .. [3736c0]

Switch to unified view


...
    stripped = true;
#else
    stripped = o_index_stripchars;
#endif

    LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
        " max %d field [%s] stripped %d init res.size %u\n",
        tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(), 
        _term.c_str(), max, field.c_str(), stripped, res.entries.size()));

    // If index is stripped, no case or diac expansion can be needed:
    // for the processing inside this routine, everything looks like
    // we're all-sensitive: no use of expansion db.
    // Also, convert input to lowercase and strip its accents.
...
        // Retrieve additional info and filter against the index itself
        for (vector<string>::const_iterator it = exp.begin(); 
         it != exp.end(); it++) {
        idxTermMatch(ET_NONE, "", *it, res, max, field);
        }
        // And also expand the original expression against the
        // main index: for the common case where the expression
        // had no case/diac expansion (no entry in the exp db if
        // the original term is lowercase and without accents).
        idxTermMatch(typ_sens, lang, term, res, max, field);
    } else {
...
// expansion: expand against main index terms
bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
              TermMatchResult& res, int max,  const string& field)
{
    int typ = matchTypeTp(typ_sens);
    LOGDEB1(("Db::idxTermMatch: typ %s lang [%s] term [%s] "
       "max %d field [%s] init res.size %u\n",
       tmtptostr(typ), lang.c_str(), root.c_str(),
       max, field.c_str(), res.entries.size()));

#ifndef RCL_INDEX_STRIPCHARS
    if (typ == ET_STEM) {
    LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
    abort();
...
    // Find the initial section before any special char
    string::size_type es = string::npos;
    if (matcher.isNotNull()) {
        es = matcher->baseprefixlen();
    }

  // Initial section: the part of the prefix+expr before the
  // first wildcard character. We only scan the part of the
  // index where this matches
    string is;
    switch (es) {
    case string::npos: is = prefix + root; break;
    case 0: is = prefix; break;
    default: is = prefix + root.substr(0, es); break;
...
                Xapian::TermIterator it = xdb.allterms_begin(); 
                if (!is.empty())
                    it.skip_to(is.c_str());
                for (int rcnt = 0; it != xdb.allterms_end(); it++) {
                    // If we're beyond the terms matching the initial
                    // section, end
                    if (!is.empty() && (*it).find(is) != 0)
                        break;

          // Else try to match the term. The matcher content
          // is without prefix, so we remove this if any. We
          // just checked that the index term did begin with
          // the prefix.
                    string term;
                    if (!prefix.empty()) {
                        term = (*it).substr(prefix.length());
          } else {
          if (has_prefix(*it)) {
              continue;
          }
                        term = *it;
          }

            if (matcher.isNotNull() && !matcher->match(term))
            continue;

                    res.entries.push_back(

	a/src/rcldb/rclterms.cpp		b/src/rcldb/rclterms.cpp
	...		...
166	stripped = true;	166	stripped = true;
167	#else	167	#else
168	stripped = o_index_stripchars;	168	stripped = o_index_stripchars;
169	#endif	169	#endif
170		170
171	LOGDEB(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s] "	171	LOGDEB0(("Db::TermMatch: typ %s diacsens %d casesens %d lang [%s] term [%s]"
172	"max %d field [%s] stripped %d\n",	172	" max %d field [%s] stripped %d init res.size %u\n",
173	tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),	173	tmtptostr(matchtyp), diac_sensitive, case_sensitive, lang.c_str(),
174	_term.c_str(), max, field.c_str(), stripped));	174	_term.c_str(), max, field.c_str(), stripped, res.entries.size()));
175		175
176	// If index is stripped, no case or diac expansion can be needed:	176	// If index is stripped, no case or diac expansion can be needed:
177	// for the processing inside this routine, everything looks like	177	// for the processing inside this routine, everything looks like
178	// we're all-sensitive: no use of expansion db.	178	// we're all-sensitive: no use of expansion db.
179	// Also, convert input to lowercase and strip its accents.	179	// Also, convert input to lowercase and strip its accents.
	...		...
222	// Retrieve additional info and filter against the index itself	222	// Retrieve additional info and filter against the index itself
223	for (vector<string>::const_iterator it = exp.begin();	223	for (vector<string>::const_iterator it = exp.begin();
224	it != exp.end(); it++) {	224	it != exp.end(); it++) {
225	idxTermMatch(ET_NONE, "", *it, res, max, field);	225	idxTermMatch(ET_NONE, "", *it, res, max, field);
226	}	226	}
227	// And also expand the original expresionn against the	227	// And also expand the original expression against the
228	// main index: for the common case where the expression	228	// main index: for the common case where the expression
229	// had no case/diac expansion (no entry in the exp db if	229	// had no case/diac expansion (no entry in the exp db if
230	// the original term is lowercase and without accents).	230	// the original term is lowercase and without accents).
231	idxTermMatch(typ_sens, lang, term, res, max, field);	231	idxTermMatch(typ_sens, lang, term, res, max, field);
232	} else {	232	} else {
	...		...
318	// expansion: expand against main index terms	318	// expansion: expand against main index terms
319	bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,	319	bool Db::idxTermMatch(int typ_sens, const string &lang, const string &root,
320	TermMatchResult& res, int max, const string& field)	320	TermMatchResult& res, int max, const string& field)
321	{	321	{
322	int typ = matchTypeTp(typ_sens);	322	int typ = matchTypeTp(typ_sens);
		323	LOGDEB1(("Db::idxTermMatch: typ %s lang [%s] term [%s] "
		324	"max %d field [%s] init res.size %u\n",
		325	tmtptostr(typ), lang.c_str(), root.c_str(),
		326	max, field.c_str(), res.entries.size()));
323		327
324	#ifndef RCL_INDEX_STRIPCHARS	328	#ifndef RCL_INDEX_STRIPCHARS
325	if (typ == ET_STEM) {	329	if (typ == ET_STEM) {
326	LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));	330	LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
327	abort();	331	abort();
	...		...
378	// Find the initial section before any special char	382	// Find the initial section before any special char
379	string::size_type es = string::npos;	383	string::size_type es = string::npos;
380	if (matcher.isNotNull()) {	384	if (matcher.isNotNull()) {
381	es = matcher->baseprefixlen();	385	es = matcher->baseprefixlen();
382	}	386	}
		387
		388	// Initial section: the part of the prefix+expr before the
		389	// first wildcard character. We only scan the part of the
		390	// index where this matches
383	string is;	391	string is;
384	switch (es) {	392	switch (es) {
385	case string::npos: is = prefix + root; break;	393	case string::npos: is = prefix + root; break;
386	case 0: is = prefix; break;	394	case 0: is = prefix; break;
387	default: is = prefix + root.substr(0, es); break;	395	default: is = prefix + root.substr(0, es); break;
	...		...
393	Xapian::TermIterator it = xdb.allterms_begin();	401	Xapian::TermIterator it = xdb.allterms_begin();
394	if (!is.empty())	402	if (!is.empty())
395	it.skip_to(is.c_str());	403	it.skip_to(is.c_str());
396	for (int rcnt = 0; it != xdb.allterms_end(); it++) {	404	for (int rcnt = 0; it != xdb.allterms_end(); it++) {
397	// If we're beyond the terms matching the initial	405	// If we're beyond the terms matching the initial
398	// string, end	406	// section, end
399	if (!is.empty() && (*it).find(is) != 0)	407	if (!is.empty() && (*it).find(is) != 0)
400	break;	408	break;
		409
		410	// Else try to match the term. The matcher content
		411	// is without prefix, so we remove this if any. We
		412	// just checked that the index term did begin with
		413	// the prefix.
401	string term;	414	string term;
402	if (!prefix.empty())	415	if (!prefix.empty()) {
403	term = (*it).substr(prefix.length());	416	term = (*it).substr(prefix.length());
404	else	417	} else {
		418	if (has_prefix(*it)) {
		419	continue;
		420	}
405	term = *it;	421	term = *it;
		422	}
406		423
407	if (matcher.isNotNull() && !matcher->match(term))	424	if (matcher.isNotNull() && !matcher->match(term))
408	continue;	425	continue;
409		426
410	res.entries.push_back(	427	res.entries.push_back(