recoll / Code / Diff of /src/rcldb/searchdata.cpp

Diff of /src/rcldb/searchdata.cpp [8b40cb] .. [52bc9f]

Switch to unified view


...
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

// Handle translation from rcl's SearchData structures to Xapian Queries

#include "autoconfig.h"

#include <stdio.h>
#include <fnmatch.h>

#include <string>
#include <vector>
#include <algorithm>
using namespace std;

#include "xapian.h"

#include "cstr.h"
#include "rcldb.h"
#include "rcldb_p.h"
#include "searchdata.h"
#include "debuglog.h"
#include "smallut.h"
#include "textsplit.h"
#include "unacpp.h"
#include "utf8iter.h"
#include "stoplist.h"
#include "rclconfig.h"
#include "termproc.h"
#include "synfamily.h"
#include "stemdb.h"
#include "expansiondbs.h"



namespace Rcl {


typedef  vector<SearchDataClause *>::iterator qlist_it_t;
typedef  vector<SearchDataClause *>::const_iterator qlist_cit_t;

static const int original_term_wqf_booster = 10;
...
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */

// Helpers for building prefixed date terms in a caller-supplied char buffer.
// bufprefix() stores the term prefix letter at the start of the buffer and
// bpoffs() gives the number of bytes which the stored prefix occupies, i.e.
// the offset at which the term value begins.
#ifdef RCL_INDEX_STRIPCHARS
// Build restricted to stripped indexes: the prefix is the bare letter.
#define bufprefix(BUF, L) {(BUF)[0] = L;}
#define bpoffs() 1
#else
// The index type is only known at run time: a stripped index uses the bare
// letter, else the letter is stored between two colons.
static inline void bufprefix(char *buf, char c)
{
    if (!o_index_stripchars) {
        buf[0] = ':';
        buf[1] = c;
        buf[2] = ':';
        return;
    }
    buf[0] = c;
}
// Size in bytes of the prefix written by bufprefix().
static inline int bpoffs()
{
    if (o_index_stripchars)
        return 1;
    return 3;
}
#endif

/**
 * Build a query matching the dates from y1/m1/d1 to y2/m2/d2, both included.
 *
 * The result is an OR of date terms, using the coarsest term available for
 * each part of the interval:
 *   - 'D' + YYYYMMDD for each day of a partially covered first/last month,
 *   - 'M' + YYYYMM   for each fully covered month,
 *   - 'Y' + YYYY     for each fully covered year.
 * The prefix letter is stored by bufprefix() and occupies bpoffs() bytes, so
 * that all the offsets below are relative to bpoffs().
 *
 * NOTE(review): the code assumes that the start date is not later than the
 * end date and that monthdays(m, y) returns the number of days in the
 * month; confirm against the callers and smallut.
 *
 * @param y1,m1,d1 year, month and day for the start of the interval.
 * @param y2,m2,d2 year, month and day for the end of the interval.
 * @return the OR query of all the generated date terms.
 */
static Xapian::Query
date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
{
    // Xapian uses a smallbuf and snprintf. Can't be bothered, we're
    // only doing %d's !
    char buf[200];
    bufprefix(buf, 'D');
    sprintf(buf + bpoffs(), "%04d%02d", y1, m1);
    vector<Xapian::Query> v;

    int d_last = monthdays(m1, y1);
    int d_end = d_last;
    if (y1 == y2 && m1 == m2 && d2 < d_last) {
        d_end = d2;
    }
    // Deal with any initial partial month
    if (d1 > 1 || d_end < d_last) {
        for ( ; d1 <= d_end ; d1++) {
            sprintf(buf + 6 + bpoffs(), "%02d", d1);
            v.push_back(Xapian::Query(buf));
        }
    } else {
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    // Interval contained in a single month: we are done
    if (y1 == y2 && m1 == m2) {
        return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
    }

    // Remaining full months of the first year. Writing the month at
    // offset 4 also truncates any day digits left by the loop above.
    int m_last = (y1 < y2) ? 12 : m2 - 1;
    while (++m1 <= m_last) {
        sprintf(buf + 4 + bpoffs(), "%02d", m1);
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    if (y1 < y2) {
        // Full years strictly between the first and last ones
        while (++y1 < y2) {
            sprintf(buf + bpoffs(), "%04d", y1);
            bufprefix(buf, 'Y');
            v.push_back(Xapian::Query(buf));
        }
        // Full months of the last year, before the final month
        sprintf(buf + bpoffs(), "%04d", y2);
        bufprefix(buf, 'M');
        for (m1 = 1; m1 < m2; m1++) {
            sprintf(buf + 4 + bpoffs(), "%02d", m1);
            v.push_back(Xapian::Query(buf));
        }
    }

    // Set the final month. It goes right after the 4 year digits, like
    // all the other month writes: using a smaller offset would overwrite
    // the end of the year and yield bogus terms for the final month/days.
    sprintf(buf + 4 + bpoffs(), "%02d", m2);

    // Deal with any final partial month
    if (d2 < monthdays(m2, y2)) {
        bufprefix(buf, 'D');
        for (d1 = 1 ; d1 <= d2; d1++) {
            sprintf(buf + 6 + bpoffs(), "%02d", d1);
            v.push_back(Xapian::Query(buf));
        }
    } else {
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
}
...
    }
    tps = exptps;
    return true;
}

bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, 
              vector<SearchDataClause*>& query, 
              string& reason, void *d)
{


    Xapian::Query xq;




    for (qlist_it_t it = query.begin(); it != query.end(); it++) {
    Xapian::Query nq;
    if (!(*it)->toNativeQuery(db, &nq)) {
        LOGERR(("SearchData::clausesToQuery: toNativeQuery failed\n"));
        reason = (*it)->getReason();
        return false;
    }       
        if (nq.empty()) {
            LOGDEB(("SearchData::clausesToQuery: skipping empty clause\n"));
            continue;
        }
    // If this structure is an AND list, must use AND_NOT for excl clauses.
    // Else this is an OR list, and there can't be excl clauses (checked by
    // addClause())
    Xapian::Query::op op;
    if (tp == SCLT_AND) {
            if ((*it)->m_tp == SCLT_EXCL) {
                op =  Xapian::Query::OP_AND_NOT;
            } else {
                op =  Xapian::Query::OP_AND;
            }
...
            xq = Xapian::Query(op, xq, nq);
        }
    }
    if (xq.empty())
    xq = Xapian::Query::MatchAll;

   *((Xapian::Query *)d) = xq;
    return true;
}

bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
{
    LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
    m_reason.erase();

    // Walk the clause list translating each in turn and building the 
    // Xapian query tree
    Xapian::Query xq;
    if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
  LOGERR(("SearchData::toNativeQuery: clausesToQuery failed\n"));
  return false;
    }

    if (m_haveDates) {
        // If one of the extremities is unset, compute db extremas
        if (m_dates.y1 == 0 || m_dates.y2 == 0) {
            int minyear = 1970, maxyear = 2100;
...
     dit != m_dirspecs.end(); dit++) {
    vector<string> vpath;
    stringToTokens(dit->dir, vpath, "/");
    vector<string> pvpath;
    if (dit->dir[0] == '/')
        pvpath.push_back(wrap_prefix(pathelt_prefix));
    for (vector<string>::const_iterator pit = vpath.begin(); 
         pit != vpath.end(); pit++){
        pvpath.push_back(wrap_prefix(pathelt_prefix) + *pit);
    }
    Xapian::Query::op tdop;
    if (dit->weight == 1.0) {
        tdop = dit->exclude ? 
        Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER;
...
    addClause(nclp);
    } else {
    // My type is AND. Change it to OR and insert two queries, one
    // being the original query as a subquery, the other the
    // phrase.
    SearchData *sd = new SearchData(m_tp, m_stemlang);
    sd->m_query = m_query;
    sd->m_stemlang = m_stemlang;
    m_tp = SCLT_OR;
    m_query.clear();
    SearchDataClauseSub *oq = 
...
    : m_db(db), m_field(field), m_stemlang(stmlng),
      m_doBoostUserTerms(boostUser), m_hld(hld)
    { }

    bool processUserString(const string &iq,
             int mods, 
               string &ermsg,
               vector<Xapian::Query> &pqueries, 

               int slack = 0, bool useNear = false);
private:
    void expandTerm(int mods, 
          const string& term, vector<string>& exp, 
                    string& sterm, const string& prefix);
    // After splitting entry on whitespace: process non-phrase element
    void processSimpleSpan(const string& span, 
             int mods,
               vector<Xapian::Query> &pqueries);
    // Process phrase/near element
    void processPhraseOrNear(TextSplitQ *splitData, 
               int mods,
                 vector<Xapian::Query> &pqueries,
                 bool useNear, int slack);

    Db&           m_db;
    const string& m_field;
    const string& m_stemlang;
    const bool    m_doBoostUserTerms;
    HighlightData& m_hld;
};

#if 1
static void listVector(const string& what, const vector<string>&l)
...
    }
    LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
}
#endif

/** Expand term into term list, using appropriate mode: stem, wildcards, 
 *  diacritics... 
 *
 * Both output parameters are cleared on entry, and stay empty if the input
 * term is empty. As a side effect, a term without wildcards is recorded in
 * the highlight data list of user-entered terms (m_hld.uterms).
 *
 * @param mods stem expansion, case and diacritics sensitivity control
 *  (SearchDataClause::SDCM_xxx bits).
 * @param term input single word
 * @param oexp output expansion list
 * @param sterm output original input term if there were no wildcards
 * @param prefix field prefix in index. We could recompute it, but the caller
 *  has it already. Used in the simple case where there is nothing to expand, 
 *  and we just return the prefixed term (else Db::termMatch deals with it).
 */
void StringToXapianQ::expandTerm(int mods, 
               const string& term, 
                                 vector<string>& oexp, string &sterm,
               const string& prefix)
{
    LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
         mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
    sterm.clear();
    oexp.clear();
    if (term.empty())
    return;


    bool haswild = term.find_first_of(cstr_minwilds) != string::npos;

    // If there are no wildcards, add term to the list of user-entered terms
    if (!haswild)
  m_hld.uterms.insert(term);

    // Stem expansion may be turned off by the caller through the modifiers
    bool nostemexp = (mods & SearchDataClause::SDCM_NOSTEMMING) != 0;

    // No stem expansion either if there are wildcards or if no stemming
    // language is set
    if (haswild || m_stemlang.empty()) {
    LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
    nostemexp = true;
    }

    // Neither stemming nor wildcards: nothing to expand, except that case
    // and diacritics expansion may still be needed (decided below)
    bool noexpansion = nostemexp && !haswild;


#ifndef RCL_INDEX_STRIPCHARS
    bool diac_sensitive = (mods & SearchDataClause::SDCM_DIACSENS) != 0;
    bool case_sensitive = (mods & SearchDataClause::SDCM_CASESENS) != 0;

    if (o_index_stripchars) {
  // Stripped index: case and diacritics sensitivity do not apply
  diac_sensitive = case_sensitive = false;
    } else {
  // If we are working with a raw index, apply the rules for case and 
  // diacritics sensitivity.

  // If any character has a diacritic, we become
  // diacritic-sensitive. Note that the way that the test is
  // performed (conversion+comparison) will automatically ignore
  // accented characters which are actually a separate letter
  if (unachasaccents(term))
      diac_sensitive = true;

  // If any character apart from the first is uppercase, we become
  // case-sensitive.  The first character is reserved for
  // turning off stemming. You need to use a query language
  // modifier to search for Floor in a case-sensitive way.
  Utf8Iter it(term);
  it++;
  if (unachasuppercase(term.substr(it.getBpos())))
      case_sensitive = true;

  // If we are sensitive to case or diacritics turn stemming off
  if (diac_sensitive || case_sensitive)
      nostemexp = true;

  // Unless we are sensitive to both case and diacritics, some
  // expansion is needed even when not stemming
  if (!case_sensitive || !diac_sensitive)
      noexpansion = false;
    }
#endif

    // Simple case: the result is just the prefixed input term
    if (noexpansion) {
    sterm = term;
  oexp.push_back(prefix + term);
  LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
  return;
    } 

    // Case/diacritics synonyms family, used for expanding from a raw index
    SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
    XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all", 
                  &unacfoldtrans);
    // Intermediate list of expanded terms (input for the final termMatch())
    vector<string> lexp;

    TermMatchResult res;
    if (haswild) {
  // Note that if there are wildcards, we do a direct from-index
  // expansion, which means that we are casediac-sensitive. There
  // would be nothing to prevent us to expand from the casediac
  // synonyms first. To be done later
    m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, 
             m_field);
  goto termmatchtoresult;
    }

    // No wildcards from here on: return the user term in sterm
    sterm = term;

#ifdef RCL_INDEX_STRIPCHARS

    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);

#else

    if (o_index_stripchars) {
  // If the index is stripped, we can only come here if nostemexp is
  // unset and we just need stem expansion.
  m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
  goto termmatchtoresult;
    } 

    // No stem expansion when diacritic or case sensitivity is set, it
    // makes no sense (it would mess with the diacritics anyway if
    // they are not in the stem part).  In these 3 cases, perform
    // appropriate expansion from the charstripping db, and do a bogus
    // wildcard expansion (there is no wild card) to generate the
    // result:

    if (diac_sensitive && case_sensitive) {
  // No expansion whatsoever
  m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, m_field);
  goto termmatchtoresult;
    }

    if (diac_sensitive) {
  // Expand for accents and case, filtering for same accents,
  // then bogus wildcard expansion for generating result
  SynTermTransUnac foldtrans(UNACOP_FOLD);
  synac.synExpand(term, lexp, &foldtrans);
  goto exptotermatch;
    } 

    if (case_sensitive) {
  // Expand for accents and case, filtering for same case, then
  // bogus wildcard expansion for generating result
  SynTermTransUnac unactrans(UNACOP_UNAC);
  synac.synExpand(term, lexp, &unactrans);
  goto exptotermatch;
    }

    // We are neither accent- nor case- sensitive and may need stem
    // expansion or not.

    // Expand for accents and case
    synac.synExpand(term, lexp);
    LOGDEB(("ExpTerm: casediac: %s\n", stringsToString(lexp).c_str()));
    if (nostemexp)
  goto exptotermatch;

    // Need stem expansion. Lowercase the result of accent and case
    // expansion for input to stemdb.
    for (unsigned int i = 0; i < lexp.size(); i++) {
  string lower;
  unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
  lexp[i] = lower;
    }
    sort(lexp.begin(), lexp.end());
    {
  // Remove the duplicates created by the case folding, then stem-expand
  // each of the remaining terms.
  // NOTE(review): this presumes that stemExpand() appends to exp1
  // instead of replacing its contents - confirm against StemDb.
  vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
  lexp.resize(uit - lexp.begin());
  StemDb db(m_db.m_ndb->xrdb);
  vector<string> exp1;
  for (vector<string>::const_iterator it = lexp.begin(); 
       it != lexp.end(); it++) {
      db.stemExpand(m_stemlang, *it, exp1);
    }
  LOGDEB(("ExpTerm: stem: %s\n", stringsToString(exp1).c_str()));

  // Expand the resulting list for case (all stemdb content
  // is lowercase)
  lexp.clear();
  for (vector<string>::const_iterator it = exp1.begin(); 
       it != exp1.end(); it++) {
      synac.synExpand(*it, lexp);
  }
  // Sort and remove duplicates again
  sort(lexp.begin(), lexp.end());
  uit = unique(lexp.begin(), lexp.end());
  lexp.resize(uit - lexp.begin());
    }
    LOGDEB(("ExpTerm: case exp of stem: %s\n", stringsToString(lexp).c_str()));

    // Bogus wildcard expand to generate the result
exptotermatch:
    for (vector<string>::const_iterator it = lexp.begin();
   it != lexp.end(); it++) {
  m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, 
             res, -1, m_field);
    }
#endif

    // Term match entries to vector of terms
termmatchtoresult:
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
     it != res.entries.end(); it++) {
    oexp.push_back(it->term);

    }
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
}

// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
void multiply_groups(vector<vector<string> >::const_iterator vvit,
             vector<vector<string> >::const_iterator vvend, 
...
    // vector)
    comb.pop_back();
    }
}

void StringToXapianQ::processSimpleSpan(const string& span, 
                  int mods,
                    vector<Xapian::Query> &pqueries)
{
    LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
      span.c_str(), (unsigned int)mods));
    vector<string> exp;  
    string sterm; // dumb version of user term

    string prefix;
    const FieldTraits *ftp;
    if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
    prefix = wrap_prefix(ftp->pfx);
    }

    expandTerm(mods, span, exp, sterm, prefix);
    
    // Set up the highlight data. No prefix should go in there
    for (vector<string>::const_iterator it = exp.begin(); 
     it != exp.end(); it++) {
    m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
...
// User entry element had several terms: transform into a PHRASE or
// NEAR xapian query, the elements of which can themselves be OR
// queries if the terms get expanded by stemming or wildcards (we
// don't do stemming for PHRASE though)
void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData, 
                    int mods,
                      vector<Xapian::Query> &pqueries,
                      bool useNear, int slack)
{
    Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
    Xapian::Query::OP_PHRASE;
    vector<Xapian::Query> orqueries;
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
...
    vector<vector<string> >groups;

    string prefix;
    const FieldTraits *ftp;
    if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
    prefix = wrap_prefix(ftp->pfx);
    }

    if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
    orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
    slack++;
...
    bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE) 
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
        || hadmultiple
#endif // single OR inside NEAR
        ;
  int lmods = mods;
  if (nostemexp)
      lmods |= SearchDataClause::SDCM_NOSTEMMING;
    string sterm;
    vector<string> exp;
    expandTerm(lmods, *it, exp, sterm, prefix);
    LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
    listVector("", exp);
    // groups is used for highlighting, we don't want prefixes in there.
    vector<string> noprefs;
    for (vector<string>::const_iterator it = exp.begin(); 
...
 *     composition of the phrase terms (no stem expansion in this case)
 * @return the subquery count (either or'd stem-expanded terms or phrase word
 *   count)
 */
bool StringToXapianQ::processUserString(const string &iq,
                  int mods, 
                    string &ermsg,
                    vector<Xapian::Query> &pqueries,

                    int slack, 
                    bool useNear
                    )
{
    LOGDEB(("StringToXapianQ:: qstr [%s] mods 0x%x slack %d near %d\n", 
      iq.c_str(), mods, slack, useNear));
    ermsg.erase();

    const StopList stops = m_db.getStopList();

    // Simple whitespace-split input into user-level words and
    // double-quoted phrases: word1 word2 "this is a phrase". 
    //
    // The text splitter may further still decide that the resulting
...
    // expansion and transform into an appropriate Xapian::Query
    try {
    for (vector<string>::iterator it = phrases.begin(); 
         it != phrases.end(); it++) {
        LOGDEB0(("strToXapianQ: phrase/word: [%s]\n", it->c_str()));
      // Anchoring modifiers
        int amods = stringToMods(*it);
        int terminc = amods != 0 ? 1 : 0;
      mods |= amods;
        // If there are multiple spans in this element, including
        // at least one composite, we have to increase the slack
        // else a phrase query including a span would fail. 
        // Ex: "term0@term1 term2" is onlyspans-split as:
        //   0 term0@term1             0   12
...
        TermProcQ tpq;
        TermProc *nxt = &tpq;
            TermProcStop tpstop(nxt, stops); nxt = &tpstop;
            //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
            //tpcommon.onlygrams(true);
        TermProcPrep tpprep(nxt);
#ifndef RCL_INDEX_STRIPCHARS
      if (o_index_stripchars)
#endif
      nxt = &tpprep;

        TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | 
                         TextSplit::TXTS_KEEPWILD), 
              stops, nxt);
        tpq.setTSQ(&splitter);
        splitter.text_to_words(*it);

        slack += splitter.lastpos - splitter.terms.size() + 1;

        LOGDEB0(("strToXapianQ: termcount: %d\n", splitter.terms.size()));
        switch (splitter.terms.size() + terminc) {
        case 0: 
        continue;// ??
        case 1: {
      int lmods = mods;
      if (splitter.nostemexps.front())
          lmods |= SearchDataClause::SDCM_NOSTEMMING;
        m_hld.ugroups.push_back(vector<string>(1, *it));
        processSimpleSpan(splitter.terms.front(), lmods, pqueries);
      }
        break;
        default:
        m_hld.ugroups.push_back(vector<string>(1, *it));
        processPhraseOrNear(&splitter, mods, pqueries, useNear, slack);
        }
    }
    } catch (const Xapian::Error &e) {
    ermsg = e.get_msg();
    } catch (const string &s) {
...
    }
    return true;
}

// Translate a simple OR, AND, or EXCL search clause. 
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)

{


    LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
         getStemLang().c_str()));

    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

    Xapian::Query::op op;
...
    // do it if there are wildcards anywhere, this would skew the results.
    bool doBoostUserTerm = 
    (m_parentSearch && !m_parentSearch->haveWildCards()) || 
    (m_parentSearch == 0 && !m_haveWildCards);

    StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm);
    if (!tr.processUserString(m_text, getModifiers(), m_reason, pqueries))
    return false;
    if (pqueries.empty()) {
    LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
    return true;
    }
...
//
// We do not split the entry any more (used to do some crazy thing
// about expanding multiple fragments in the past. We just take the
// value blanks and all and expand this against the indexed unsplit
// file names
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)

{
    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

    vector<string> names;
...
    }
    return true;
}

// Translate NEAR or PHRASE clause. 
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)

{


    LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));

    Xapian::Query *qp = (Xapian::Query *)p;
    *qp = Xapian::Query();

...
    if (m_text.find('\"') != string::npos) {
    m_text = neutchars(m_text, "\"");
    }
    string s = cstr_dquote + m_text + cstr_dquote;
    bool useNear = (m_tp == SCLT_NEAR);
    StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm);
    if (!tr.processUserString(s, getModifiers(), m_reason, pqueries, 
                  m_slack, useNear))
    return false;
    if (pqueries.empty()) {
    LOGERR(("SearchDataClauseDist: resolved to null query\n"));
    return true;

	a/src/rcldb/searchdata.cpp		b/src/rcldb/searchdata.cpp
	...		...
14	* Free Software Foundation, Inc.,	14	* Free Software Foundation, Inc.,
15	* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.	15	* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
16	*/	16	*/
17		17
18	// Handle translation from rcl's SearchData structures to Xapian Queries	18	// Handle translation from rcl's SearchData structures to Xapian Queries
		19
		20	#include "autoconfig.h"
		21
19	#include <stdio.h>	22	#include <stdio.h>
20	#include <fnmatch.h>	23	#include <fnmatch.h>
21		24
22	#include <string>	25	#include <string>
23	#include <vector>	26	#include <vector>
24	#include <algorithm>	27	#include <algorithm>
		28	using namespace std;
25		29
26	#include "xapian.h"	30	#include "xapian.h"
27		31
28	#include "cstr.h"	32	#include "cstr.h"
29	#include "rcldb.h"	33	#include "rcldb.h"
		34	#include "rcldb_p.h"
30	#include "searchdata.h"	35	#include "searchdata.h"
31	#include "debuglog.h"	36	#include "debuglog.h"
32	#include "smallut.h"	37	#include "smallut.h"
33	#include "textsplit.h"	38	#include "textsplit.h"
34	#include "unacpp.h"	39	#include "unacpp.h"
35	#include "utf8iter.h"	40	#include "utf8iter.h"
36	#include "stoplist.h"	41	#include "stoplist.h"
37	#include "rclconfig.h"	42	#include "rclconfig.h"
38	#include "termproc.h"	43	#include "termproc.h"
		44	#include "synfamily.h"
		45	#include "stemdb.h"
		46	#include "expansiondbs.h"
39		47
40	#ifndef NO_NAMESPACES
41	using namespace std;
42	namespace Rcl {	48	namespace Rcl {
43	#endif
44		49
45	typedef vector<SearchDataClause *>::iterator qlist_it_t;	50	typedef vector<SearchDataClause *>::iterator qlist_it_t;
46	typedef vector<SearchDataClause *>::const_iterator qlist_cit_t;	51	typedef vector<SearchDataClause *>::const_iterator qlist_cit_t;
47		52
48	static const int original_term_wqf_booster = 10;	53	static const int original_term_wqf_booster = 10;
	...		...
69	* You should have received a copy of the GNU General Public License	74	* You should have received a copy of the GNU General Public License
70	* along with this program; if not, write to the Free Software	75	* along with this program; if not, write to the Free Software
71	* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301	76	* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
72	* USA	77	* USA
73	*/	78	*/
		79
		80	#ifdef RCL_INDEX_STRIPCHARS
		81	#define bufprefix(BUF, L) {(BUF)[0] = L;}
		82	#define bpoffs() 1
		83	#else
		84	static inline void bufprefix(char *buf, char c)
		85	{
		86	if (o_index_stripchars) {
		87	buf[0] = c;
		88	} else {
		89	buf[0] = ':';
		90	buf[1] = c;
		91	buf[2] = ':';
		92	}
		93	}
		94	static inline int bpoffs()
		95	{
		96	return o_index_stripchars ? 1 : 3;
		97	}
		98	#endif
		99
74	static Xapian::Query	100	static Xapian::Query
75	date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)	101	date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
76	{	102	{
77	// Xapian uses a smallbuf and snprintf. Can't be bothered, we're	103	// Xapian uses a smallbuf and snprintf. Can't be bothered, we're
78	// only doing %d's !	104	// only doing %d's !
79	char buf[200];	105	char buf[200];
		106	bufprefix(buf, 'D');
80	sprintf(buf, "D%04d%02d", y1, m1);	107	sprintf(buf+bpoffs(), "%04d%02d", y1, m1);
81	vector<Xapian::Query> v;	108	vector<Xapian::Query> v;
82		109
83	int d_last = monthdays(m1, y1);	110	int d_last = monthdays(m1, y1);
84	int d_end = d_last;	111	int d_end = d_last;
85	if (y1 == y2 && m1 == m2 && d2 < d_last) {	112	if (y1 == y2 && m1 == m2 && d2 < d_last) {
86	d_end = d2;	113	d_end = d2;
87	}	114	}
88	// Deal with any initial partial month	115	// Deal with any initial partial month
89	if (d1 > 1 || d_end < d_last) {	116	if (d1 > 1 || d_end < d_last) {
90	for ( ; d1 <= d_end ; d1++) {	117	for ( ; d1 <= d_end ; d1++) {
91	sprintf(buf + 7, "%02d", d1);	118	sprintf(buf + 6 + bpoffs(), "%02d", d1);
92	v.push_back(Xapian::Query(buf));	119	v.push_back(Xapian::Query(buf));
93	}	120	}
94	} else {	121	} else {
95	buf[0] = 'M';	122	bufprefix(buf, 'M');
96	v.push_back(Xapian::Query(buf));	123	v.push_back(Xapian::Query(buf));
97	}	124	}
98		125
99	if (y1 == y2 && m1 == m2) {	126	if (y1 == y2 && m1 == m2) {
100	return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());	127	return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
101	}	128	}
102		129
103	int m_last = (y1 < y2) ? 12 : m2 - 1;	130	int m_last = (y1 < y2) ? 12 : m2 - 1;
104	while (++m1 <= m_last) {	131	while (++m1 <= m_last) {
105	sprintf(buf + 5, "%02d", m1);	132	sprintf(buf + 4 + bpoffs(), "%02d", m1);
106	buf[0] = 'M';	133	bufprefix(buf, 'M');
107	v.push_back(Xapian::Query(buf));	134	v.push_back(Xapian::Query(buf));
108	}	135	}
109		136
110	if (y1 < y2) {	137	if (y1 < y2) {
111	while (++y1 < y2) {	138	while (++y1 < y2) {
112	sprintf(buf + 1, "%04d", y1);	139	sprintf(buf + bpoffs(), "%04d", y1);
113	buf[0] = 'Y';	140	bufprefix(buf, 'Y');
114	v.push_back(Xapian::Query(buf));	141	v.push_back(Xapian::Query(buf));
115	}	142	}
116	sprintf(buf + 1, "%04d", y2);	143	sprintf(buf + bpoffs(), "%04d", y2);
117	buf[0] = 'M';	144	bufprefix(buf, 'M');
118	for (m1 = 1; m1 < m2; m1++) {	145	for (m1 = 1; m1 < m2; m1++) {
119	sprintf(buf + 5, "%02d", m1);	146	sprintf(buf + 4 + bpoffs(), "%02d", m1);
120	v.push_back(Xapian::Query(buf));	147	v.push_back(Xapian::Query(buf));
121	}	148	}
122	}	149	}
123		150
124	sprintf(buf + 5, "%02d", m2);	151	sprintf(buf + 2 + bpoffs(), "%02d", m2);
125		152
126	// Deal with any final partial month	153	// Deal with any final partial month
127	if (d2 < monthdays(m2, y2)) {	154	if (d2 < monthdays(m2, y2)) {
128	buf[0] = 'D';	155	bufprefix(buf, 'D');
129	for (d1 = 1 ; d1 <= d2; d1++) {	156	for (d1 = 1 ; d1 <= d2; d1++) {
130	sprintf(buf + 7, "%02d", d1);	157	sprintf(buf + 6 + bpoffs(), "%02d", d1);
131	v.push_back(Xapian::Query(buf));	158	v.push_back(Xapian::Query(buf));
132	}	159	}
133	} else {	160	} else {
134	buf[0] = 'M';	161	bufprefix(buf, 'M');
135	v.push_back(Xapian::Query(buf));	162	v.push_back(Xapian::Query(buf));
136	}	163	}
137		164
138	return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());	165	return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
139	}	166	}
	...		...
170	}	197	}
171	tps = exptps;	198	tps = exptps;
172	return true;	199	return true;
173	}	200	}
174		201
175	bool SearchData::toNativeQuery(Rcl::Db &db, void *d)	202	bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
		203	vector<SearchDataClause*>& query,
		204	string& reason, void *d)
176	{	205	{
177	LOGDEB2(("SearchData::toNativeQuery: stemlang [%s]\n",
178	m_stemlang.c_str()));
179	Xapian::Query xq;	206	Xapian::Query xq;
180	m_reason.erase();
181
182	// Walk the clause list translating each in turn and building the
183	// Xapian query tree
184	for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {	207	for (qlist_it_t it = query.begin(); it != query.end(); it++) {
185	Xapian::Query nq;	208	Xapian::Query nq;
186	if (!(*it)->toNativeQuery(db, &nq, m_stemlang)) {	209	if (!(*it)->toNativeQuery(db, &nq)) {
187	LOGERR(("SearchData::toNativeQuery: failed\n"));	210	LOGERR(("SearchData::clausesToQuery: toNativeQuery failed\n"));
188	m_reason = (*it)->getReason();	211	reason = (*it)->getReason();
189	return false;	212	return false;
190	}	213	}
191	if (nq.empty()) {	214	if (nq.empty()) {
192	LOGDEB(("SearchData::toNativeQuery: skipping empty clause\n"));	215	LOGDEB(("SearchData::clausesToQuery: skipping empty clause\n"));
193	continue;	216	continue;
194	}	217	}
195	// If this structure is an AND list, must use AND_NOT for excl clauses.	218	// If this structure is an AND list, must use AND_NOT for excl clauses.
196	// Else this is an OR list, and there can't be excl clauses (checked by	219	// Else this is an OR list, and there can't be excl clauses (checked by
197	// addClause())	220	// addClause())
198	Xapian::Query::op op;	221	Xapian::Query::op op;
199	if (m_tp == SCLT_AND) {	222	if (tp == SCLT_AND) {
200	if ((*it)->m_tp == SCLT_EXCL) {	223	if ((*it)->m_tp == SCLT_EXCL) {
201	op = Xapian::Query::OP_AND_NOT;	224	op = Xapian::Query::OP_AND_NOT;
202	} else {	225	} else {
203	op = Xapian::Query::OP_AND;	226	op = Xapian::Query::OP_AND;
204	}	227	}
	...		...
214	xq = Xapian::Query(op, xq, nq);	237	xq = Xapian::Query(op, xq, nq);
215	}	238	}
216	}	239	}
217	if (xq.empty())	240	if (xq.empty())
218	xq = Xapian::Query::MatchAll;	241	xq = Xapian::Query::MatchAll;
		242
		243	*((Xapian::Query *)d) = xq;
		244	return true;
		245	}
		246
		247	bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
		248	{
		249	LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
		250	m_reason.erase();
		251
		252	// Walk the clause list translating each in turn and building the
		253	// Xapian query tree
		254	Xapian::Query xq;
		255	if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
		256	LOGERR(("SearchData::toNativeQuery: clausesToQuery failed\n"));
		257	return false;
		258	}
219		259
220	if (m_haveDates) {	260	if (m_haveDates) {
221	// If one of the extremities is unset, compute db extremas	261	// If one of the extremities is unset, compute db extremas
222	if (m_dates.y1 == 0 || m_dates.y2 == 0) {	262	if (m_dates.y1 == 0 || m_dates.y2 == 0) {
223	int minyear = 1970, maxyear = 2100;	263	int minyear = 1970, maxyear = 2100;
	...		...
324	dit != m_dirspecs.end(); dit++) {	364	dit != m_dirspecs.end(); dit++) {
325	vector<string> vpath;	365	vector<string> vpath;
326	stringToTokens(dit->dir, vpath, "/");	366	stringToTokens(dit->dir, vpath, "/");
327	vector<string> pvpath;	367	vector<string> pvpath;
328	if (dit->dir[0] == '/')	368	if (dit->dir[0] == '/')
329	pvpath.push_back(pathelt_prefix);	369	pvpath.push_back(wrap_prefix(pathelt_prefix));
330	for (vector<string>::const_iterator pit = vpath.begin();	370	for (vector<string>::const_iterator pit = vpath.begin();
331	pit != vpath.end(); pit++){	371	pit != vpath.end(); pit++){
332	pvpath.push_back(pathelt_prefix + *pit);	372	pvpath.push_back(wrap_prefix(pathelt_prefix) + *pit);
333	}	373	}
334	Xapian::Query::op tdop;	374	Xapian::Query::op tdop;
335	if (dit->weight == 1.0) {	375	if (dit->weight == 1.0) {
336	tdop = dit->exclude ?	376	tdop = dit->exclude ?
337	Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER;	377	Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER;
	...		...
444	addClause(nclp);	484	addClause(nclp);
445	} else {	485	} else {
446	// My type is AND. Change it to OR and insert two queries, one	486	// My type is AND. Change it to OR and insert two queries, one
447	// being the original query as a subquery, the other the	487	// being the original query as a subquery, the other the
448	// phrase.	488	// phrase.
449	SearchData *sd = new SearchData(m_tp);	489	SearchData *sd = new SearchData(m_tp, m_stemlang);
450	sd->m_query = m_query;	490	sd->m_query = m_query;
451	sd->m_stemlang = m_stemlang;	491	sd->m_stemlang = m_stemlang;
452	m_tp = SCLT_OR;	492	m_tp = SCLT_OR;
453	m_query.clear();	493	m_query.clear();
454	SearchDataClauseSub *oq =	494	SearchDataClauseSub *oq =
	...		...
584	: m_db(db), m_field(field), m_stemlang(stmlng),	624	: m_db(db), m_field(field), m_stemlang(stmlng),
585	m_doBoostUserTerms(boostUser), m_hld(hld)	625	m_doBoostUserTerms(boostUser), m_hld(hld)
586	{ }	626	{ }
587		627
588	bool processUserString(const string &iq,	628	bool processUserString(const string &iq,
		629	int mods,
589	string &ermsg,	630	string &ermsg,
590	vector<Xapian::Query> &pqueries,	631	vector<Xapian::Query> &pqueries,
591	const StopList &stops,
592	int slack = 0, bool useNear = false);	632	int slack = 0, bool useNear = false);
593	private:	633	private:
594	void expandTerm(bool dont, const string& term, vector<string>& exp,	634	void expandTerm(int mods,
		635	const string& term, vector<string>& exp,
595	string& sterm, const string& prefix);	636	string& sterm, const string& prefix);
596	// After splitting entry on whitespace: process non-phrase element	637	// After splitting entry on whitespace: process non-phrase element
597	void processSimpleSpan(const string& span, bool nostemexp,	638	void processSimpleSpan(const string& span,
		639	int mods,
598	vector<Xapian::Query> &pqueries);	640	vector<Xapian::Query> &pqueries);
599	// Process phrase/near element	641	// Process phrase/near element
600	void processPhraseOrNear(TextSplitQ *splitData,	642	void processPhraseOrNear(TextSplitQ *splitData,
		643	int mods,
601	vector<Xapian::Query> &pqueries,	644	vector<Xapian::Query> &pqueries,
602	bool useNear, int slack, int mods);	645	bool useNear, int slack);
603		646
604	Db& m_db;	647	Db& m_db;
605	const string& m_field;	648	const string& m_field;
606	const string& m_stemlang;	649	const string& m_stemlang;
607	bool m_doBoostUserTerms;	650	const bool m_doBoostUserTerms;
608	HighlightData& m_hld;	651	HighlightData& m_hld;
609	};	652	};
610		653
611	#if 1	654	#if 1
612	static void listVector(const string& what, const vector<string>&l)	655	static void listVector(const string& what, const vector<string>&l)
	...		...
617	}	660	}
618	LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));	661	LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
619	}	662	}
620	#endif	663	#endif
621		664
622	/** Take simple term and expand stem and wildcards	665	/** Expand term into term list, using appropriate mode: stem, wildcards,
		666	* diacritics...
623	*	667	*
624	* @param nostemexp don't perform stem expansion. This is mainly used to	668	* @param mods stem expansion, case and diacritics sensitivity control.
625	* prevent stem expansion inside phrases (because the user probably
626	* does not expect it). This does NOT prevent wild card expansion.
627	* Other factors than nostemexp can prevent stem expansion:
628	* a null stemlang, resulting from a global user preference, a
629	* capitalized term, or wildcard(s)
630	* @param term input single word	669	* @param term input single word
631	* @param exp output expansion list	670	* @param exp output expansion list
632	* @param sterm output original input term if there were no wildcards	671	* @param sterm output original input term if there were no wildcards
		672	* @param prefix field prefix in index. We could recompute it, but the caller
		673	* has it already. Used in the simple case where there is nothing to expand,
		674	* and we just return the prefixed term (else Db::termMatch deals with it).
633	*/	675	*/
634	void StringToXapianQ::expandTerm(bool nostemexp,	676	void StringToXapianQ::expandTerm(int mods,
635	const string& term,	677	const string& term,
636	vector<string>& exp,	678	vector<string>& oexp, string &sterm,
637	string &sterm, const string& prefix)	679	const string& prefix)
638	{	680	{
639	LOGDEB2(("expandTerm: field [%s] term [%s] stemlang [%s] nostemexp %d\n",	681	LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
640	m_field.c_str(), term.c_str(), m_stemlang.c_str(), nostemexp));	682	mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
641	sterm.erase();	683	sterm.clear();
642	exp.clear();	684	oexp.clear();
643	if (term.empty()) {	685	if (term.empty())
644	return;	686	return;
645	}
646		687
647	bool haswild = term.find_first_of(cstr_minwilds) != string::npos;	688	bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
648		689
		690	// If there are no wildcards, add term to the list of user-entered terms
		691	if (!haswild)
		692	m_hld.uterms.insert(term);
		693
		694	bool nostemexp = (mods & SearchDataClause::SDCM_NOSTEMMING) != 0;
		695
649	// No stemming if there are wildcards or prevented globally.	696	// No stem expansion if there are wildcards or if prevented by caller
650	if (haswild \|\| m_stemlang.empty()) {	697	if (haswild \|\| m_stemlang.empty()) {
651	LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));	698	LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
652	nostemexp = true;	699	nostemexp = true;
653	}	700	}
654		701
655	if (!haswild)	702	bool noexpansion = nostemexp && !haswild;
656	m_hld.uterms.insert(term);
657		703
658	if (nostemexp && !haswild) {	704	#ifndef RCL_INDEX_STRIPCHARS
		705	bool diac_sensitive = (mods & SearchDataClause::SDCM_DIACSENS) != 0;
		706	bool case_sensitive = (mods & SearchDataClause::SDCM_CASESENS) != 0;
		707
		708	if (o_index_stripchars) {
		709	diac_sensitive = case_sensitive = false;
		710	} else {
		711	// If we are working with a raw index, apply the rules for case and
		712	// diacritics sensitivity.
		713
		714	// If any character has a diacritic, we become
		715	// diacritic-sensitive. Note that the way that the test is
		716	// performed (conversion+comparison) will automatically ignore
		717	// accented characters which are actually a separate letter
		718	if (unachasaccents(term))
		719	diac_sensitive = true;
		720
		721	// If any character apart from the first is uppercase, we become
		722	// case-sensitive. The first character is reserved for
		723	// turning off stemming. You need to use a query language
		724	// modifier to search for Floor in a case-sensitive way.
		725	Utf8Iter it(term);
		726	it++;
		727	if (unachasuppercase(term.substr(it.getBpos())))
		728	case_sensitive = true;
		729
		730	// If we are sensitive to case or diacritics turn stemming off
		731	if (diac_sensitive \|\| case_sensitive)
		732	nostemexp = true;
		733
		734	if (!case_sensitive \|\| !diac_sensitive)
		735	noexpansion = false;
		736	}
		737	#endif
		738
		739	if (noexpansion) {
659	sterm = term;	740	sterm = term;
660	exp.resize(1);	741	oexp.push_back(prefix + term);
661	exp[0] = prefix + term;	742	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
662	} else {	743	return;
		744	}
		745
		746	SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
		747	XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all",
		748	&unacfoldtrans);
		749	vector<string> lexp;
		750
663	TermMatchResult res;	751	TermMatchResult res;
664	if (haswild) {	752	if (haswild) {
		753	// Note that if there are wildcards, we do a direct from-index
		754	// expansion, which means that we are casediac-sensitive. Nothing
		755	// would prevent us from expanding from the casediac synonyms
		756	// first. To be done later.
665	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1,	757	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1,
666	m_field);	758	m_field);
667	} else {	759	goto termmatchtoresult;
		760	}
		761
668	sterm = term;	762	sterm = term;
		763
		764	#ifdef RCL_INDEX_STRIPCHARS
		765
669	m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,	766	m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
670	m_field);	767
		768	#else
		769
		770	if (o_index_stripchars) {
		771	// If the index is stripped, we can only come here if nostemexp is unset
		772	// and we just need stem expansion.
		773	m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
		774	goto termmatchtoresult;
		775	}
		776
		777	// No stem expansion when diacritic or case sensitivity is set, it
		778	// makes no sense (it would mess with the diacritics anyway if
		779	// they are not in the stem part). In these 3 cases, perform
		780	// appropriate expansion from the charstripping db, and do a bogus
		781	// wildcard expansion (there is no wild card) to generate the
		782	// result:
		783
		784	if (diac_sensitive && case_sensitive) {
		785	// No expansion whatsoever
		786	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, m_field);
		787	goto termmatchtoresult;
		788	}
		789
		790	if (diac_sensitive) {
		791	// Expand for accents and case, filtering for same accents,
		792	// then bogus wildcard expansion for generating result
		793	SynTermTransUnac foldtrans(UNACOP_FOLD);
		794	synac.synExpand(term, lexp, &foldtrans);
		795	goto exptotermatch;
		796	}
		797
		798	if (case_sensitive) {
		799	// Expand for accents and case, filtering for same case, then
		800	// bogus wildcard expansion for generating result
		801	SynTermTransUnac unactrans(UNACOP_UNAC);
		802	synac.synExpand(term, lexp, &unactrans);
		803	goto exptotermatch;
		804	}
		805
		806	// We are neither accent- nor case- sensitive and may need stem
		807	// expansion or not.
		808
		809	// Expand for accents and case
		810	synac.synExpand(term, lexp);
		811	LOGDEB(("ExpTerm: casediac: %s\n", stringsToString(lexp).c_str()));
		812	if (nostemexp)
		813	goto exptotermatch;
		814
		815	// Need stem expansion. Lowercase the result of accent and case
		816	// expansion for input to stemdb.
		817	for (unsigned int i = 0; i < lexp.size(); i++) {
		818	string lower;
		819	unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
		820	lexp[i] = lower;
		821	}
		822	sort(lexp.begin(), lexp.end());
		823	{
		824	vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
		825	lexp.resize(uit - lexp.begin());
		826	StemDb db(m_db.m_ndb->xrdb);
		827	vector<string> exp1;
		828	for (vector<string>::const_iterator it = lexp.begin();
		829	it != lexp.end(); it++) {
		830	db.stemExpand(m_stemlang, *it, exp1);
671	}	831	}
		832	LOGDEB(("ExpTerm: stem: %s\n", stringsToString(exp1).c_str()));
		833
		834	// Expand the resulting list for case (all stemdb content
		835	// is lowercase)
		836	lexp.clear();
		837	for (vector<string>::const_iterator it = exp1.begin();
		838	it != exp1.end(); it++) {
		839	synac.synExpand(*it, lexp);
		840	}
		841	sort(lexp.begin(), lexp.end());
		842	uit = unique(lexp.begin(), lexp.end());
		843	lexp.resize(uit - lexp.begin());
		844	}
		845	LOGDEB(("ExpTerm: case exp of stem: %s\n", stringsToString(lexp).c_str()));
		846
		847	// Bogus wildcard expand to generate the result
		848	exptotermatch:
		849	for (vector<string>::const_iterator it = lexp.begin();
		850	it != lexp.end(); it++) {
		851	m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it,
		852	res, -1, m_field);
		853	}
		854	#endif
		855
		856	// Term match entries to vector of terms
		857	termmatchtoresult:
672	for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();	858	for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();
673	it != res.entries.end(); it++) {	859	it != res.entries.end(); it++) {
674	exp.push_back(it->term);	860	oexp.push_back(it->term);
675	}
676	}	861	}
		862	LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
677	}	863	}
678		864
679	// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d	865	// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
680	void multiply_groups(vector<vector<string> >::const_iterator vvit,	866	void multiply_groups(vector<vector<string> >::const_iterator vvit,
681	vector<vector<string> >::const_iterator vvend,	867	vector<vector<string> >::const_iterator vvend,
	...		...
708	// vector)	894	// vector)
709	comb.pop_back();	895	comb.pop_back();
710	}	896	}
711	}	897	}
712		898
713	void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,	899	void StringToXapianQ::processSimpleSpan(const string& span,
		900	int mods,
714	vector<Xapian::Query> &pqueries)	901	vector<Xapian::Query> &pqueries)
715	{	902	{
716	LOGDEB2(("StringToXapianQ::processSimpleSpan: [%s] nostemexp %d\n",	903	LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
717	span.c_str(), int(nostemexp)));	904	span.c_str(), (unsigned int)mods));
718	vector<string> exp;	905	vector<string> exp;
719	string sterm; // dumb version of user term	906	string sterm; // dumb version of user term
720		907
721	string prefix;	908	string prefix;
722	const FieldTraits *ftp;	909	const FieldTraits *ftp;
723	if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {	910	if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {