recoll / Code / Diff of /src/rcldb/searchdata.cpp

Diff of /src/rcldb/searchdata.cpp [4982e9] .. [844f4f]

Switch to side-by-side view

--- a/src/rcldb/searchdata.cpp
+++ b/src/rcldb/searchdata.cpp
@@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.17 2007-06-22 06:14:04 dockes Exp $ (C) 2006 J.F.Dockes";
+static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.18 2007-09-20 08:43:12 dockes Exp $ (C) 2006 J.F.Dockes";
 #endif
 /*
  *   This program is free software; you can redistribute it and/or modify
@@ -366,18 +366,25 @@
 	     it != phrases.end(); it++) {
 	    LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
 
-	    // If there are both spans and single words in this element,
-	    // we need to use a word split, else a phrase query including
-	    // a span would fail if we didn't adjust the proximity to
-	    // account for the additional span term which is complicated.
+	    // If there are multiple spans in this element, including
+	    // at least one composite, we need to use a word split,
+	    // else a phrase query including a span would fail.
+	    // (Another possible solution: adjust the slack to account for
+	    //  the additional position increase?)
+	    // Ex: "term0@term01 term1" is onlyspans-split as:
+	    //   0 term0@term01            0   12
+	    //   2 term1                  13   18
+	    // The position of term1 is 2, not 1, so the phrase search would
+	    // fail. We search for "term0 term01 term1" instead, which may 
+	    // have worse performance, but will succeed.
 	    wsQData splitDataS(stops), splitDataW(stops);
-	    TextSplit splitterS(&splitDataS, (TextSplit::Flags)
-				(TextSplit::TXTS_ONLYSPANS | 
-				 TextSplit::TXTS_KEEPWILD));
+	    TextSplit splitterS(&splitDataS, 
+				TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | 
+						 TextSplit::TXTS_KEEPWILD));
 	    splitterS.text_to_words(*it);
-	    TextSplit splitterW(&splitDataW, (TextSplit::Flags)
-				(TextSplit::TXTS_NOSPANS | 
-				 TextSplit::TXTS_KEEPWILD));
+	    TextSplit splitterW(&splitDataW, 
+				TextSplit::Flags(TextSplit::TXTS_NOSPANS | 
+						 TextSplit::TXTS_KEEPWILD));
 	    splitterW.text_to_words(*it);
 	    wsQData *splitData = &splitDataS;
 	    if (splitDataS.terms.size() > 1 && 
@@ -389,12 +396,12 @@
 	    switch (splitData->terms.size()) {
 	    case 0: continue;// ??
 	    case 1: 
-		// Not a real phrase: one term. Still may be expanded
-		// (stem or wildcard)
+		// Just a single term. It may still be expanded (by stemming
+		// or wildcard) into an OR list.
 		{
 		    string term = splitData->terms.front();
 		    list<string> exp;  
-		    string sterm;
+		    string sterm; // dumb version of user term
 		    stripExpandTerm(false, term, exp, sterm);
 		    m_terms.insert(m_terms.end(), exp.begin(), exp.end());
 		    // Push either term or OR of stem-expanded set
@@ -417,10 +424,11 @@
 		break;
 
 	    default:
-		// Phrase/near: transform into a PHRASE or NEAR xapian
-		// query, the element of which can themselves be OR
-		// queries if the terms get expanded by stemming or
-		// wildcards (we don't do stemming for PHRASE though)
+		// Element had several terms: transform into a PHRASE
+		// or NEAR xapian query, the elements of which can
+		// themselves be OR queries if the terms get expanded
+		// by stemming or wildcards (we don't do stemming for
+		// PHRASE though)
 		Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
 		Xapian::Query::OP_PHRASE;
 		list<Xapian::Query> orqueries;