recoll / Code / Diff of /src/query/plaintorich.cpp

Diff of /src/query/plaintorich.cpp [ebdd6f] .. [dc7b34]

Switch to side-by-side view

--- a/src/query/plaintorich.cpp
+++ b/src/query/plaintorich.cpp
@@ -24,12 +24,10 @@
 #include <map>
 #include <algorithm>
 
-#ifndef NO_NAMESPACES
 using std::vector;
 using std::list;
 using std::pair;
 using std::set;
-#endif /* NO_NAMESPACES */
 
 #include "rcldb.h"
 #include "rclconfig.h"
@@ -51,28 +49,30 @@
     return sterms;
 }
 
-// Text splitter callback used to take note of the position of query terms 
-// inside the result text. This is then used to insert highlight tags. 
+// Text splitter used to take note of the position of query terms
+// inside the result text. This is then used to insert highlight tags.
 class TextSplitPTR : public TextSplit {
  public:
 
     // Out: begin and end byte positions of query terms/groups in text
     vector<pair<int, int> > tboffs;  
 
-    TextSplitPTR(const vector<string>& its, 
-                 const vector<vector<string> >&groups, 
-                 const vector<int>& slacks) 
-	:  m_wcount(0), m_groups(groups), m_slacks(slacks)
+    TextSplitPTR(const HighlightData& hdata)
+    :  m_wcount(0), m_hdata(hdata)
     {
-	for (vector<string>::const_iterator it = its.begin(); 
-	     it != its.end(); it++) {
-	    m_terms.insert(*it);
-	}
-	for (vector<vector<string> >::const_iterator vit = m_groups.begin(); 
-	     vit != m_groups.end(); vit++) {
-	    for (vector<string>::const_iterator it = (*vit).begin(); 
-		 it != (*vit).end(); it++) {
-		m_gterms.insert(*it);
+	// We separate single terms and groups and extract the group
+	// terms for computing positions list before looking for group
+	// matches
+
+	for (vector<vector<string> >::const_iterator vit = hdata.groups.begin();
+	     vit != hdata.groups.end(); vit++) {
+	    if (vit->size() == 1) {
+		m_terms.insert(vit->front());
+	    } else if (vit->size() > 1) {
+		for (vector<string>::const_iterator it = vit->begin(); 
+		     it != vit->end(); it++) {
+		    m_gterms.insert(*it);
+		}
 	    }
 	}
     }
@@ -116,15 +116,16 @@
 private:
     virtual bool matchGroup(const vector<string>& terms, int dist);
 
+    // Word count. Used to call checkCancel from time to time.
     int m_wcount;
 
     // In: user query terms
     set<string>    m_terms; 
 
-    // In: user query groups, for near/phrase searches.
-    const vector<vector<string> >& m_groups;
-    const vector<int>&             m_slacks;
-    set<string>                    m_gterms;
+    // m_gterms holds all the terms in m_groups, as a set for quick lookup
+    set<string>    m_gterms;
+
+    const HighlightData& m_hdata;
 
     // group/near terms word positions.
     map<string, vector<int> > m_plists;
@@ -294,10 +295,11 @@
 // handle all groups as NEAR (ignore order).
 bool TextSplitPTR::matchGroups()
 {
-    vector<vector<string> >::const_iterator vit = m_groups.begin();
-    vector<int>::const_iterator sit = m_slacks.begin();
-    for (; vit != m_groups.end() && sit != m_slacks.end(); vit++, sit++) {
-	matchGroup(*vit, *sit + (*vit).size());
+    for (unsigned int i = 0; i < m_hdata.groups.size(); i++) {
+	if (m_hdata.groups[i].size() <= 1)
+	    continue;
+	matchGroup(m_hdata.groups[i], 
+		   m_hdata.groups[i].size() + m_hdata.slacks[i]);
     }
 
     // Sort regions by increasing start and decreasing width.  
@@ -317,39 +319,22 @@
 // the input is html, the body is always a single output chunk.
 bool PlainToRich::plaintorich(const string& in, 
 			      list<string>& out, // Output chunk list
-			      const HiliteData& hdata,
+			      const HighlightData& hdata,
 			      int chunksize)
 {
     Chrono chron;
-    const vector<string>& terms(hdata.terms);
-    const vector<vector<string> >& groups(hdata.groups);
-    const vector<int>& slacks(hdata.gslks);
-
-    if (0 && DebugLog::getdbl()->getlevel() >= DEBDEB0) {
-	string sterms = vecStringToString(terms);
-	LOGDEB0(("plaintorich: terms: %s\n", sterms.c_str()));
-	sterms.clear();
-	for (vector<vector<string> >::const_iterator vit = groups.begin(); 
-	     vit != groups.end(); vit++) {
-	    sterms += "GROUP: ";
-	    sterms += vecStringToString(*vit);
-	    sterms += "\n";
-	}
-	LOGDEB0(("plaintorich: groups:\n %s", sterms.c_str()));
-        LOGDEB2(("  TEXT:[%s]\n", in.c_str()));
-    }
 
     // Compute the positions for the query terms.  We use the text
     // splitter to break the text into words, and compare the words to
     // the search terms,
-    TextSplitPTR splitter(terms, groups, slacks);
+    TextSplitPTR splitter(hdata);
     // Note: the splitter returns the term locations in byte, not
     // character, offsets.
     splitter.text_to_words(in);
     LOGDEB2(("plaintorich: split done %d mS\n", chron.millis()));
-
     // Compute the positions for NEAR and PHRASE groups.
     splitter.matchGroups();
+    LOGDEB2(("plaintorich: group match done %d mS\n", chron.millis()));
 
     out.clear();
     out.push_back("");