recoll / Code / Diff of /src/rcldb/rclquery.cpp

Diff of /src/rcldb/rclquery.cpp [5c46db] .. [b99372]

Switch to side-by-side view

--- a/src/rcldb/rclquery.cpp
+++ b/src/rcldb/rclquery.cpp
@@ -51,11 +51,11 @@
 static const string& docfToDatf(const string& df)
 {
     if (!df.compare(Doc::keytt)) {
-	return cstr_caption;
+        return cstr_caption;
     } else if (!df.compare(Doc::keymt)) {
-	return cstr_dmtime;
+        return cstr_dmtime;
     } else {
-	return df;
+        return df;
     }
 }
 
@@ -70,70 +70,70 @@
 #endif
 public:
     QSorter(const string& f) 
-	: m_fld(docfToDatf(f) + "=") 
+        : m_fld(docfToDatf(f) + "=") 
     {
-	m_ismtime = !m_fld.compare("dmtime=");
-	if (m_ismtime)
-	    m_issize = false;
-	else 
-	    m_issize = !m_fld.compare("fbytes=") || !m_fld.compare("dbytes=") ||
-		!m_fld.compare("pcbytes=");
+        m_ismtime = !m_fld.compare("dmtime=");
+        if (m_ismtime)
+            m_issize = false;
+        else 
+            m_issize = !m_fld.compare("fbytes=") || !m_fld.compare("dbytes=") ||
+                !m_fld.compare("pcbytes=");
     }
 
     virtual std::string operator()(const Xapian::Document& xdoc) const 
     {
-	string data = xdoc.get_data();
-	// It would be simpler to do the record->Rcl::Doc thing, but
-	// hand-doing this will be faster. It makes more assumptions
-	// about the format than a ConfTree though:
-	string::size_type i1, i2;
-	i1 = data.find(m_fld);
-	if (i1 == string::npos) {
-	    if (m_ismtime) {
-		// Ugly: specialcase mtime as it's either dmtime or fmtime
-		i1 = data.find("fmtime=");
-		if (i1 == string::npos) {
-		    return string();
-		}
-	    } else {
-		return string();
-	    }
-	}
-	i1 += m_fld.length();
-	if (i1 >= data.length())
-	    return string();
-	i2 = data.find_first_of("\n\r", i1);
-	if (i2 == string::npos)
-	    return string();
-
-	string term = data.substr(i1, i2-i1);
-	if (m_ismtime) {
-	    return term;
-	} else if (m_issize) {
-	    // Left zeropad values for appropriate numeric sorting
-	    leftzeropad(term, 12);
-	    return term;
-	}
-
-	// Process data for better sorting. We should actually do the
-	// unicode thing
-	// (http://unicode.org/reports/tr10/#Introduction), but just
-	// removing accents and majuscules will remove the most
-	// glaring weirdnesses (or not, depending on your national
-	// approach to collating...)
-	string sortterm;
-	// We're not even sure the term is utf8 here (ie: url)
-	if (!unacmaybefold(term, sortterm, "UTF-8", UNACOP_UNACFOLD)) {
-	    sortterm = term;
-	}
-	// Also remove some common uninteresting starting characters
-	i1 = sortterm.find_first_not_of(" \t\\\"'([*+,.#/");
-	if (i1 != 0 && i1 != string::npos) {
-	    sortterm = sortterm.substr(i1, sortterm.size()-i1);
-	}
-
-	LOGDEB2("QSorter: ["  << (term) << "] -> ["  << (sortterm) << "]\n" );
-	return sortterm;
+        string data = xdoc.get_data();
+        // It would be simpler to do the record->Rcl::Doc thing, but
+        // hand-doing this will be faster. It makes more assumptions
+        // about the format than a ConfTree though:
+        string::size_type i1, i2;
+        i1 = data.find(m_fld);
+        if (i1 == string::npos) {
+            if (m_ismtime) {
+                // Ugly: specialcase mtime as it's either dmtime or fmtime
+                i1 = data.find("fmtime=");
+                if (i1 == string::npos) {
+                    return string();
+                }
+            } else {
+                return string();
+            }
+        }
+        i1 += m_fld.length();
+        if (i1 >= data.length())
+            return string();
+        i2 = data.find_first_of("\n\r", i1);
+        if (i2 == string::npos)
+            return string();
+
+        string term = data.substr(i1, i2-i1);
+        if (m_ismtime) {
+            return term;
+        } else if (m_issize) {
+            // Left zeropad values for appropriate numeric sorting
+            leftzeropad(term, 12);
+            return term;
+        }
+
+        // Process data for better sorting. We should actually do the
+        // unicode thing
+        // (http://unicode.org/reports/tr10/#Introduction), but just
+        // removing accents and majuscules will remove the most
+        // glaring weirdnesses (or not, depending on your national
+        // approach to collating...)
+        string sortterm;
+        // We're not even sure the term is utf8 here (ie: url)
+        if (!unacmaybefold(term, sortterm, "UTF-8", UNACOP_UNACFOLD)) {
+            sortterm = term;
+        }
+        // Also remove some common uninteresting starting characters
+        i1 = sortterm.find_first_not_of(" \t\\\"'([*+,.#/");
+        if (i1 != 0 && i1 != string::npos) {
+            sortterm = sortterm.substr(i1, sortterm.size()-i1);
+        }
+
+        LOGDEB2("QSorter: [" << term << "] -> [" << sortterm << "]\n");
+        return sortterm;
     }
 
 private:
@@ -147,26 +147,27 @@
       m_collapseDuplicates(false), m_resCnt(-1), m_snipMaxPosWalk(1000000)
 {
     if (db)
-	db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
+        db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
 }
 
 Query::~Query()
 {
     deleteZ(m_nq);
     if (m_sorter) {
-	delete (QSorter*)m_sorter;
-	m_sorter = 0;
+        delete (QSorter*)m_sorter;
+        m_sorter = 0;
     }
 }
 
 void Query::setSortBy(const string& fld, bool ascending) {
     if (fld.empty()) {
-	m_sortField.erase();
+        m_sortField.erase();
     } else {
-	m_sortField = m_db->getConf()->fieldQCanon(fld);
-	m_sortAscending = ascending;
-    }
-    LOGDEB0("RclQuery::setSortBy: ["  << (m_sortField) << "] "  << (m_sortAscending ? "ascending" : "descending") << "\n" );
+        m_sortField = m_db->getConf()->fieldQCanon(fld);
+        m_sortAscending = ascending;
+    }
+    LOGDEB0("RclQuery::setSortBy: [" << m_sortField << "] " <<
+            (m_sortAscending ? "ascending" : "descending") << "\n");
 }
 
 //#define ISNULL(X) (X).isNull()
@@ -175,11 +176,11 @@
 // Prepare query out of user search data
 bool Query::setQuery(std::shared_ptr<SearchData> sdata)
 {
-    LOGDEB("Query::setQuery:\n" );
+    LOGDEB("Query::setQuery:\n");
 
     if (!m_db || ISNULL(m_nq)) {
-	LOGERR("Query::setQuery: not initialised!\n" );
-	return false;
+        LOGERR("Query::setQuery: not initialised!\n");
+        return false;
     }
     m_resCnt = -1;
     m_reason.erase();
@@ -189,33 +190,33 @@
     
     Xapian::Query xq;
     if (!sdata->toNativeQuery(*m_db, &xq)) {
-	m_reason += sdata->getReason();
-	return false;
+        m_reason += sdata->getReason();
+        return false;
     }
 
     m_nq->xquery = xq;
 
     string d;
     for (int tries = 0; tries < 2; tries++) {
-	try {
+        try {
             m_nq->xenquire = new Xapian::Enquire(m_db->m_ndb->xrdb);
             if (m_collapseDuplicates) {
                 m_nq->xenquire->set_collapse_key(Rcl::VALUE_MD5);
             } else {
                 m_nq->xenquire->set_collapse_key(Xapian::BAD_VALUENO);
             }
-	    m_nq->xenquire->set_docid_order(Xapian::Enquire::DONT_CARE);
+            m_nq->xenquire->set_docid_order(Xapian::Enquire::DONT_CARE);
             if (!m_sortField.empty() && 
-		stringlowercmp("relevancyrating", m_sortField)) {
+                stringlowercmp("relevancyrating", m_sortField)) {
                 if (m_sorter) {
                     delete (QSorter*)m_sorter;
                     m_sorter = 0;
                 }
-		m_sorter = new QSorter(m_sortField);
-		// It really seems there is a xapian bug about sort order, we 
-		// invert here.
-		m_nq->xenquire->set_sort_by_key((QSorter*)m_sorter, 
-						!m_sortAscending);
+                m_sorter = new QSorter(m_sortField);
+                // It really seems there is a xapian bug about sort order, we 
+                // invert here.
+                m_nq->xenquire->set_sort_by_key((QSorter*)m_sorter, 
+                                                !m_sortAscending);
             }
             m_nq->xenquire->set_query(m_nq->xquery);
             m_nq->xmset = Xapian::MSet();
@@ -223,64 +224,64 @@
             d = m_nq->xquery.get_description();
             m_reason.erase();
             break;
-	} catch (const Xapian::DatabaseModifiedError &e) {
+        } catch (const Xapian::DatabaseModifiedError &e) {
             m_reason = e.get_msg();
-	    m_db->m_ndb->xrdb.reopen();
+            m_db->m_ndb->xrdb.reopen();
             continue;
-	} XCATCHERROR(m_reason);
+        } XCATCHERROR(m_reason);
         break;
     }
 
     if (!m_reason.empty()) {
-	LOGDEB("Query::SetQuery: xapian error "  << (m_reason) << "\n" );
-	return false;
-    }
-	
+        LOGDEB("Query::SetQuery: xapian error " << m_reason << "\n");
+        return false;
+    }
+        
     if (d.find("Xapian::Query") == 0)
-	d.erase(0, strlen("Xapian::Query"));
+        d.erase(0, strlen("Xapian::Query"));
 
     sdata->setDescription(d);
     m_sd = sdata;
-    LOGDEB("Query::SetQuery: Q: "  << (sdata->getDescription()) << "\n" );
+    LOGDEB("Query::SetQuery: Q: " << sdata->getDescription() << "\n");
     return true;
 }
 
 bool Query::getQueryTerms(vector<string>& terms)
 {
     if (ISNULL(m_nq))
-	return false;
+        return false;
 
     terms.clear();
     Xapian::TermIterator it;
     string ermsg;
     try {
-	for (it = m_nq->xquery.get_terms_begin(); 
-	     it != m_nq->xquery.get_terms_end(); it++) {
-	    terms.push_back(*it);
-	}
+        for (it = m_nq->xquery.get_terms_begin(); 
+             it != m_nq->xquery.get_terms_end(); it++) {
+            terms.push_back(*it);
+        }
     } XCATCHERROR(ermsg);
     if (!ermsg.empty()) {
-	LOGERR("getQueryTerms: xapian error: "  << (ermsg) << "\n" );
-	return false;
+        LOGERR("getQueryTerms: xapian error: " << ermsg << "\n");
+        return false;
     }
     return true;
 }
 
-int Query::makeDocAbstract(const Doc &doc,
-			   vector<Snippet>& abstract, 
-			   int maxoccs, int ctxwords)
-{
-    LOGDEB("makeDocAbstract: maxoccs "  << (maxoccs) << " ctxwords "  << (ctxwords) << "\n" );
+int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract, 
+                           int maxoccs, int ctxwords)
+{
+    LOGDEB("makeDocAbstract: maxoccs " << maxoccs << " ctxwords "  <<
+           ctxwords << "\n");
     if (!m_db || !m_db->m_ndb || !m_db->m_ndb->m_isopen || !m_nq) {
-	LOGERR("Query::makeDocAbstract: no db or no nq\n" );
-	return ABSRES_ERROR;
+        LOGERR("Query::makeDocAbstract: no db or no nq\n");
+        return ABSRES_ERROR;
     }
     int ret = ABSRES_ERROR;
     XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords),
            m_db->m_ndb->xrdb, m_reason);
     if (!m_reason.empty()) {
-	LOGDEB("makeDocAbstract: makeAbstract error, reason: "  << (m_reason) << "\n" );
-	return ABSRES_ERROR;
+        LOGDEB("makeDocAbstract: makeAbstract: reason: " << m_reason << "\n");
+        return ABSRES_ERROR;
     }
     return ret;
 }
@@ -289,17 +290,17 @@
 {
     vector<Snippet> vpabs;
     if (!makeDocAbstract(doc, vpabs))
-	return false;
+        return false;
     for (vector<Snippet>::const_iterator it = vpabs.begin();
-	 it != vpabs.end(); it++) {
-	string chunk;
-	if (it->page > 0) {
-	    ostringstream ss;
-	    ss << it->page;
-	    chunk += string(" [p ") + ss.str() + "] ";
-	}
-	chunk += it->snippet;
-	abstract.push_back(chunk);
+         it != vpabs.end(); it++) {
+        string chunk;
+        if (it->page > 0) {
+            ostringstream ss;
+            ss << it->page;
+            chunk += string(" [p ") + ss.str() + "] ";
+        }
+        chunk += it->snippet;
+        abstract.push_back(chunk);
     }
     return true;
 }
@@ -308,25 +309,25 @@
 {
     vector<Snippet> vpabs;
     if (!makeDocAbstract(doc, vpabs))
-	return false;
+        return false;
     for (vector<Snippet>::const_iterator it = vpabs.begin(); 
-	 it != vpabs.end(); it++) {
-	abstract.append(it->snippet);
-	abstract.append(cstr_ellipsis);
+         it != vpabs.end(); it++) {
+        abstract.append(it->snippet);
+        abstract.append(cstr_ellipsis);
     }
     return m_reason.empty() ? true : false;
 }
 
 int Query::getFirstMatchPage(const Doc &doc, string& term)
 {
-    LOGDEB1("Db::getFirstMatchPage\n" );;
+    LOGDEB1("Db::getFirstMatchPage\n");;
     if (!m_nq) {
-	LOGERR("Query::getFirstMatchPage: no nq\n" );
-	return false;
+        LOGERR("Query::getFirstMatchPage: no nq\n");
+        return false;
     }
     int pagenum = -1;
     XAPTRY(pagenum = m_nq->getFirstMatchPage(Xapian::docid(doc.xdocid), term),
-	   m_db->m_ndb->xrdb, m_reason);
+           m_db->m_ndb->xrdb, m_reason);
     return m_reason.empty() ? pagenum : -1;
 }
 
@@ -339,11 +340,11 @@
 int Query::getResCnt()
 {
     if (ISNULL(m_nq) || !m_nq->xenquire) {
-	LOGERR("Query::getResCnt: no query opened\n" );
-	return -1;
+        LOGERR("Query::getResCnt: no query opened\n");
+        return -1;
     }
     if (m_resCnt >= 0)
-	return m_resCnt;
+        return m_resCnt;
 
     m_resCnt = -1;
     if (m_nq->xmset.size() <= 0) {
@@ -354,9 +355,9 @@
                m_resCnt = m_nq->xmset.get_matches_lower_bound(),
                m_db->m_ndb->xrdb, m_reason);
 
-        LOGDEB("Query::getResCnt: "  << (m_resCnt) << " "  << (chron.millis()) << " mS\n" );
-	if (!m_reason.empty())
-	    LOGERR("xenquire->get_mset: exception: "  << (m_reason) << "\n" );
+        LOGDEB("Query::getResCnt: "<<m_resCnt<<" "<< chron.millis() << " mS\n");
+        if (!m_reason.empty())
+            LOGERR("xenquire->get_mset: exception: " << m_reason << "\n");
     } else {
         m_resCnt = m_nq->xmset.get_matches_lower_bound();
     }
@@ -373,32 +374,32 @@
 // on subsequent calls is probably only due to disk caching.
 bool Query::getDoc(int xapi, Doc &doc)
 {
-    LOGDEB1("Query::getDoc: xapian enquire index "  << (xapi) << "\n" );
+    LOGDEB1("Query::getDoc: xapian enquire index " << xapi << "\n");
     if (ISNULL(m_nq) || !m_nq->xenquire) {
-	LOGERR("Query::getDoc: no query opened\n" );
-	return false;
+        LOGERR("Query::getDoc: no query opened\n");
+        return false;
     }
 
     int first = m_nq->xmset.get_firstitem();
     int last = first + m_nq->xmset.size() -1;
 
     if (!(xapi >= first && xapi <= last)) {
-	LOGDEB("Fetching for first "  << (xapi) << ", count "  << (qquantum) << "\n" );
-
-	XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,  
-						      (const Xapian::RSet *)0),
+        LOGDEB("Fetching for first " << xapi << ", count " << qquantum << "\n");
+
+        XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,  
+                                                      (const Xapian::RSet *)0),
                m_db->m_ndb->xrdb, m_reason);
 
         if (!m_reason.empty()) {
-            LOGERR("enquire->get_mset: exception: "  << (m_reason) << "\n" );
+            LOGERR("enquire->get_mset: exception: " << m_reason << "\n");
             return false;
-	}
-	if (m_nq->xmset.empty()) {
-            LOGDEB("enquire->get_mset: got empty result\n" );
-	    return false;
-        }
-	first = m_nq->xmset.get_firstitem();
-	last = first + m_nq->xmset.size() -1;
+        }
+        if (m_nq->xmset.empty()) {
+            LOGDEB("enquire->get_mset: got empty result\n");
+            return false;
+        }
+        first = m_nq->xmset.get_firstitem();
+        last = first + m_nq->xmset.size() -1;
     }
 
     Xapian::Document xdoc;
@@ -411,14 +412,15 @@
     for (int xaptries=0; xaptries < 2; xaptries++) {
         try {
             xdoc = m_nq->xmset[xapi-first].get_document();
-	    collapsecount = m_nq->xmset[xapi-first].get_collapse_count();
+            collapsecount = m_nq->xmset[xapi-first].get_collapse_count();
             docid = *(m_nq->xmset[xapi-first]);
             pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]);
             data = xdoc.get_data();
             m_reason.erase();
             Chrono chron;
-	    m_db->m_ndb->xdocToUdi(xdoc, udi);
-            LOGDEB2("Query::getDoc: "  << (chron.millis()) << " ms for udi ["  << (udi) << "], collapse count "  << (collapsecount) << "\n" );
+            m_db->m_ndb->xdocToUdi(xdoc, udi);
+            LOGDEB2("Query::getDoc: " << chron.millis() << " ms for udi [" <<
+                    udi << "], collapse count " << collapsecount << "\n");
             break;
         } catch (Xapian::DatabaseModifiedError &error) {
             // retry or end of loop
@@ -429,7 +431,7 @@
         break;
     }
     if (!m_reason.empty()) {
-        LOGERR("Query::getDoc: "  << (m_reason) << "\n" );
+        LOGERR("Query::getDoc: " << m_reason << "\n");
         return false;
     }
     doc.meta[Rcl::Doc::keyudi] = udi;
@@ -437,15 +439,15 @@
     doc.pc = pc;
     char buf[200];
     if (collapsecount > 0) {
-	sprintf(buf,"%3d%% (%d)", pc, collapsecount + 1);
+        sprintf(buf,"%3d%% (%d)", pc, collapsecount + 1);
     } else {
-	sprintf(buf,"%3d%%", pc);
+        sprintf(buf,"%3d%%", pc);
     }
     doc.meta[Doc::keyrr] = buf;
 
     if (collapsecount > 0) {
-	sprintf(buf, "%d", collapsecount);
-	doc.meta[Rcl::Doc::keycc] = buf;
+        sprintf(buf, "%d", collapsecount);
+        doc.meta[Rcl::Doc::keycc] = buf;
     }
 
     // Parse xapian document's data and populate doc fields
@@ -454,42 +456,42 @@
 
 vector<string> Query::expand(const Doc &doc)
 {
-    LOGDEB("Rcl::Query::expand()\n" );
+    LOGDEB("Rcl::Query::expand()\n");
     vector<string> res;
     if (ISNULL(m_nq) || !m_nq->xenquire) {
-	LOGERR("Query::expand: no query opened\n" );
-	return res;
+        LOGERR("Query::expand: no query opened\n");
+        return res;
     }
 
     for (int tries = 0; tries < 2; tries++) {
-	try {
-	    Xapian::RSet rset;
-	    rset.add_document(Xapian::docid(doc.xdocid));
-	    // We don't exclude the original query terms.
-	    Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false);
-	    LOGDEB("ESet terms:\n" );
-	    // We filter out the special terms
-	    for (Xapian::ESetIterator it = eset.begin(); 
-		 it != eset.end(); it++) {
-		LOGDEB(" ["  << ((*it)) << "]\n" );
-		if ((*it).empty() || has_prefix(*it))
-		    continue;
-		res.push_back(*it);
-		if (res.size() >= 10)
-		    break;
-	    }
+        try {
+            Xapian::RSet rset;
+            rset.add_document(Xapian::docid(doc.xdocid));
+            // We don't exclude the original query terms.
+            Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false);
+            LOGDEB("ESet terms:\n");
+            // We filter out the special terms
+            for (Xapian::ESetIterator it = eset.begin(); 
+                 it != eset.end(); it++) {
+                LOGDEB(" [" << (*it) << "]\n");
+                if ((*it).empty() || has_prefix(*it))
+                    continue;
+                res.push_back(*it);
+                if (res.size() >= 10)
+                    break;
+            }
             m_reason.erase();
             break;
-	} catch (const Xapian::DatabaseModifiedError &e) {
+        } catch (const Xapian::DatabaseModifiedError &e) {
             m_reason = e.get_msg();                    
             m_db->m_ndb->xrdb.reopen();
             continue;
-	} XCATCHERROR(m_reason);
-	break;
+        } XCATCHERROR(m_reason);
+        break;
     }
 
     if (!m_reason.empty()) {
-        LOGERR("Query::expand: xapian error "  << (m_reason) << "\n" );
+        LOGERR("Query::expand: xapian error " << m_reason << "\n");
         res.clear();
     }