recoll / Code / Diff of /src/rcldb/rcldb.cpp

Diff of /src/rcldb/rcldb.cpp [bde991] .. [9661a4]

Switch to side-by-side view

--- a/src/rcldb/rcldb.cpp
+++ b/src/rcldb/rcldb.cpp
@@ -57,6 +57,9 @@
 #include "rclinit.h"
 #include "internfile.h"
 #include "utf8fn.h"
+#ifdef RCL_USE_ASPELL
+#include "rclaspell.h"
+#endif
 
 // Recoll index format version is stored in user metadata. When this change,
 // we can't open the db and will have to reindex.
@@ -731,11 +734,13 @@
 
 Db::~Db()
 {
-    LOGDEB2("Db::~Db\n" );
+    LOGDEB2("Db::~Db\n");
     if (m_ndb == 0)
 	return;
-    LOGDEB("Db::~Db: isopen "  << (m_ndb->m_isopen) << " m_iswritable "  << (m_ndb->m_iswritable) << "\n" );
+    LOGDEB("Db::~Db: isopen " << m_ndb->m_isopen << " m_iswritable " <<
+           m_ndb->m_iswritable << "\n");
     i_close(true);
+    delete m_aspell; // Allocated on demand by getSpellingSuggestions()
     delete m_config;
 }
 
@@ -1055,9 +1060,11 @@
     // gets added to basepos in addition to the inter-section increment
     // to compute the first position of the next section.
     Xapian::termpos curpos;
-
-    TextSplitDb(Xapian::Document &d, TermProc *prc)
-	: TextSplitP(prc), doc(d), basepos(1), curpos(0)
+    Xapian::WritableDatabase& wdb;
+
+    TextSplitDb(Xapian::WritableDatabase& _wdb, Xapian::Document &d,
+                TermProc *prc)
+	: TextSplitP(prc), doc(d), basepos(1), curpos(0), wdb(_wdb)
     {}
 
     // Reimplement text_to_words to insert the begin and end anchor terms.
@@ -1132,8 +1139,8 @@
                 m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
 
 #ifdef TESTING_XAPIAN_SPELL
-	    if (Db::isSpellingCandidate(term)) {
-		m_ts->db.add_spelling(term);
+	    if (Db::isSpellingCandidate(term, false)) {
+		m_ts->wdb.add_spelling(term);
 	    }
 #endif
 	    // Index the prefixed term.
@@ -1192,30 +1199,80 @@
 };
 
 
+// Fill suggs for word: aspell for its candidates, else (e.g. Katakana) maybe
+// Xapian. False: no db, aspell disabled or failing, or unac failure.
+bool Db::getSpellingSuggestions(const string& word, vector<string>& suggs)
+{
+    LOGDEB("Db::getSpellingSuggestions:[" << word << "]\n" );
+    suggs.clear();
+    if (nullptr == m_ndb) {
+	return false;
+    }
+
+    string term = word;
+
+    if (isSpellingCandidate(term, true)) {
+        // Term is candidate for aspell processing
+#ifdef RCL_USE_ASPELL
+        bool noaspell = false;
+        m_config->getConfParam("noaspell", &noaspell);
+        if (noaspell) {
+            return false;
+        }
+        if (nullptr == m_aspell) {
+            m_aspell = new Aspell(m_config);
+            if (m_aspell) {
+                string reason;
+                m_aspell->init(reason);
+                if (!m_aspell->ok()) {
+                    LOGDEB("Aspell speller init failed " << reason << "\n");
+                    delete m_aspell;
+                    m_aspell = 0;
+                }
+            }
+        }
+
+        if (nullptr == m_aspell) {
+            LOGERR("Db::getSpellingSuggestions: aspell not initialized\n");
+            return false;
+        }
+
+        list<string> asuggs;
+        string reason;
+        if (!m_aspell->suggest(*this, term, asuggs, reason)) {
+            LOGERR("Db::getSpellingSuggestions: aspell failed: " << reason <<
+                   "\n");
+            return false;
+        }
+        suggs = vector<string>(asuggs.begin(), asuggs.end());
+#endif
+    } else {
 #ifdef TESTING_XAPIAN_SPELL
-string Db::getSpellingSuggestion(const string& word)
-{
-    if (m_ndb == 0)
-	return string();
-
-    string term = word;
-
-    if (o_index_stripchars)
-	if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
-	    LOGINFO("Db::getSpelling: unac failed for ["  << (word) << "]\n" );
-	    return string();
-	}
-
-    if (!isSpellingCandidate(term))
-	return string();
-    return m_ndb->xrdb.get_spelling_suggestion(term);
-}
+        // Was not aspell candidate (e.g.: katakana). Maybe use Xapian
+        // speller?
+        if (isSpellingCandidate(term, false)) {
+            if (!o_index_stripchars) {
+                if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
+                    LOGINFO("Db::getSpelling: unac failed for [" << word <<
+                            "]\n");
+                    return false;
+                }
+            }
+            string sugg = m_ndb->xrdb.get_spelling_suggestion(term);
+            if (!sugg.empty()) {
+                suggs.push_back(sugg);
+            }
+        }
 #endif
+    }
+    return true;
+}
 
 // Let our user set the parameters for abstract processing
 void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
 {
-    LOGDEB1("Db::setAbstractParams: trunc "  << (idxtrunc) << " syntlen "  << (syntlen) << " ctxlen "  << (syntctxlen) << "\n" );
+    LOGDEB1("Db::setAbstractParams: trunc " << idxtrunc << " syntlen " <<
+            syntlen << " ctxlen " << syntctxlen << "\n");
     if (idxtrunc >= 0)
 	m_idxAbsTruncLen = idxtrunc;
     if (syntlen > 0)
@@ -1238,7 +1295,7 @@
 // metadata), and update database
 bool Db::addOrUpdate(const string &udi, const string &parent_udi, Doc &doc)
 {
-    LOGDEB("Db::add: udi ["  << (udi) << "] parent ["  << (parent_udi) << "]\n" );
+    LOGDEB("Db::add: udi [" << udi << "] parent [" << parent_udi << "]\n");
     if (m_ndb == 0)
 	return false;
 
@@ -1259,7 +1316,7 @@
     if (o_index_stripchars)
 	nxt = &tpprep;
 
-    TextSplitDb splitter(newdocument, nxt);
+    TextSplitDb splitter(m_ndb->xwdb, newdocument, nxt);
     tpidx.setTSD(&splitter);
 
     // Udi unique term: this is used for file existence/uptodate