Switch to side-by-side view

--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@@ -27,12 +27,16 @@
 #include <xapian.h>
 
 #include "stemdb.h"
-#include "wipedir.h"
 #include "pathut.h"
 #include "debuglog.h"
 #include "smallut.h"
 #include "utf8iter.h"
 #include "textsplit.h"
+#include "rcldb.h"
+#include "rcldb_p.h"
+#include "synfamily.h"
+
+#include <iostream>
 
 using namespace std;
 
@@ -40,46 +44,19 @@
 namespace StemDb {
 
 
-static const string cstr_stemdirstem = "stem_";
-
-/// Compute name of stem db for given base database and language
-static string stemdbname(const string& dbdir, const string& lang)
-{
-    return path_cat(dbdir, cstr_stemdirstem + lang);
-}
-
-vector<string> getLangs(const string& dbdir)
-{
-    string pattern = cstr_stemdirstem + "*";
-    vector<string> dirs = path_dirglob(dbdir, pattern);
-    for (vector<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
-	*it = path_basename(*it);
-	*it = it->substr(cstr_stemdirstem.length(), string::npos);
-    }
-    return dirs;
-}
-
-bool deleteDb(const string& dbdir, const string& lang)
-{
-    string dir = stemdbname(dbdir, lang);
-    if (wipedir(dir) == 0 && rmdir(dir.c_str()) == 0)
-	return true;
-    return false;
-}
-
-// Autoclean/delete directory 
-class DirWiper {
- public:
-    string dir;
-    bool do_it;
-    DirWiper(string d) : dir(d), do_it(true) {}
-    ~DirWiper() {
-	if (do_it) {
-	    wipedir(dir);
-	    rmdir(dir.c_str());
-	}
-    }
-};
+vector<string> getLangs(Xapian::Database& xdb)
+{
+    XapSynFamily fam(xdb, synprefStem);
+    vector<string> langs;
+    (void)fam.getMembers(langs);
+    return langs;
+}
+
+bool deleteDb(Xapian::WritableDatabase& xdb, const string& lang)
+{
+    XapWritableSynFamily fam(xdb, synprefStem);
+    return fam.deleteMember(lang);
+}
 
 inline static bool
 p_notlowerascii(unsigned int c)
@@ -88,30 +65,6 @@
 	return true;
     return false;
 }
-
-static bool addAssoc(Xapian::WritableDatabase &sdb, const string& stem,
-                     const vector<string>& derivs)
-{
-    Xapian::Document newdocument;
-    newdocument.add_term(stem);
-    // The doc data is just parents=blank-separated-list
-    string record = "parents=";
-    for (vector<string>::const_iterator it = derivs.begin(); 
-         it != derivs.end(); it++) {
-        record += *it + " ";
-    }
-    record += "\n";
-    LOGDEB2(("createStemDb: stmdoc data: [%s]\n", record.c_str()));
-    newdocument.set_data(record);
-    try {
-        sdb.replace_document(stem, newdocument);
-    } catch (...) {
-        LOGERR(("Db::createstemdb(addAssoc): replace failed\n"));
-        return false;
-    }
-    return true;
-}
-
 
 /**
  * Create database of stem to parents associations for a given language.
@@ -119,7 +72,7 @@
  * with documents indexed by a single term (the stem), and with the list of
  * parent terms in the document data.
  */
-bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang)
+bool createDb(Xapian::WritableDatabase& xdb, const string& lang)
 {
     LOGDEB(("StemDb::createDb(%s)\n", lang.c_str()));
     Chrono cron;
@@ -136,6 +89,7 @@
     int nostem=0; // Dont even try: not-alphanum (incomplete for now)
     int stemconst=0; // Stem == term
     int stemmultiple = 0; // Count of stems with multiple derivatives
+    string ermsg;
     try {
         Xapian::Stem stemmer(lang);
         Xapian::TermIterator it;
@@ -174,43 +128,18 @@
             }
             assocs[stem].push_back(*it);
         }
-    } catch (const Xapian::Error &e) {
-        LOGERR(("Db::createStemDb: build failed: %s\n", e.get_msg().c_str()));
+    } XCATCHERROR(ermsg);
+    if (!ermsg.empty()) {
+        LOGERR(("Db::createStemDb: map build failed: %s\n", ermsg.c_str()));
         return false;
-    } catch (...) {
-        LOGERR(("Db::createStemDb: build failed: no stemmer for %s ? \n", 
-                lang.c_str()));
-        return false;
-    }
+    }
+
     LOGDEB1(("StemDb::createDb(%s): in memory map built: %.2f S\n", 
              lang.c_str(), cron.secs()));
 
-    // Create xapian database for stem relations
-    string stemdbdir = stemdbname(dbdir, lang);
-    // We want to get rid of the db dir in case of error. This gets disarmed
-    // just before success return.
-    DirWiper wiper(stemdbdir);
-    string ermsg;
-    Xapian::WritableDatabase sdb;
-    try {
-        sdb = Xapian::WritableDatabase(stemdbdir, 
-                                       Xapian::DB_CREATE_OR_OVERWRITE);
-    } catch (const Xapian::Error &e) {
-        ermsg = e.get_msg();
-    } catch (const string &s) {
-        ermsg = s;
-    } catch (const char *s) {
-        ermsg = s;
-    } catch (...) {
-        ermsg = "Caught unknown exception";
-    }
-    if (!ermsg.empty()) {
-        LOGERR(("Db::createstemdb: exception while opening [%s]: %s\n", 
-                stemdbdir.c_str(), ermsg.c_str()));
-        return false;
-    }
-
-    // Enter pseud-docs in db by walking the map.
+    XapWritableSynFamily fam(xdb, synprefStem);
+    fam.createMember(lang);
+
     for (map<string, vector<string> >::const_iterator it = assocs.begin();
          it != assocs.end(); it++) {
 	LOGDEB2(("createStemDb: stem [%s]\n", it->first.c_str()));
@@ -219,8 +148,7 @@
 	// even if it doesnt exist as a term
 	if (it->second.size() > 1)
 	    ++stemmultiple;
-                    
-	if (!addAssoc(sdb, it->first, it->second)) {
+	if (!fam.addSynonyms(lang, it->first, it->second)) {
 	    return false;
 	}
     }
@@ -229,7 +157,7 @@
              lang.c_str(), cron.secs()));
     LOGDEB(("Stem map size: %d mult %d const %d no %d \n", 
 	    assocs.size(), stemmultiple, stemconst, nostem));
-    wiper.do_it = false;
+    fam.listMap(lang);
     return true;
 }
 
@@ -247,7 +175,7 @@
  * Expand term to list of all terms which stem to the same term, for one
  * expansion language
  */
-static bool stemExpandOne(const std::string& dbdir, 
+static bool stemExpandOne(Xapian::Database& xdb, 
 			  const std::string& lang,
 			  const std::string& term,
 			  vector<string>& result)
@@ -258,37 +186,9 @@
 	LOGDEB(("stemExpand:%s: [%s] stem-> [%s]\n", 
                 lang.c_str(), term.c_str(), stem.c_str()));
 
-	// Open stem database
-	string stemdbdir = stemdbname(dbdir, lang);
-	Xapian::Database sdb(stemdbdir);
-	LOGDEB0(("stemExpand: %s lastdocid: %d\n", 
-		stemdbdir.c_str(), sdb.get_lastdocid()));
-
-	// Try to fetch the doc from the stem db
-	if (!sdb.term_exists(stem)) {
-	    LOGDEB0(("Db::stemExpand: no term for %s\n", stem.c_str()));
-	} else {
-	    Xapian::PostingIterator did = sdb.postlist_begin(stem);
-	    if (did == sdb.postlist_end(stem)) {
-		LOGDEB0(("stemExpand: no term(1) for %s\n",stem.c_str()));
-	    } else {
-		Xapian::Document doc = sdb.get_document(*did);
-		string data = doc.get_data();
-
-		// Build expansion list from database data No need for
-		// a conftree, but we need to massage the data a
-		// little
-		string::size_type pos = data.find('=');
-		string::size_type pos1 = data.rfind('\n');
-		if (pos == string::npos || pos1 == string::npos || 
-		    pos1 <= pos+1) {
-		    LOGERR(("stemExpand: bad data in db: [%s]\n", 
-			    data.c_str()));
-		} else {
-		    ++pos;
-		    stringToStrings(data.substr(pos, pos1-pos), result);
-		}
-	    }
+	XapSynFamily fam(xdb, synprefStem);
+	if (!fam.synExpand(lang, stem, result)) {
+	    // ?
 	}
 
 	// If the user term or stem are not in the list, add them
@@ -302,8 +202,8 @@
 		 stringlistdisp(result).c_str()));
 
     } catch (...) {
-	LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n",
-		dbdir.c_str(), lang.c_str()));
+	LOGERR(("stemExpand: error accessing stem db. lang [%s]\n",
+		lang.c_str()));
 	result.push_back(term);
 	return false;
     }
@@ -315,18 +215,17 @@
  * Expand term to list of all terms which stem to the same term, add the
  * expansion sets for possibly multiple expansion languages
  */
-bool stemExpand(const std::string& dbdir, 
+bool stemExpand(Xapian::Database& xdb,
 		const std::string& langs,
 		const std::string& term,
 		vector<string>& result)
 {
-
     vector<string> llangs;
     stringToStrings(langs, llangs);
     for (vector<string>::const_iterator it = llangs.begin();
 	 it != llangs.end(); it++) {
 	vector<string> oneexp;
-	stemExpandOne(dbdir, *it, term, oneexp);
+	stemExpandOne(xdb, *it, term, oneexp);
 	result.insert(result.end(), oneexp.begin(), oneexp.end());
     }
     sort(result.begin(), result.end());