Switch to side-by-side view

--- a/src/rcldb/stemdb.cpp
+++ b/src/rcldb/stemdb.cpp
@@ -74,6 +74,30 @@
 	return true;
     return false;
 }
+
+static bool addAssoc(Xapian::WritableDatabase &sdb, const string& stem,
+                     const list<string>& derivs)
+{
+    Xapian::Document newdocument;
+    newdocument.add_term(stem);
+    // The doc data is just parents=blank-separated-list
+    string record = "parents=";
+    for (list<string>::const_iterator it = derivs.begin(); 
+         it != derivs.end(); it++) {
+        record += *it + " ";
+    }
+    record += "\n";
+    LOGDEB2(("createStemDb: stmdoc data: [%s]\n", record.c_str()));
+    newdocument.set_data(record);
+    try {
+        sdb.replace_document(stem, newdocument);
+    } catch (...) {
+        LOGERR(("Db::createstemdb(addAssoc): replace failed\n"));
+        return false;
+    }
+    return true;
+}
+
 
 /**
  * Create database of stem to parents associations for a given language.
@@ -100,41 +124,41 @@
     int stemdiff=0;  // Count of all different stems
     int stemmultiple = 0; // Count of stems with multiple derivatives
     try {
-	Xapian::Stem stemmer(lang);
-	Xapian::TermIterator it;
-	for (it = xdb.allterms_begin(); 
-	     it != xdb.allterms_end(); it++) {
-	    // Deciding if we try to stem the term. If it has any
-	    // non-lowercase 7bit char (that is, numbers, capitals and
-	    // punctuation) dont. We're still sending all multibyte
-	    // utf-8 chars to the stemmer, which is not too well
-	    // defined for xapian < 1.0, but seems to work anyway. We don't
-	    // try to look for multibyte non alphabetic data.
-	    string::iterator sit = (*it).begin(), eit = sit + (*it).length();
-	    if ((sit = find_if(sit, eit, p_notlowerascii)) != eit) {
-		++nostem;
-		// LOGDEB(("stemskipped: [%s], because of 0x%x\n", 
-		// (*it).c_str(), *sit));
-		continue;
-	    }
-	    string stem = stemmer(*it);
-	    //cerr << "word " << *it << " stem " << stem << endl;
-	    if (stem == *it) {
-		++stemconst;
-		continue;
-	    }
-	    assocs.insert(pair<string,string>(stem, *it));
-	}
+        Xapian::Stem stemmer(lang);
+        Xapian::TermIterator it;
+        for (it = xdb.allterms_begin(); it != xdb.allterms_end(); it++) {
+            // Deciding if we try to stem the term. If it has any
+            // non-lowercase 7bit char (that is, numbers, capitals and
+            // punctuation) dont. We're still sending all multibyte
+            // utf-8 chars to the stemmer, which is not too well
+            // defined for xapian < 1.0, but seems to work anyway. We don't
+            // try to look for multibyte non alphabetic data.
+            string::iterator sit = (*it).begin(), eit = sit + (*it).length();
+            if ((sit = find_if(sit, eit, p_notlowerascii)) != eit) {
+                ++nostem;
+                LOGDEB1(("stemskipped: [%s], because of 0x%x\n", 
+                         (*it).c_str(), *sit));
+                continue;
+            }
+            string stem = stemmer(*it);
+            LOGDEB2(("Db::createStemDb: word [%s], stem [%s]\n", (*it).c_str(),
+                     stem.c_str()));
+            if (stem == *it) {
+                ++stemconst;
+                continue;
+            }
+            assocs.insert(pair<string,string>(stem, *it));
+        }
     } catch (const Xapian::Error &e) {
-	LOGERR(("Db::createStemDb: build failed: %s\n", e.get_msg().c_str()));
-	return false;
+        LOGERR(("Db::createStemDb: build failed: %s\n", e.get_msg().c_str()));
+        return false;
     } catch (...) {
-	LOGERR(("Db::createStemDb: build failed: no stemmer for %s ? \n", 
-		lang.c_str()));
-	return false;
+        LOGERR(("Db::createStemDb: build failed: no stemmer for %s ? \n", 
+                lang.c_str()));
+        return false;
     }
     LOGDEB1(("StemDb::createDb(%s): in memory map built: %.2f S\n", 
-	    lang.c_str(), cron.secs()));
+             lang.c_str(), cron.secs()));
 
     // Create xapian database for stem relations
     string stemdbdir = stemdbname(dbdir, lang);
@@ -144,69 +168,66 @@
     string ermsg;
     Xapian::WritableDatabase sdb;
     try {
-	sdb = Xapian::WritableDatabase(stemdbdir, 
-				       Xapian::DB_CREATE_OR_OVERWRITE);
+        sdb = Xapian::WritableDatabase(stemdbdir, 
+                                       Xapian::DB_CREATE_OR_OVERWRITE);
     } catch (const Xapian::Error &e) {
-	ermsg = e.get_msg();
+        ermsg = e.get_msg();
     } catch (const string &s) {
-	ermsg = s;
+        ermsg = s;
     } catch (const char *s) {
-	ermsg = s;
+        ermsg = s;
     } catch (...) {
-	ermsg = "Caught unknown exception";
+        ermsg = "Caught unknown exception";
     }
     if (!ermsg.empty()) {
-	LOGERR(("Db::createstemdb: exception while opening [%s]: %s\n", 
-		stemdbdir.c_str(), ermsg.c_str()));
-	return false;
-    }
-
-    // Enter pseud-docs in db. Walk the multimap, only enter
-    // associations where there are several parent terms
+        LOGERR(("Db::createstemdb: exception while opening [%s]: %s\n", 
+                stemdbdir.c_str(), ermsg.c_str()));
+        return false;
+    }
+
+    // Enter pseud-docs in db by walking the multimap.
     string stem;
     list<string> derivs;
     for (multimap<string,string>::const_iterator it = assocs.begin();
-	 it != assocs.end(); it++) {
-	if (stem == it->first) {
-	    // Staying with same stem
-	    derivs.push_back(it->second);
-	    // cerr << " " << it->second << endl;
-	} else {
-	    // Changing stems 
-	    ++stemdiff;
-	    // We need an entry even if there is only one derivative
-	    // so that it is possible to search by entering the stem
-	    // even if it doesnt exist as a term
-	    if (derivs.size() >= 1) {
-		// Previous stem has multiple derivatives. Enter in db
-		++stemmultiple;
-		Xapian::Document newdocument;
-		newdocument.add_term(stem);
-		// The doc data is just parents=blank-separated-list
-		string record = "parents=";
-		for (list<string>::const_iterator it = derivs.begin(); 
-		     it != derivs.end(); it++) {
-		    record += *it + " ";
-		}
-		record += "\n";
-		LOGDEB1(("stemdocument data: %s\n", record.c_str()));
-		newdocument.set_data(record);
-		try {
-		    sdb.replace_document(stem, newdocument);
-		    //sdb.add_document(newdocument);
-		} catch (...) {
-		    LOGERR(("Db::createstemdb: replace failed\n"));
-		    return false;
-		}
-	    }
-	    derivs.clear();
-	    stem = it->first;
-	    derivs.push_back(it->second);
-	    //	    cerr << "\n" << stem << " " << it->second;
-	}
-    }
+         it != assocs.end(); it++) {
+        if (stem == it->first) {
+            // Staying with same stem
+            derivs.push_back(it->second);
+            // cerr << " " << it->second << endl;
+        } else {
+            // Changing stems 
+            ++stemdiff;
+            LOGDEB2(("createStemDb: stem [%s]\n", stem.c_str()));
+
+            // We need an entry even if there is only one derivative
+            // so that it is possible to search by entering the stem
+            // even if it doesnt exist as a term
+            if (!derivs.empty()) {
+
+                if (derivs.size() > 1)
+                    ++stemmultiple;
+                    
+                if (!addAssoc(sdb, stem, derivs)) {
+                    return false;
+                }
+                derivs.clear();
+            }
+            stem = it->first;
+            derivs.push_back(it->second);
+            //	    cerr << "\n" << stem << " " << it->second;
+        }
+    }
+    if (!derivs.empty()) {
+        if (derivs.size() > 1)
+            ++stemmultiple;
+                    
+        if (!addAssoc(sdb, stem, derivs)) {
+            return false;
+        }
+    }
+
     LOGDEB1(("StemDb::createDb(%s): done: %.2f S\n", 
-	    lang.c_str(), cron.secs()));
+             lang.c_str(), cron.secs()));
     LOGDEB(("Stem map size: %d stems %d mult %d no %d const %d\n", 
 	    assocs.size(), stemdiff, stemmultiple, nostem, stemconst));
     wiper.do_it = false;