Switch to side-by-side view

--- a/src/index/indexer.cpp
+++ b/src/index/indexer.cpp
@@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: indexer.cpp,v 1.13 2005-11-05 14:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: indexer.cpp,v 1.14 2005-11-10 08:47:49 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
 #include <stdio.h>
 #include <sys/stat.h>
@@ -15,7 +15,6 @@
 #include "conftree.h"
 #include "rclconfig.h"
 #include "fstreewalk.h"
-#include "mimetype.h"
 #include "rcldb.h"
 #include "readfile.h"
 #include "indexer.h"
@@ -32,11 +31,12 @@
 #define deleteZ(X) {delete X;X = 0;}
 #endif
 
-/**
- * Bunch holder for data used while indexing a directory tree. This also the
- * tree walker callback object (the processone method gets called for every 
- * file or directory).
- */
+/// A class to index a list of top directories into one database. 
+///
+/// Inherits FsTreeWalkerCB so that its processone() method is
+/// called by the file-system tree walk code for each file and
+/// directory, and keeps all state used while indexing a
+/// directory tree. 
 class DbIndexer : public FsTreeWalkerCB {
     FsTreeWalker walker;
     RclConfig *config;
@@ -45,9 +45,9 @@
     Rcl::Db db;
     string tmpdir;
  public:
+    /// Constructor does nothing but store parameters
     DbIndexer(RclConfig *cnf, const string &dbd, list<string> *top) 
-	: config(cnf), dbdir(dbd), topdirs(top)
-    { }
+	: config(cnf), dbdir(dbd), topdirs(top) {}
 
     virtual ~DbIndexer() {
 	// Maybe clean up temporary directory
@@ -60,19 +60,21 @@
 	}
     }
 
+    /// Start indexing.
+    bool index();
+
+    /// Tree walker callback method
     FsTreeWalker::Status 
     processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
-
-    // The top level entry point. 
-    bool index();
 };
 
 
-// Top level file system tree index method for updating a given database.
-//
-// We create the temporary directory, open the database, then call a
-// file system walk for each top-level directory.
-// When walking is done, we create the stem databases and close the main db.
+/// Top level file system tree index method for updating a given database.
+///
+/// We create the temporary directory, open the database, then call a
+/// file system walk for each top-level directory.
+/// When walking is done, we create the stem databases and close the
+/// main db.
 bool DbIndexer::index()
 {
     string tdir;
@@ -90,9 +92,13 @@
 	 it != topdirs->end(); it++) {
 	LOGDEB(("DbIndexer::index: Indexing %s into %s\n", it->c_str(), 
 		dbdir.c_str()));
+
+	// Set the current directory in config so that subsequent
+	// getConfParams() will get local values
 	config->setKeyDir(*it);
 
-	// Set up skipped patterns for this subtree
+	// Set up skipped patterns for this subtree. This probably should be
+	// done in the directory change code in processone() instead.
 	{
 	    walker.clearSkippedNames();
 	    string skipped; 
@@ -106,6 +112,7 @@
 	    }
 	}
 
+	// Walk the directory tree
 	if (walker.walk(*it, *this) != FsTreeWalker::FtwOk) {
 	    LOGERR(("DbIndexer::index: error while indexing %s\n", 
 		    it->c_str()));
@@ -113,6 +120,9 @@
 	    return false;
 	}
     }
+
+    // Get rid of all database entries that don't exist in the
+    // filesystem anymore.
     db.purge();
 
     // Create stemming databases
@@ -135,22 +145,23 @@
 }
 
 
-/** 
- * This function gets called for every file and directory found by the
- * tree walker. It checks with the db if the file has changed and needs to
- * be reindexed. If so, it calls internfile() which will identify the
- * file type and call an appropriate handler to create documents in
- * internal form, which we then add to the database.
- *
- * Accent and majuscule handling are performed by the db module when doing
- * the actual indexing work. The Rcl::Doc created by internfile()
-   contains pretty raw utf8 data.
- */
+/// This method gets called for every file and directory found by the
+/// tree walker. 
+///
+/// It checks with the db if the file has changed and needs to be
+/// reindexed. If so, it calls internfile() which will identify the
+/// file type and call an appropriate handler to convert the document into
+/// internal format, which we then add to the database.
+///
+/// Accent and majuscule handling are performed by the db module when doing
+/// the actual indexing work. The Rcl::Doc created by internfile()
+/// contains pretty raw utf8 data.
 FsTreeWalker::Status 
 DbIndexer::processone(const std::string &fn, const struct stat *stp, 
-		   FsTreeWalker::CbFlag flg)
-{
-    // If we're changing directories, possibly adjust parameters.
+		      FsTreeWalker::CbFlag flg)
+{
+    // If we're changing directories, possibly adjust parameters (set
+    // the current directory in configuration object)
     if (flg == FsTreeWalker::FtwDirEnter || 
 	flg == FsTreeWalker::FtwDirReturn) {
 	config->setKeyDir(fn);
@@ -189,9 +200,13 @@
     return FsTreeWalker::FtwOk;
 }
 
-ConfIndexer::~ConfIndexer() 
-{
-    deleteZ(indexer);
+////////////////////////////////////////////////////////////////////////////
+// ConfIndexer methods: ConfIndexer is the top-level object that can index
+// multiple directories to multiple databases.
+
+ConfIndexer::~ConfIndexer()
+{
+     deleteZ(dbindexer);
 }
 
 bool ConfIndexer::index()
@@ -245,12 +260,12 @@
 	//}
 	//cout << endl;
 
-	indexer = new DbIndexer(config, dbit->first, &dbit->second);
-	if (!indexer->index()) {
-	    deleteZ(indexer);
+	dbindexer = new DbIndexer(config, dbit->first, &dbit->second);
+	if (!dbindexer->index()) {
+	    deleteZ(dbindexer);
 	    return false;
 	}
-	deleteZ(indexer);
+	deleteZ(dbindexer);
     }
     return true;
 }