Switch to side-by-side view

--- a/src/index/recollindex.cpp
+++ b/src/index/recollindex.cpp
@@ -1,6 +1,8 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
+static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
 #endif
+
+#include <sys/stat.h>
 
 #include <strings.h>
 
@@ -14,24 +16,49 @@
 #include "rcldb.h"
 #include "readfile.h"
 #include "indexer.h"
+#include "csguess.h"
+#include "transcode.h"
 
 using namespace std;
 
 
-Rcl::Doc* textPlainToDoc(RclConfig *conf, const string &fn, 
-			 const string &mtype)
-{
-    return 0;
-}
-
+// Read the plain-text file fn, convert it to UTF-8 and store it in docout.
+// Returns true on success, false on read or transcode failure. mtype is unused.
+bool textPlainToDoc(RclConfig *conf, const string &fn, 
+			 const string &mtype, Rcl::Doc &docout)
+{
+    string otext;
+    if (!file_to_string(fn, otext))
+	return false;
+    // Use the guessed charset if guessing is enabled, else the default
+    string charset = conf->defcharset;
+    if (conf->guesscharset)
+	charset = csguess(otext, conf->defcharset);
+    string utf8;
+    // NOTE(review): assumes transcode() returns true on success -- confirm
+    if (!transcode(otext, charset, utf8, "UTF-8"))
+	return false;
+
+    Rcl::Doc out;
+    out.origcharset = charset;
+    out.text = utf8;
+    docout = out;
+    return true;
+}
+
+// Map of mime types to internal interner functions. This could just as well
+// be an if / else-if chain inside getMimeHandler(), but a map looks cleaner.
 static map<string, MimeHandlerFunc> ihandlers;
+// Static object whose constructor fills the map at program start.
 class IHandler_Init {
  public:
     IHandler_Init() {
 	ihandlers["text/plain"] = textPlainToDoc;
+	// Register handlers for additional mime types here when needed
     }
 };
 static IHandler_Init ihandleriniter;
+
 
 /**
  * Return handler function for given mime type
@@ -75,6 +102,9 @@
     }
 }
 
+/**
+ * Holds the data used while indexing a directory tree
+ */
 class DirIndexer {
     FsTreeWalker walker;
     RclConfig *config;
@@ -95,23 +125,23 @@
 
 void DirIndexer::index()
 {
-#if 0
     if (!db.open(dbdir, Rcl::Db::DbUpd)) {
 	cerr << "Error opening database in " << dbdir << " for " <<
 	    topdir << endl;
 	return;
     }
-#endif
     walker.walk(topdir, indexfile, this);
-#if 0
     if (!db.close()) {
 	cerr << "Error closing database in " << dbdir << " for " <<
 	    topdir << endl;
 	return;
     }
-#endif
-}
-
+}
+
+/** 
+ * This function gets called for every file and directory found by the
+ * tree walker. Adjust parameters and index files if/when needed.
+ */
 FsTreeWalker::Status 
 indexfile(void *cdata, const std::string &fn, const struct stat *stp, 
 	  FsTreeWalker::CbFlag flg)
@@ -144,24 +174,23 @@
 	return FsTreeWalker::FtwOk;
     }
 
-    // Check if file has already been indexed, and has changed since
-    // - Make path term, 
-    // - query db: postlist_begin->docid
-    // - fetch doc (get_document(docid)
-    // - check date field, maybe skip
+    if (!me->db.needUpdate(fn, stp))
+	return FsTreeWalker::FtwOk;
 
     // Turn file into a document. The document has fields for title, body 
     // etc.,  all text converted to utf8
-    Rcl::Doc *doc = fun(me->config, fn,  mime);
-
-#if 0
+    Rcl::Doc doc;
+    if (!fun(me->config, fn,  mime, doc))
+	return FsTreeWalker::FtwOk;
+
     // Set up xapian document, add postings and misc fields, 
     // add to or update database.
-    dbadd(doc);
-#endif
+    if (!me->db.add(fn, doc))
+	return FsTreeWalker::FtwError;
 
     return FsTreeWalker::FtwOk;
 }
+
 
 
 int main(int argc, const char **argv)
@@ -180,7 +209,7 @@
     }
     vector<string> tdl;
     if (ConfTree::stringToStrings(topdirs, tdl)) {
-	for (int i = 0; i < tdl.size(); i++) {
+	for (unsigned int i = 0; i < tdl.size(); i++) {
 	    string topdir = tdl[i];
 	    cout << topdir << endl;
 	    string dbdir;