Switch to side-by-side view

--- a/src/python/recoll/pyrecoll.cpp
+++ b/src/python/recoll/pyrecoll.cpp
@@ -1,5 +1,5 @@
 #ifndef lint
-static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.5 2008-07-01 08:24:30 dockes Exp $ (C) 2007 J.F.Dockes";
+static char rcsid[] = "@(#$Id: pyrecoll.cpp,v 1.6 2008-08-26 07:36:41 dockes Exp $ (C) 2007 J.F.Dockes";
 #endif
 
 #include <Python.h>
@@ -35,6 +35,8 @@
     return result;
 }
 
+//////////////////////////////////////////////////////
+////// Python object definitions for Db, Query, and Doc
 typedef struct {
     PyObject_HEAD
     /* Type-specific fields go here. */
@@ -46,7 +48,7 @@
     "recollq.Db",             /*tp_name*/
     sizeof(recollq_DbObject), /*tp_basicsize*/
     0,                         /*tp_itemsize*/
-    0,    /*tp_dealloc*/
+    0,                         /*tp_dealloc*/
     0,                         /*tp_print*/
     0,                         /*tp_getattr*/
     0,                         /*tp_setattr*/
@@ -62,25 +64,26 @@
     0,                         /*tp_setattro*/
     0,                         /*tp_as_buffer*/
     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,        /*tp_flags*/
-    "Recollq Db objects",           /* tp_doc */
+    "Recollq Db objects",      /* tp_doc */
     0,		               /* tp_traverse */
     0,		               /* tp_clear */
     0,		               /* tp_richcompare */
     0,		               /* tp_weaklistoffset */
     0,		               /* tp_iter */
     0,		               /* tp_iternext */
-    0,             /* tp_methods */
-    0,             /* tp_members */
+    0,                         /* tp_methods */
+    0,                         /* tp_members */
     0,                         /* tp_getset */
     0,                         /* tp_base */
     0,                         /* tp_dict */
     0,                         /* tp_descr_get */
     0,                         /* tp_descr_set */
     0,                         /* tp_dictoffset */
-    0,      /* tp_init */
+    0,                         /* tp_init */
     0,                         /* tp_alloc */
-    0,                 /* tp_new */
+    0,                         /* tp_new */
 };
+
 
 typedef struct {
     PyObject_HEAD
@@ -111,24 +114,24 @@
     0,                         /*tp_setattro*/
     0,                         /*tp_as_buffer*/
     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,        /*tp_flags*/
-    "Recollq Query objects",           /* tp_doc */
+    "Recollq Query object",    /* tp_doc */
     0,		               /* tp_traverse */
     0,		               /* tp_clear */
     0,		               /* tp_richcompare */
     0,		               /* tp_weaklistoffset */
     0,		               /* tp_iter */
     0,		               /* tp_iternext */
-    0,             /* tp_methods */
-    0,             /* tp_members */
+    0,                         /* tp_methods */
+    0,                         /* tp_members */
     0,                         /* tp_getset */
     0,                         /* tp_base */
     0,                         /* tp_dict */
     0,                         /* tp_descr_get */
     0,                         /* tp_descr_set */
     0,                         /* tp_dictoffset */
-    0,      /* tp_init */
+    0,                         /* tp_init */
     0,                         /* tp_alloc */
-    0,                 /* tp_new */
+    0,                         /* tp_new */
 };
 typedef struct {
     PyObject_HEAD
@@ -158,26 +161,28 @@
     0,                         /*tp_setattro*/
     0,                         /*tp_as_buffer*/
     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,        /*tp_flags*/
-    "Recollq Doc objects",           /* tp_doc */
+    "Recollq Doc objects",     /* tp_doc */
     0,		               /* tp_traverse */
     0,		               /* tp_clear */
     0,		               /* tp_richcompare */
     0,		               /* tp_weaklistoffset */
     0,		               /* tp_iter */
     0,		               /* tp_iternext */
-    0,             /* tp_methods */
-    0,             /* tp_members */
+    0,                         /* tp_methods */
+    0,                         /* tp_members */
     0,                         /* tp_getset */
     0,                         /* tp_base */
     0,                         /* tp_dict */
     0,                         /* tp_descr_get */
     0,                         /* tp_descr_set */
     0,                         /* tp_dictoffset */
-    0,      /* tp_init */
+    0,                         /* tp_init */
     0,                         /* tp_alloc */
-    0,                 /* tp_new */
+    0,                         /* tp_new */
 };
 
+///////////////////////////////////////////////
+////// Db object code
 static void 
 Db_dealloc(recollq_DbObject *self)
 {
@@ -206,12 +211,13 @@
 Db_init(recollq_DbObject *self, PyObject *args, PyObject *kwargs)
 {
     LOGDEB(("Db_init\n"));
-    static char *kwlist[] = {"confdir", "extra_dbs", NULL};
+    static char *kwlist[] = {"confdir", "extra_dbs", "writable", NULL};
     PyObject *extradbs = 0;
     char *confdir = 0;
-
-    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sO", kwlist,
-				     &confdir, &extradbs))
+    int writable = 0;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|sOi", kwlist,
+				     &confdir, &extradbs, &writable))
 	return -1;
 
     // If the user creates several dbs, changing the confdir, we call 
@@ -239,9 +245,10 @@
     self->db = new Rcl::Db;
     string dbdir = rclconfig->getDbDir();
     LOGDEB(("Db_init: getdbdir ok: [%s]\n", dbdir.c_str()));
-    if (!self->db->open(dbdir, rclconfig->getStopfile(), Rcl::Db::DbRO)) {
+    if (!self->db->open(dbdir, rclconfig->getStopfile(), writable ? 
+			Rcl::Db::DbUpd : Rcl::Db::DbRO)) {
 	LOGDEB(("Db_init: db open error\n"));
-	PyErr_SetString(PyExc_EnvironmentError, "Cant open index");
+	PyErr_SetString(PyExc_EnvironmentError, "Can't open index");
         return -1;
     }
 
@@ -355,20 +362,85 @@
 				     "UTF-8", "replace");
 }
 
+// Db.needUpdate(udi, sig): returns Rcl::Db::needUpdate()'s answer as an int.
+static PyObject *
+Db_needUpdate(recollq_DbObject* self, PyObject *args, PyObject *kwds)
+{
+    char *udi = 0, *sig = 0;
+    LOGDEB(("Db_needUpdate\n"));
+    if (!PyArg_ParseTuple(args, "eses:Db_needUpdate", 
+			  "utf-8", &udi, "utf-8", &sig))
+	return 0;
+    PyObject *ret = 0;
+    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
+	LOGERR(("Db_needUpdate: db not found %p\n", self->db));
+        PyErr_SetString(PyExc_AttributeError, "db");
+    } else {
+	ret = Py_BuildValue("i", self->db->needUpdate(udi, sig) ? 1 : 0);
+    }
+    PyMem_Free(udi);  // "es" buffers are ours: release them on every path
+    PyMem_Free(sig);
+    return ret;
+}
+
+// Db.addOrUpdate(udi, parent_udi, doc): hand the document to the index.
+// Returns None; raises AttributeError for a stale db/doc or an rcldb error.
+static PyObject *
+Db_addOrUpdate(recollq_DbObject* self, PyObject *args, PyObject *)
+{
+    LOGDEB(("Db_addOrUpdate\n"));
+    char *udi = 0;
+    char *parent_udi = 0;
+    recollq_DocObject *pydoc = 0;
+    // The text after ':' is the name Python shows in argument error messages
+    if (!PyArg_ParseTuple(args, "esesO!:Db_addOrUpdate",
+			  "utf-8", &udi, "utf-8", &parent_udi, 
+			  &recollq_DocType, &pydoc)) {
+	return 0;
+    }
+    bool ok = false;
+    if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
+	LOGERR(("Db_addOrUpdate: db not found %p\n", self->db));
+        PyErr_SetString(PyExc_AttributeError, "db");
+    } else if (pydoc->doc == 0 || the_docs.find(pydoc->doc) == the_docs.end()) {
+	LOGERR(("Db_addOrUpdate: doc not found %p\n", pydoc->doc));
+        PyErr_SetString(PyExc_AttributeError, "doc");
+    } else if (!self->db->addOrUpdate(udi, parent_udi, *pydoc->doc)) {
+	LOGERR(("Db_addOrUpdate: rcldb error\n"));
+        PyErr_SetString(PyExc_AttributeError, "rcldb error");
+    } else {
+	ok = true;
+    }
+    // Single exit: the "es" buffers are ours and are released on every path
+    PyMem_Free(udi);
+    PyMem_Free(parent_udi);
+    if (!ok)
+	return 0;
+    Py_RETURN_NONE;
+}
+    
 static PyMethodDef Db_methods[] = {
     {"query", (PyCFunction)Db_query, METH_NOARGS,
      "Return a new, blank query for this index"
     },
     {"setAbstractParams", (PyCFunction)Db_setAbstractParams, 
      METH_VARARGS|METH_KEYWORDS,
-     "Set abstract build params: maxchars and contextwords"
+     "Set abstract build parameters: maxchars and contextwords"
     },
     {"makeDocAbstract", (PyCFunction)Db_makeDocAbstract, METH_VARARGS,
-     "Return a new, blank query for this index"
+     "Build keyword in context abstract for document and query"
+    },
+    {"needUpdate", (PyCFunction)Db_needUpdate, METH_VARARGS,
+     "Check index up to date"
+    },
+    {"addOrUpdate", (PyCFunction)Db_addOrUpdate, METH_VARARGS,
+     "Add or update document in index"
     },
     {NULL}  /* Sentinel */
 };
 
+/////////////////////////////////////////////
+/// Query object methods
 static void 
 Query_dealloc(recollq_QueryObject *self)
 {
@@ -394,6 +466,9 @@
     return (PyObject *)self;
 }
 
+// Query_init creates an unusable object. The only way to create a
+// valid Query object is through Db_query() (the alternative would be
+// to add a Db parameter to the Query object creation method).
 static int
 Query_init(recollq_QueryObject *self, PyObject *, PyObject *)
 {
@@ -411,9 +486,8 @@
 Query_execute(recollq_QueryObject* self, PyObject *args, PyObject *kwds)
 {
     char *utf8 = 0;
-    int len = 0;
     LOGDEB(("Query_execute\n"));
-    if (!PyArg_ParseTuple(args, "es#:Query_execute", "utf-8", &utf8, &len)) {
+    if (!PyArg_ParseTuple(args, "es:Query_execute", "utf-8", &utf8)) {
 	return 0;
     }
 
@@ -425,6 +499,7 @@
     }
     string reason;
     Rcl::SearchData *sd = wasaStringToRcl(utf8, reason);
+    PyMem_Free(utf8);
     if (!sd) {
 	PyErr_SetString(PyExc_ValueError, reason.c_str());
 	return 0;
@@ -451,24 +526,22 @@
         PyErr_SetString(PyExc_AttributeError, "query: no results");
 	return 0;
     }
-    Rcl::Doc *doc = new Rcl::Doc;
-    int percent;
-    if (!self->query->getDoc(self->next, *doc, &percent)) {
-        PyErr_SetString(PyExc_EnvironmentError, "query: cant fetch result");
-	self->next = -1;
-	return 0;
-    }
-    self->next++;
     recollq_DocObject *result = 
 	(recollq_DocObject *)obj_Create(&recollq_DocType, 0, 0);
     if (!result) {
-	delete doc;
-	return 0;
-    }
-    result->doc = doc;
-    the_docs.insert(result->doc);
+	LOGERR(("Query_fetchone: couldn't create doc object for result\n"));
+	return 0;
+    }
+    int percent;
+    if (!self->query->getDoc(self->next, *result->doc, &percent)) {
+        PyErr_SetString(PyExc_EnvironmentError, "query: cant fetch result");
+	self->next = -1;
+	return 0;
+    }
+    self->next++;
     // Move some data from the dedicated fields to the meta array to make 
     // fetching attributes easier
+    Rcl::Doc *doc = result->doc;
     printableUrl(rclconfig->getDefCharset(), doc->url, doc->meta["url"]);
     doc->meta["mimetype"] = doc->mimetype;
     doc->meta["mtime"] = doc->dmtime.empty() ? doc->fmtime : doc->dmtime;
@@ -502,7 +575,8 @@
     {NULL}  /* Sentinel */
 };
 
-
+///////////////////////////////////////////////////////////////////////
+///// Doc object methods
 static void 
 Doc_dealloc(recollq_DocObject *self)
 {
@@ -534,14 +608,21 @@
     if (self->doc)
 	the_docs.erase(self->doc);
     delete self->doc;
-    self->doc = 0;
+    self->doc = new Rcl::Doc;
+    if (self->doc == 0)
+	return -1;
+    the_docs.insert(self->doc);
     return 0;
 }
 
+// The "closure" argument is actually the meta field name. This is how
+// Python lets one pair of get/set functions serve several attributes:
+// each getseters table entry carries an extra pointer, which Python
+// passes to the functions and calls a "closure".
 static PyObject *
 Doc_getmeta(recollq_DocObject *self, void *closure)
 {
-    LOGDEB(("Doc_getmeta\n"));
+    LOGDEB(("Doc_getmeta: [%s]\n", (const char *)closure));
     if (self->doc == 0 || 
 	the_docs.find(self->doc) == the_docs.end()) {
         PyErr_SetString(PyExc_AttributeError, "doc");
@@ -568,32 +649,109 @@
 static int
 Doc_setmeta(recollq_DocObject *self, PyObject *value, void *closure)
 {
-    PyErr_SetString(PyExc_RuntimeError, "Cannot set attributes for now");
-    return -1;
+    // Setter shared by all entries of the getseters table: 'closure' is the
+    // attribute name and is used as the key into the document's meta map;
+    // keys with a dedicated Rcl::Doc field are mirrored into that field too.
+    // Returns 0 on success, -1 with a Python exception set on failure.
+    if (self->doc == 0 || 
+	the_docs.find(self->doc) == the_docs.end()) {
+        PyErr_SetString(PyExc_AttributeError, "doc??");
+	return -1;
+    }
+    LOGDEB2(("Doc_setmeta: doc %p\n", self->doc));
+
+    // Validate the key before allocating anything, so that the error
+    // returns below have nothing to release.
+    const char *key = (const char *)closure;
+    if (key == 0) {
+        PyErr_SetString(PyExc_AttributeError, "key??");
+	return -1;
+    }
+
+    // Python calls a setter with a NULL value for "del obj.attr": refuse
+    // the deletion instead of dereferencing the null pointer below.
+    if (value == 0) {
+	PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
+	return -1;
+    }
+
+    // PyUnicode_FromObject() returns a new reference: remember it in
+    // 'converted' so it can be dropped once the UTF-8 encoding is done.
+    PyObject *converted = 0;
+    if (PyString_Check(value)) {
+	value = converted = PyUnicode_FromObject(value);
+	if (value == 0) 
+	    return -1;
+    } 
+
+    if (!PyUnicode_Check(value)) {
+	PyErr_SetString(PyExc_AttributeError, "value not str/unicode??");
+	return -1;
+    }
+
+    PyObject* putf8 = PyUnicode_AsUTF8String(value);
+    Py_XDECREF(converted);
+    if (putf8 == 0) {
+	LOGERR(("Doc_setmeta: encoding to utf8 failed\n"));
+	PyErr_SetString(PyExc_AttributeError, "value??");
+	return -1;
+    }
+
+    // uvalue points into putf8, which must stay alive until every copy
+    // below has been made.
+    char* uvalue = PyString_AsString(putf8);
+    LOGDEB(("Doc_setmeta: setting [%s] to [%s]\n", key, uvalue));
+    self->doc->meta[key] = uvalue;
+    // Mirror the value into the dedicated field when the key has one
+    if (!strcmp(key, "dbytes")) {
+	self->doc->dbytes = uvalue;
+    } else if (!strcmp(key, "fbytes")) {
+	self->doc->fbytes = uvalue;
+    } else if (!strcmp(key, "ipath")) {
+	self->doc->ipath = uvalue;
+    } else if (!strcmp(key, "mimetype")) {
+	self->doc->mimetype = uvalue;
+    } else if (!strcmp(key, "mtime")) {
+	self->doc->dmtime = uvalue;
+    } else if (!strcmp(key, "sig")) {
+	self->doc->sig = uvalue;
+    } else if (!strcmp(key, "text")) {
+	self->doc->text = uvalue;
+    } else if (!strcmp(key, "url")) {
+	self->doc->url = uvalue;
+    }
+    Py_DECREF(putf8);
+    return 0;
 }
 
 static PyGetSetDef Doc_getseters[] = {
     // Name, get, set, doc, closure
+    {"url", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "url", (void *)"url"},
+    {"ipath", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "ipath", (void *)"ipath"},
+    {"mimetype", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "mimetype", (void *)"mimetype"},
+    {"mtime", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "mtime", (void *)"mtime"},
+    {"fbytes", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "fbytes", (void *)"fbytes"},
+    {"dbytes", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "dbytes", (void *)"dbytes"},
+    {"relevance", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "relevance", (void *)"relevance"},
     {"title", (getter)Doc_getmeta, (setter)Doc_setmeta, 
      "title", (void *)"title"},
     {"keywords", (getter)Doc_getmeta, (setter)Doc_setmeta, 
      "keywords", (void *)"keywords"},
     {"abstract", (getter)Doc_getmeta, (setter)Doc_setmeta, 
      "abstract", (void *)"abstract"},
-    {"url", (getter)Doc_getmeta, (setter)Doc_setmeta, 
-     "url", (void *)"url"},
-    {"mimetype", (getter)Doc_getmeta, (setter)Doc_setmeta, 
-     "mimetype", (void *)"mimetype"},
-    {"mtime", (getter)Doc_getmeta, (setter)Doc_setmeta, 
-     "mtime", (void *)"mtime"},
-    {"ipath", (getter)Doc_getmeta, (setter)Doc_setmeta, 
-     "ipath", (void *)"ipath"},
-    {"fbytes", (getter)Doc_getmeta, (setter)Doc_setmeta, 
-     "fbytes", (void *)"fbytes"},
-    {"dbytes", (getter)Doc_getmeta, (setter)Doc_setmeta, 
-     "dbytes", (void *)"dbytes"},
-    {"relevance", (getter)Doc_getmeta, (setter)Doc_setmeta, 
-     "relevance", (void *)"relevance"},
+    {"author", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "author", (void *)"author"},
+    {"text", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "text", (void *)"text"},
+    {"sig", (getter)Doc_getmeta, (setter)Doc_setmeta, 
+     "sig", (void *)"sig"},
     {NULL}  /* Sentinel */
 };