recoll / Code / Diff of /src/python/samples/recollq.py

Diff of /src/python/samples/recollq.py [a0e273] .. [3be5e9]

Switch to unified view

-a/src/python/samples/recollq.py
+b/src/python/samples/recollq.py
 ...
 """
 import sys
 from getopt import getopt
 import recoll
+import rclextract
 allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
            "ipath", "fbytes", "dbytes", "relevancyrating")
 def Usage():
     # Write the command-line synopsis to stderr, then terminate the script
     # with exit status 1.
     print >> sys.stderr, "Usage: recollq.py [-c conf] [-i extra_index] <recoll query>"
     sys.exit(1);
+class ptrmeths:
     # Builds the markup placed around matched terms. doquery() creates an
     # instance from query.getgroups() and passes it as the methods= argument
     # of query.makedocabstract(); presumably that call invokes startMatch()
     # and endMatch() for each match -- TODO confirm against the recoll
     # Python API documentation.
+    def __init__(self, groups):
         # groups: the value returned by query.getgroups(); startMatch()
         # indexes it as groups[idx][1].
+        self.groups = groups
+    def startMatch(self, idx):
         # Return the opening <span> tag for match group number idx. The
         # ugroup attribute is the space-joined content of groups[idx][1].
         # NOTE(review): ugroup is interpolated into the attribute value
         # without HTML escaping -- confirm that group terms can never
         # contain a double quote, or escape them here.
+        ugroup = " ".join(self.groups[idx][1])
+        return '<span class="pyrclstart" idx="%d" ugroup="%s">' % (idx, ugroup)
+    def endMatch(self):
         # Return the closing tag matching the <span> opened by startMatch().
+        return '</span>'
+def extract(doc):
     # Build an rclextract.Extractor for doc and return whatever its
     # extract() method yields for the document's own ipath.
     # Presumably the result exposes .mimetype and .text (the commented-out
     # caller at the end of doquery() reads both) -- TODO confirm.
+    extractor = rclextract.Extractor(doc)
+    newdoc = extractor.extract(doc.ipath)
+    return newdoc
 def doquery(db, q):
     # Run the query string q against the connected index db and print, in
     # order: the Xapian query, the match groups, the result count, and then
     # for each of at most 20 results its title/mtime/author fields and an
     # abstract built with the ptrmeths markup.
     # Get query object
     query = db.query()
     # Parse/run input query string
     # This revision changes the stemming argument from 1 to 0; stemlang is
     # still passed unchanged.
-    nres = query.execute(q, stemming = 1, stemlang="english")
+    nres = query.execute(q, stemming = 0, stemlang="english")
     # Show the query as returned by getxquery(), encoded to UTF-8 for output.
+    qs = u"Xapian query: [%s]" % query.getxquery()
+    print(qs.encode("utf-8"))
     # The groups are printed for inspection and also handed to ptrmeths,
     # which uses them to fill the ugroup attribute of each match <span>.
+    groups = query.getgroups()
+    print "Groups:", groups
+    m = ptrmeths(groups)
     # Print results:
     print "Result count: ", nres
     # Cap the number of results shown at 20; the full count was already
     # printed above.
+    if nres > 20:
+        nres = 20
     # Loop while query.next lies in [0, nres). Presumably query.next is the
     # index of the next result to be fetched -- TODO confirm against the
     # recoll Python API.
     while query.next >= 0 and query.next < nres:
         doc = query.fetchone()
         # Trailing comma: Python 2 print without a newline, so the fields
         # below continue on the same output line.
         print query.next, ":",
+#        for k,v in doc.items().items():
+#            print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
+#        continue
         for k in ("title", "mtime", "author"):
             # New in this revision: test the attribute for None before
             # encoding; the removed line called .encode() on it directly.
+            value = getattr(doc, k)
+#            value = doc.get(k)
+            if value is None:
+                print k, ":", "(None)"
+            else:
-            print k, ":", getattr(doc, k).encode('utf-8')
+                print k, ":", value.encode('utf-8')
         #doc.setbinurl(bytearray("toto"))
         #burl = doc.getbinurl(); print "Bin URL :", doc.getbinurl()
         # The abstract is now requested from the query object, with the
         # ptrmeths instance supplying the match markup, instead of from
         # db.makeDocAbstract(); UTF-8 encoding moves to the print statement.
         # NOTE(review): the local name abs shadows the builtin abs().
-        abs = db.makeDocAbstract(doc, query).encode('utf-8')
+        abs = query.makedocabstract(doc, methods=m)
-        print abs
+        print abs.encode('utf-8')
         print
         # Disabled example: fetch the full document text through extract().
+#        fulldoc = extract(doc)
+#        print "FULLDOC MIMETYPE", fulldoc.mimetype, "TEXT:", fulldoc.text.encode("utf-8")
+########################################### MAIN
 if len(sys.argv) < 2:
     Usage()
 confdir=""
 ...
 db = recoll.connect(confdir=confdir,
                     extra_dbs=extra_dbs)
 db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
 doquery(db, q)