|
a/src/python/samples/recollq.py |
|
b/src/python/samples/recollq.py |
|
... |
|
... |
5 |
"""
|
5 |
"""
|
6 |
|
6 |
|
7 |
import sys
|
7 |
import sys
|
8 |
from getopt import getopt
|
8 |
from getopt import getopt
|
9 |
import recoll
|
9 |
import recoll
|
|
|
10 |
import rclextract
|
10 |
|
11 |
|
11 |
# Document metadata field names. Nothing in the visible part of this script
# reads this tuple; it is kept as a module-level constant.
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
           "ipath", "fbytes", "dbytes", "relevancyrating")
|
13 |
|
14 |
|
14 |
def Usage():
    """Write the command-line synopsis to stderr and exit with status 1."""
    # sys.stderr.write() emits exactly what the Python 2-only
    # `print >> sys.stderr, ...` statement did (text plus a newline), but it
    # also parses and runs under Python 3.
    sys.stderr.write(
        "Usage: recollq.py [-c conf] [-i extra_index] <recoll query>\n")
    sys.exit(1)
|
17 |
|
18 |
|
|
|
19 |
class ptrmeths:
    """Callbacks that produce the markup placed around matched terms.

    doquery() builds an instance from ``query.getgroups()`` and passes it as
    the ``methods`` argument of ``query.makedocabstract()``.
    """

    def __init__(self, groups):
        """Remember the term groups.

        groups -- indexable collection; startMatch() reads ``groups[idx][1]``
                  and joins its items with spaces, so that element must be an
                  iterable of strings.
        """
        self.groups = groups

    def startMatch(self, idx):
        """Return the opening ``<span>`` tag for the match in group *idx*.

        The tag carries the group index and the space-joined terms of that
        group as its ``idx`` and ``ugroup`` attributes.
        """
        ugroup = " ".join(self.groups[idx][1])
        return '<span class="pyrclstart" idx="%d" ugroup="%s">' % (idx, ugroup)

    def endMatch(self):
        """Return the closing tag that pairs with startMatch()."""
        return '</span>'
|
|
|
27 |
|
|
|
28 |
def extract(doc):
    """Run an ``rclextract.Extractor`` built from *doc* on ``doc.ipath``.

    doc -- a result document; it must expose an ``ipath`` attribute.

    Returns whatever ``Extractor.extract()`` returns. The commented-out
    caller in doquery() reads ``mimetype`` and ``text`` from that value.
    """
    extractor = rclextract.Extractor(doc)
    newdoc = extractor.extract(doc.ipath)
    return newdoc
|
|
|
32 |
|
18 |
def doquery(db, q):
|
33 |
def doquery(db, q):
|
19 |
# Get query object
|
34 |
# Get query object
|
20 |
query = db.query()
|
35 |
query = db.query()
|
21 |
# Parse/run input query string
|
36 |
# Parse/run input query string
|
22 |
nres = query.execute(q, stemming = 1, stemlang="english")
|
37 |
nres = query.execute(q, stemming = 0, stemlang="english")
|
|
|
38 |
qs = u"Xapian query: [%s]" % query.getxquery()
|
|
|
39 |
print(qs.encode("utf-8"))
|
|
|
40 |
groups = query.getgroups()
|
|
|
41 |
print "Groups:", groups
|
|
|
42 |
m = ptrmeths(groups)
|
23 |
|
43 |
|
24 |
# Print results:
|
44 |
# Print results:
|
25 |
print "Result count: ", nres
|
45 |
print "Result count: ", nres
|
|
|
46 |
if nres > 20:
|
|
|
47 |
nres = 20
|
26 |
while query.next >= 0 and query.next < nres:
|
48 |
while query.next >= 0 and query.next < nres:
|
27 |
doc = query.fetchone()
|
49 |
doc = query.fetchone()
|
28 |
print query.next, ":",
|
50 |
print query.next, ":",
|
|
|
51 |
# for k,v in doc.items().items():
|
|
|
52 |
# print "KEY:", k.encode('utf-8'), "VALUE", v.encode('utf-8')
|
|
|
53 |
# continue
|
29 |
for k in ("title", "mtime", "author"):
|
54 |
for k in ("title", "mtime", "author"):
|
|
|
55 |
value = getattr(doc, k)
|
|
|
56 |
# value = doc.get(k)
|
|
|
57 |
if value is None:
|
|
|
58 |
print k, ":", "(None)"
|
|
|
59 |
else:
|
30 |
print k, ":", getattr(doc, k).encode('utf-8')
|
60 |
print k, ":", value.encode('utf-8')
|
31 |
#doc.setbinurl(bytearray("toto"))
|
61 |
#doc.setbinurl(bytearray("toto"))
|
32 |
#burl = doc.getbinurl(); print "Bin URL :", doc.getbinurl()
|
62 |
#burl = doc.getbinurl(); print "Bin URL :", doc.getbinurl()
|
33 |
abs = db.makeDocAbstract(doc, query).encode('utf-8')
|
63 |
abs = query.makedocabstract(doc, methods=m)
|
34 |
print abs
|
64 |
print abs.encode('utf-8')
|
35 |
print
|
65 |
print
|
|
|
66 |
# fulldoc = extract(doc)
|
|
|
67 |
# print "FULLDOC MIMETYPE", fulldoc.mimetype, "TEXT:", fulldoc.text.encode("utf-8")
|
36 |
|
68 |
|
|
|
69 |
|
|
|
70 |
########################################### MAIN
|
37 |
|
71 |
|
38 |
if len(sys.argv) < 2:
|
72 |
if len(sys.argv) < 2:
|
39 |
Usage()
|
73 |
Usage()
|
40 |
|
74 |
|
41 |
confdir=""
|
75 |
confdir=""
|
|
... |
|
... |
66 |
db = recoll.connect(confdir=confdir,
|
100 |
db = recoll.connect(confdir=confdir,
|
67 |
extra_dbs=extra_dbs)
|
101 |
extra_dbs=extra_dbs)
|
68 |
db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
|
102 |
db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
|
69 |
|
103 |
|
70 |
doquery(db, q)
|
104 |
doquery(db, q)
|
71 |
|
|
|
72 |
|
|
|