|
a/src/python/samples/recollq.py |
|
b/src/python/samples/recollq.py |
|
... |
|
... |
6 |
"""
|
6 |
"""
|
7 |
|
7 |
|
8 |
import sys
|
8 |
import sys
|
9 |
import locale
|
9 |
import locale
|
10 |
from getopt import getopt
|
10 |
from getopt import getopt
|
|
|
11 |
|
|
|
12 |
# True when running under Python 3 or later. Consulted when deciding whether
# text must be explicitly encoded to UTF-8 before being printed.
ISP3 = sys.version_info[0] >= 3
|
11 |
|
16 |
|
12 |
try:
|
17 |
try:
|
13 |
from recoll import recoll
|
18 |
from recoll import recoll
|
14 |
from recoll import rclextract
|
19 |
from recoll import rclextract
|
15 |
hasextract = True
|
20 |
hasextract = True
|
|
... |
|
... |
19 |
|
24 |
|
20 |
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
25 |
allmeta = ("title", "keywords", "abstract", "url", "mimetype", "mtime",
|
21 |
"ipath", "fbytes", "dbytes", "relevancyrating")
|
26 |
"ipath", "fbytes", "dbytes", "relevancyrating")
|
22 |
|
27 |
|
23 |
def Usage():
    """Print the command-line synopsis to stderr and exit with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    # Diagnostics belong on stderr so they do not pollute query output on
    # stdout. sys.stderr.write() is used rather than print(..., file=...)
    # because the latter does not parse under Python 2 without a
    # __future__ import.
    sys.stderr.write(
        "Usage: recollq.py [-c conf] [-i extra_index] <recoll query>\n")
    sys.exit(1)
|
26 |
|
31 |
|
27 |
class ptrmeths:
|
32 |
class ptrmeths:
|
28 |
def __init__(self, groups):
|
33 |
def __init__(self, groups):
|
29 |
self.groups = groups
|
34 |
self.groups = groups
|
|
... |
|
... |
42 |
extractor = rclextract.Extractor(doc)
|
47 |
extractor = rclextract.Extractor(doc)
|
43 |
outfilename = extractor.idoctofile(doc.ipath, doc.mimetype, \
|
48 |
outfilename = extractor.idoctofile(doc.ipath, doc.mimetype, \
|
44 |
ofilename=outfilename)
|
49 |
ofilename=outfilename)
|
45 |
return outfilename
|
50 |
return outfilename
|
46 |
|
51 |
|
|
|
52 |
def utf8string(s):
    """Return *s* in a form that can be passed straight to print().

    On Python 3 the value is returned unchanged. On Python 2 a unicode
    string is encoded to UTF-8 bytes, while a value that is already a byte
    string is returned as-is.

    Args:
        s: the text (or already-encoded byte string) to prepare for output.

    Returns:
        *s* itself, or its UTF-8 encoding for Python 2 unicode input.
    """
    # Calling .encode() on a Python 2 byte string first performs an implicit
    # ASCII decode, which fails on any non-ASCII byte; pass such values
    # through untouched instead.
    if sys.version_info[0] >= 3 or isinstance(s, bytes):
        return s
    return s.encode('utf8')
|
|
|
57 |
|
47 |
def doquery(db, q, maxresults=20):
    """Run the query string *q* against *db* and print the first results.

    Prints the Xapian form of the query, the result counts, and then, for
    each fetched document, its row number, the "title", "mtime" and
    "author" attributes, and an abstract built by the query object.

    Args:
        db: database connection object providing ``query()``.
        q: the query string handed to ``query.execute()``.
        maxresults: upper bound on the number of documents fetched and
            printed (defaults to 20).
    """
    # Get query object
    query = db.query()
    #query.sortby("dmtime", ascending=True)

    # Parse/run input query string
    nres = query.execute(q, stemming=0, stemlang="english")
    qs = "Xapian query: [%s]" % query.getxquery()
    print(utf8string(qs))
    groups = query.getgroups()
    m = ptrmeths(groups)

    # Print results: the reported count is the full one, the clamp below
    # only limits how many documents are actually fetched.
    print("Result count: %d %d" % (nres, query.rowcount))
    #results = query.fetchmany(nres)
    #for doc in results:

    for _ in range(min(nres, maxresults)):
        doc = query.fetchone()
        # Use query.next when it is an integer attribute, otherwise fall
        # back to query.rownumber.
        if isinstance(query.next, int):
            rownum = query.next
        else:
            rownum = query.rownumber
        print("%d:" % (rownum,))
        #for k,v in doc.items().items():
        #print "KEY:", utf8string(k), "VALUE", utf8string(v)
        #continue
        #outfile = extractofile(doc)
        #print "outfile:", outfile, "url", utf8string(doc.url)
        for k in ("title", "mtime", "author"):
            value = getattr(doc, k)
            # value = doc.get(k)
            if value is None:
                print("%s: (None)" % (k,))
            else:
                print("%s : %s" % (k, utf8string(value)))
        #doc.setbinurl(bytearray("toto"))
        #burl = doc.getbinurl(); print("Bin URL : [%s]"%(doc.getbinurl(),))
        abstract = query.makedocabstract(doc, methods=m)
        print(utf8string(abstract))
        print('')
        # fulldoc = extract(doc)
        # print "FULLDOC MIMETYPE", fulldoc.mimetype, "TEXT:", fulldoc.text.encode("utf-8")
|
91 |
|
101 |
|
92 |
|
102 |
|
93 |
########################################### MAIN
|
103 |
########################################### MAIN
|
|
... |
|
... |
107 |
if opt == "-c":
|
117 |
if opt == "-c":
|
108 |
confdir = val
|
118 |
confdir = val
|
109 |
elif opt == "-i":
|
119 |
elif opt == "-i":
|
110 |
extra_dbs.append(val)
|
120 |
extra_dbs.append(val)
|
111 |
else:
|
121 |
else:
|
112 |
print >> sys.stderr, "Bad opt: ", opt
|
122 |
print("Bad opt: %s"%(opt,))
|
113 |
Usage()
|
123 |
Usage()
|
114 |
|
124 |
|
115 |
# The query should be in the remaining arg(s)
|
125 |
# The query should be in the remaining arg(s)
|
116 |
if len(args) == 0:
|
126 |
if len(args) == 0:
|
117 |
print >> sys.stderr, "No query found in command line"
|
127 |
print("No query found in command line")
|
118 |
Usage()
|
128 |
Usage()
|
119 |
q = u''
|
129 |
q = ''
|
120 |
for word in args:
|
130 |
for word in args:
|
121 |
q += word.decode(localecharset) + u' '
|
131 |
q += word + ' '
|
122 |
|
132 |
|
123 |
print "QUERY: [", q, "]"
|
133 |
print("QUERY: [%s]"%(q,))
|
124 |
db = recoll.connect(confdir=confdir,
|
134 |
db = recoll.connect(confdir=confdir, extra_dbs=extra_dbs)
|
125 |
extra_dbs=extra_dbs)
|
|
|
126 |
db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
|
135 |
db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
|
127 |
|
136 |
|
128 |
doquery(db, q)
|
137 |
doquery(db, q)
|