Parent: [64bf01] (diff)

Download this file

recollq.py    138 lines (117 with data), 3.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""A python version of the command line query tool recollq (a bit simplified)
The input string is always interpreted as a query language string.
This could actually be useful for something after some customization
"""
import sys
import locale
from getopt import getopt
# True when running under Python 3 or later. utf8string() consults this flag
# to decide whether text has to be encoded before being printed.
ISP3 = sys.version_info[0] >= 3
# Prefer the packaged layout, where both the query module and the extraction
# module live inside the 'recoll' package. When that import fails, fall back
# to a plain top-level 'recoll' module and record that rclextract is not
# available (presumably an older binding layout -- TODO confirm).
try:
    from recoll import recoll
    from recoll import rclextract
    hasextract = True
except ImportError:
    # Only import failures trigger the fallback; anything else (for example
    # KeyboardInterrupt) is allowed to propagate instead of being swallowed.
    import recoll
    hasextract = False
# Names of document metadata fields. Not referenced by the code visible in
# this script; presumably kept as a convenience for customization.
allmeta = (
    "title",
    "keywords",
    "abstract",
    "url",
    "mimetype",
    "mtime",
    "ipath",
    "fbytes",
    "dbytes",
    "relevancyrating",
)
def Usage():
    """Print the command-line synopsis and terminate with exit status 1."""
    synopsis = "Usage: recollq.py [-c conf] [-i extra_index] <recoll query>"
    print(synopsis)
    sys.exit(1)
class ptrmeths:
    """Callbacks producing the markup placed around matched terms.

    An instance is handed to query.makedocabstract() through its 'methods'
    argument (see doquery()).
    """

    def __init__(self, groups):
        # 'groups' is indexed as groups[idx][1] by startMatch(); element [1]
        # is joined with spaces, so it is assumed to be a sequence of strings.
        self.groups = groups

    def startMatch(self, idx):
        """Return the opening <span> tag for the match group number idx."""
        terms = self.groups[idx][1]
        fields = (idx, " ".join(terms))
        return '<span class="pyrclstart" idx="%d" ugroup="%s">' % fields

    def endMatch(self):
        """Return the closing tag matching the one built by startMatch()."""
        return '</span>'
def extract(doc):
    """Return the result of text extraction for *doc*.

    Builds an rclextract.Extractor for the document and calls its
    textextract() method with the document's ipath. Requires the rclextract
    module to have been imported (hasextract is True).
    """
    return rclextract.Extractor(doc).textextract(doc.ipath)
def extractofile(doc, outfilename=""):
    """Extract *doc* to a file and return what Extractor.idoctofile() returns.

    The document's ipath and mimetype are passed through, together with
    *outfilename* as the 'ofilename' keyword (empty string by default).
    The return value is presumably the name of the file that was written --
    TODO confirm against the rclextract documentation.
    """
    return rclextract.Extractor(doc).idoctofile(
        doc.ipath, doc.mimetype, ofilename=outfilename)
def utf8string(s):
    """Return *s* in the form this script passes to print().

    Under Python 3 (ISP3 is True) the value is returned unchanged; otherwise
    it is encoded to UTF-8 first.
    """
    if not ISP3:
        return s.encode('utf8')
    return s
def doquery(db, q, maxres=20):
    """Run the query string *q* against *db* and print the first results.

    Prints the Xapian form of the query and the result counts, then, for each
    of at most *maxres* documents: the row number, the title / mtime / author
    fields, and an abstract in which matches are wrapped by ptrmeths markup.

    Args:
        db: database object providing query(), as obtained from
            recoll.connect() in the main section of this script.
        q: the query string, passed unchanged to query.execute().
        maxres: upper bound on the number of results printed. Defaults to 20,
            the value that was previously hard-coded.
    """
    # Get query object
    query = db.query()
    # query.sortby("dmtime", ascending=True)

    # Parse/run input query string
    nres = query.execute(q, stemming=0, stemlang="english")
    print(utf8string("Xapian query: [%s]" % query.getxquery()))

    # The term groups feed the callbacks that mark up matches in abstracts.
    highlighter = ptrmeths(query.getgroups())

    # Print results:
    print("Result count: %d %d" % (nres, query.rowcount))
    for _ in range(min(nres, maxres)):
        doc = query.fetchone()

        # Use 'next' only when it is an integer; otherwise (not an int, or
        # not present at all) fall back to 'rownumber'. Presumably 'next' is
        # the older name of the row index on some binding versions --
        # NOTE(review): confirm against the Recoll Python API.
        next_attr = getattr(query, "next", None)
        rownum = next_attr if isinstance(next_attr, int) else query.rownumber
        print("%d:" % (rownum,))

        # Customization hints (disabled):
        #   for k, v in doc.items().items():
        #       print("KEY:", utf8string(k), "VALUE", utf8string(v))
        #   outfile = extractofile(doc)
        #   print("outfile:", outfile, "url", utf8string(doc.url))

        for field in ("title", "mtime", "author"):
            value = getattr(doc, field)
            # value = doc.get(field)
            if value is None:
                print("%s: (None)" % (field,))
            else:
                print("%s : %s" % (field, utf8string(value)))

        # doc.setbinurl(bytearray("toto"))
        # print("Bin URL : [%s]" % (doc.getbinurl(),))

        abstract = query.makedocabstract(doc, methods=highlighter)
        print(utf8string(abstract))
        print('')

        # fulldoc = extract(doc)
        # print("FULLDOC MIMETYPE", fulldoc.mimetype,
        #       "TEXT:", fulldoc.text.encode("utf-8"))
########################################### MAIN
# Script entry: parse the command line, connect to the index, run the query.
if len(sys.argv) < 2:
    Usage()

# NOTE(review): neither value is used below, and locale.getdefaultlocale() is
# deprecated since Python 3.11. The call is kept only so that these
# module-level names stay defined; consider removing it.
language, localecharset = locale.getdefaultlocale()

confdir = ""
extra_dbs = []

# Snippet params
maxchars = 120
contextwords = 4

# Process options: [-c confdir] [-i extra_db [-i extra_db] ...]
# getopt() raises GetoptError for an unknown option or a missing option
# argument, so the error is reported here and routed to Usage() instead of
# ending in a traceback.
from getopt import GetoptError
try:
    options, args = getopt(sys.argv[1:], "c:i:")
except GetoptError as err:
    print("Bad opt: %s" % (err,))
    Usage()
for opt, val in options:
    if opt == "-c":
        confdir = val
    elif opt == "-i":
        extra_dbs.append(val)

# The query should be in the remaining arg(s)
if not args:
    print("No query found in command line")
    Usage()
# Join the words with single blanks (no trailing blank).
q = " ".join(args)
print("QUERY: [%s]" % (q,))

db = recoll.connect(confdir=confdir, extra_dbs=extra_dbs)
db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
doquery(db, q)