|
a |
|
b/src/python/samples/rcldlkp.py |
|
|
1 |
#!/usr/bin/env python
|
|
|
2 |
__doc__ = """
|
|
|
3 |
''Lookup'' notes file indexing
|
|
|
4 |
|
|
|
5 |
The file format has text notes separated by lines with a single '%' character
|
|
|
6 |
|
|
|
7 |
If the script is called with just the file name as an argument, it will
|
|
|
8 |
(re)index the contents.
|
|
|
9 |
|
|
|
10 |
If the script is called with second numeric argument, it will retrieve the
|
|
|
11 |
specified record and output it in html
|
|
|
12 |
"""
|
|
|
13 |
|
|
|
14 |
import os
|
|
|
15 |
import stat
|
|
|
16 |
import sys
|
|
|
17 |
import re
|
|
|
18 |
|
|
|
19 |
# Recoll configuration directory, passed as confdir to recollq.connect()
# when the script runs in indexing mode.
# NOTE(review): hard-coded, user-specific path -- adjust for the local setup.
rclconf = "/Users/dockes/.recoll-test"
|
|
|
20 |
|
|
|
21 |
def udi(docfile, numrec):
    """Return the unique document identifier for a record.

    The identifier is the notes file path and the record number joined
    by a '#' character.
    """
    parts = [docfile, str(numrec)]
    return "#".join(parts)
|
|
|
23 |
|
|
|
24 |
###############################################################
|
|
|
25 |
def index_rec(db, numrec, rec):
    """Build a recollq document for one record and store it in the index.

    db     -- index handle; only its addOrUpdate() method is used here
    numrec -- record number, stored as the document ipath and used to
              build the udi; record 0 additionally carries the signature
    rec    -- raw record text, decoded as ISO-8859-1

    Relies on the module-level names docfile, fbytes and fmtime, and on
    the recollq module having been imported before the call.
    """
    doc = recollq.Doc()
    doc.url = "file://" + docfile
    doc.ipath = str(numrec)
    doc.mimetype = "text/plain"
    # utf8fn, mtime, origcharset, keywords, abstract and author are not set.

    # Title: the second line of the record, or the third one when the
    # second is blank. The candidate is kept in a local so that records
    # with fewer than two lines never read doc.title back before it has
    # been assigned.
    lines = rec.split("\n")
    if len(lines) >= 2:
        title = unicode(lines[1], "iso-8859-1")
        if not title.strip() and len(lines) >= 3:
            title = unicode(lines[2], "iso-8859-1")
        doc.title = title

    # fbytes is the size of the whole notes file, dbytes the size of this
    # record only.
    doc.fbytes = str(fbytes)
    doc.text = unicode(rec, "iso-8859-1")
    doc.dbytes = str(len(rec))

    # Only record 0 carries the signature (the file modification time).
    if numrec == 0:
        doc.sig = str(fmtime)

    db.addOrUpdate(udi(docfile, numrec), u"", doc)
|
|
|
55 |
|
|
|
56 |
def output_rec(rec):
    """Write one record to standard output as a minimal UTF-8 HTML page.

    rec is the raw record text; it is decoded as ISO-8859-1, re-encoded
    as UTF-8, HTML-escaped and emitted inside a <pre> element.
    """
    text = unicode(rec, "iso-8859-1").encode("utf-8")

    # '&' has to be escaped first: otherwise the '&' of the entities
    # produced by the following replacements would be escaped again.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    # The entity for a double quote is &quot; ("&dquot;" does not exist).
    text = text.replace('"', "&quot;")

    sys.stdout.write(
        '<html><head>\n'
        '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">\n'
        '</head><body><pre>\n'
        + text +
        '\n</pre></body></html>\n')
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
################################################################
|
|
|
70 |
|
|
|
71 |
def usage():
    """Print the command-line synopsis on stderr and exit with status 1."""
    # Report the name the script was actually invoked with instead of a
    # fixed name that does not match this file.
    prog = os.path.basename(sys.argv[0])
    sys.stderr.write("Usage: %s <filename> [<recnum>]\n" % prog)
    # sys.exit() rather than the exit() helper installed by the site
    # module, which is intended for interactive sessions and is missing
    # when site is not loaded.
    sys.exit(1)
|
|
|
74 |
|
|
|
75 |
# Script body.
#   rcldlkp.py <filename>           (re)index every record of the file
#   rcldlkp.py <filename> <recnum>  print record <recnum> as HTML
if len(sys.argv) < 2:
    usage()

docfile = sys.argv[1]

# targetnum is None in indexing mode, else the number of the record to print.
if len(sys.argv) > 2:
    targetnum = int(sys.argv[2])
else:
    targetnum = None

# Modification time and size of the notes file; both are read by index_rec().
stdata = os.stat(docfile)
fmtime = stdata[stat.ST_MTIME]
fbytes = stdata[stat.ST_SIZE]

if targetnum is None:
    # recollq is only needed for indexing, so lookups work without it.
    import recollq
    db = recollq.connect(confdir=rclconf, writable=1)
    # Record 0 holds the file signature: nothing to do if it is current.
    if not db.needUpdate(udi(docfile, 0), str(fmtime)):
        sys.exit(0)

# A separator is a line holding only '%' and optional trailing whitespace.
# The pattern is anchored at the end so that note lines which merely start
# with '%' are not mistaken for separators, and compiled once, outside the
# read loop.
separator = re.compile(r"^%\s*$")


def _handle_record(numrec, rec):
    """Index the record, or print it and exit if it is the requested one."""
    if targetnum is None:
        index_rec(db, numrec, rec)
    elif targetnum == numrec:
        output_rec(rec)
        sys.exit(0)


f = open(docfile, 'r')
try:
    reclines = []
    numrec = 1
    for line in f:
        if separator.match(line):
            _handle_record(numrec, "".join(reclines))
            numrec += 1
            reclines = []
        else:
            reclines.append(line)
    # Records are *separated* by '%' lines, so the last one may not be
    # followed by a separator; process it unless it is blank.
    lastrec = "".join(reclines)
    if lastrec.strip():
        _handle_record(numrec, lastrec)
finally:
    # Also runs when sys.exit() is raised from inside the loop.
    f.close()

if targetnum is None:
    # Store record 0 last: it carries the signature tested by needUpdate().
    index_rec(db, 0, "")
|
|
|
114 |
|