Switch to unified view

a b/src/python/samples/rcldlkp.py
1
#!/usr/bin/env python
2
__doc__ = """
3
''Lookup'' notes file indexing
4
5
The file format has text notes separated by lines with a single '%' character
6
7
If the script is called with just the file name as an argument, it will 
8
(re)index the contents.
9
10
If the script is called with a second numeric argument, it will retrieve the
11
specified record and output it as HTML.
12
"""
13
14
import os
15
import stat
16
import sys
17
import re
18
19
# Recoll configuration directory handed to recollq.connect() when indexing.
# NOTE(review): hard-coded, user-specific path -- adjust for the local setup.
rclconf = "/Users/dockes/.recoll-test"
20
21
def udi(docfile, numrec):
    """Return the unique document identifier of record *numrec* in *docfile*.

    The identifier is the file name and the record number joined by '#'.
    """
    record_id = str(numrec)
    return "#".join((docfile, record_id))
23
24
###############################################################
25
def index_rec(db, numrec, rec):
    """Store one record of the notes file in the index.

    db     -- index handle; only its addOrUpdate() method is used
    numrec -- record number, stored as the document ipath. Record 0 is the
              only one which receives a signature (the file modification
              time)
    rec    -- raw text of the record, decoded as iso-8859-1

    Reads the module-level docfile, fbytes and fmtime values.
    """
    charset = "iso-8859-1"

    doc = recollq.Doc()
    # All records share the url of the file and are told apart by the ipath
    doc.url = "file://" + docfile
    doc.ipath = str(numrec)
    doc.mimetype = "text/plain"

    # Title: second line of the record, else the third one when the second
    # is blank
    record_lines = rec.split("\n")
    if len(record_lines) > 1:
        doc.title = unicode(record_lines[1], charset)
    if not doc.title.strip() and len(record_lines) > 2:
        doc.title = unicode(record_lines[2], charset)

    # fbytes is the size of the whole file, dbytes the size of this record
    doc.fbytes = str(fbytes)
    doc.text = unicode(rec, charset)
    doc.dbytes = str(len(rec))

    if numrec == 0:
        doc.sig = str(fmtime)

    db.addOrUpdate(udi(docfile, numrec), u"", doc)
55
56
def output_rec(rec):
    """Write record *rec* to standard output as a minimal UTF-8 HTML page.

    rec -- text of the record. A byte string is decoded as iso-8859-1;
           text which is already decoded is used as is.

    The record is HTML-escaped and placed inside a <pre> element. The page
    is always emitted as UTF-8 bytes, matching the charset announced in the
    <meta> header.
    """
    if isinstance(rec, bytes):
        rec = rec.decode("iso-8859-1")

    # '&' has to be escaped first: otherwise the ampersands introduced by
    # the other entities would themselves be escaped ('<' -> '&amp;lt;').
    rec = rec.replace("&", "&amp;")
    rec = rec.replace("<", "&lt;")
    # The HTML entity for a double quote is &quot;
    rec = rec.replace('"', "&quot;")

    page = "\n".join([
        '<html><head>',
        '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">',
        '</head><body><pre>',
        rec,
        '</pre></body></html>',
    ]) + "\n"

    # Python 3 text streams expose the underlying byte stream as .buffer;
    # Python 2 file objects accept byte strings directly.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    out.write(page.encode("utf-8"))
67
68
69
################################################################
70
71
def usage():
    """Print the command line synopsis on stderr and exit with status 1."""
    sys.stderr.write("Usage: doclookup.py <filename> [<recnum>]\n")
    # sys.exit rather than the bare exit(): the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(1)
74
75
# Command line: <filename> [<recnum>]
if len(sys.argv) < 2:
    usage()

docfile = sys.argv[1]

# A second argument selects lookup mode: the record with that number is
# printed as HTML. Without it, the file is (re)indexed.
targetnum = None
if len(sys.argv) > 2:
    try:
        targetnum = int(sys.argv[2])
    except ValueError:
        # Not a number: show the synopsis instead of a traceback
        usage()

# File modification time and size, also read by index_rec()
stdata = os.stat(docfile)
fmtime = stdata[stat.ST_MTIME]
fbytes = stdata[stat.ST_SIZE]

if targetnum is None:
    # The index module is only needed (and imported) when indexing
    import recollq
    db = recollq.connect(confdir=rclconf, writable=1)
    # Record 0 carries the file modification time as its signature:
    # nothing to do if the index is already up to date
    if not db.needUpdate(udi(docfile, 0), str(fmtime)):
        sys.exit(0)

# Any line beginning with '%' terminates the current record. Compiled once,
# outside of the loop.
separator_re = re.compile("^%[ \t]*")

rec = ""
numrec = 1
f = open(docfile, 'r')
try:
    for line in f:
        if separator_re.match(line):
            if targetnum is None:
                index_rec(db, numrec, rec)
            elif targetnum == numrec:
                output_rec(rec)
                sys.exit(0)
            numrec += 1
            rec = ""
        else:
            rec += line
finally:
    # Also reached when sys.exit() is called from inside the loop
    f.close()

# NOTE(review): text following the last '%' line is never indexed nor
# output; confirm that notes files are expected to end with a separator.

if targetnum is None:
    # Finally store the entry for the file itself, which holds the signature
    index_rec(db, 0, "")
114