Child: [29fe1e] (diff)

Download this file

rclexecm.py    196 lines (163 with data), 6.3 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env python
###########################################
## Generic recoll multifilter communication code
import sys
import os
class RclExecM:
    """Generic communication engine for recoll multi-document filters.

    The engine reads request messages on standard input and writes answers
    on standard output. A request is a sequence of parameters, each made of
    a ``name size`` line followed by ``size`` characters of data, terminated
    by an empty line. An answer uses the same ``Name: size`` + data layout
    and is also terminated by an empty line.

    The actual document extraction is delegated to a "processor" object
    which must provide ``openfile(params)``, ``getipath(params)`` and
    ``getnext(params)``; the last two return ``(ok, data, ipath, eof)``.

    NOTE(review): sizes are computed with len() and read with
    sys.stdin.read(), which count characters on Python 3 text streams --
    confirm what the parent process expects before running under Python 3.
    """

    # End-of-data indicators accepted by answer()
    noteof = 0
    eofnext = 1
    eofnow = 2

    # Error indicators accepted by answer()
    noerror = 0
    subdocerror = 1
    fileerror = 2

    def __init__(self):
        # Program name, used as a prefix for log messages
        self.myname = os.path.basename(sys.argv[0])
        # MIME type of the next answer, empty when unknown
        self.mimetype = ""

    def rclog(self, s, doexit=0, exitvalue=1):
        """Write a log line to stderr, optionally exiting afterwards.

        s: message to log.
        doexit: when true, terminate the process after logging.
        exitvalue: process exit status used when doexit is true.
        """
        sys.stderr.write("RCLMFILT: %s : %s\n" % (self.myname, s))
        if doexit:
            sys.exit(exitvalue)

    def htmlescape(self, txt):
        """Return txt with '&', '<' and '"' replaced by HTML entities."""
        # The ampersand replacement must stay first: done later, it would
        # re-escape the '&' introduced by the other replacements.
        txt = txt.replace("&", "&amp;")
        txt = txt.replace("<", "&lt;")
        txt = txt.replace('"', "&quot;")
        return txt

    # Our worker sometimes knows the mime types of the data it sends
    def setmimetype(self, mt):
        """Record the MIME type to be sent with the next answer."""
        self.mimetype = mt

    def readparam(self):
        """Read a single parameter from standard input.

        A parameter is a line holding the parameter name and data size,
        followed by the data itself.

        Returns a (name, data) tuple; the name is lowercased. An empty
        input line yields ("", ""), which marks the end of a message.
        Exits the process with status 0 on end of input, and with status 1
        (after logging) on a malformed line or a short read.
        """
        line = sys.stdin.readline()
        if line == '':
            # EOF on input: our parent is gone, terminate quietly
            sys.exit(0)

        line = line.rstrip("\n")
        if line == "":
            return ("", "")

        fields = line.split()
        if len(fields) != 2:
            self.rclog("bad line: [" + line + "]", 1, 1)

        paramname = fields[0].lower()
        try:
            paramsize = int(fields[1])
        except ValueError:
            # Non-numeric size: report it like any other malformed line
            # instead of dying with a traceback.
            self.rclog("bad line: [" + line + "]", 1, 1)

        if paramsize > 0:
            paramdata = sys.stdin.read(paramsize)
            if len(paramdata) != paramsize:
                self.rclog("Bad read: wanted %d, got %d" %
                           (paramsize, len(paramdata)), 1, 1)
        else:
            paramdata = ""

        return (paramname, paramdata)

    def answer(self, docdata, ipath, iseof=noteof, iserror=noerror):
        """Send an answer: document, ipath, possible eof and error flags.

        docdata: document text to send.
        ipath: internal path of the document, not sent when empty.
        iseof: one of noteof, eofnext, eofnow.
        iserror: one of noerror, subdocerror, fileerror.

        The document (with its ipath and MIME type) is only sent when
        there is neither a file error nor an immediate end of data.
        """
        out = sys.stdout

        if iserror != RclExecM.fileerror and iseof != RclExecM.eofnow:
            out.write("Document: %d\n" % len(docdata))
            out.write(docdata)

            if len(ipath):
                out.write("Ipath: %d\n" % len(ipath))
                out.write(ipath)

            if len(self.mimetype):
                out.write("Mimetype: %d\n" % len(self.mimetype))
                out.write(self.mimetype)

        # If we're at the end of the contents, say so
        if iseof == RclExecM.eofnow:
            out.write("Eofnow: 0\n")
        elif iseof == RclExecM.eofnext:
            out.write("Eofnext: 0\n")

        if iserror == RclExecM.subdocerror:
            out.write("Subdocerror: 0\n")
        elif iserror == RclExecM.fileerror:
            out.write("Fileerror: 0\n")

        # End of message
        out.write("\n")
        out.flush()

    def processmessage(self, processor, params):
        """Handle one request message and send the matching answer.

        processor: object providing openfile(), getipath() and getnext().
        params: dict of the parameters read for this message.

        Exits the process if the message has no "filename:" entry.
        """
        # We must have a filename entry (even empty). Else exit
        if "filename:" not in params:
            self.rclog("no filename ??", 1, 1)

        # If we're given a file name, open it.
        if len(params["filename:"]) != 0:
            try:
                if not processor.openfile(params):
                    self.answer("", "", iserror=RclExecM.fileerror)
                    return
            except Exception as err:
                self.rclog("processmessage: openfile raised: [%s]" % (err,))
                self.answer("", "", iserror=RclExecM.fileerror)
                return

        # If we have an ipath, that's what we look for, else ask for next
        # entry
        ipath = ""
        # Value reported if the processor raises before returning its own
        # eof status; it compares equal to eofnext.
        eof = True
        self.mimetype = ""
        try:
            if "ipath:" in params and len(params["ipath:"]):
                ok, data, ipath, eof = processor.getipath(params)
            else:
                ok, data, ipath, eof = processor.getnext(params)
        except Exception as err:
            # Log the failure (as done for openfile) rather than hiding it
            self.rclog("processmessage: getipath/getnext raised: [%s]" %
                       (err,))
            self.answer("", "", eof, RclExecM.fileerror)
            return

        if ok:
            self.answer(data, ipath, eof)
        else:
            self.answer("", "", eof, RclExecM.subdocerror)

    def mainloop(self, processor):
        """Loop forever on messages from our master.

        The loop only ends through the sys.exit() calls made while reading
        parameters (end of input or protocol error).
        """
        while 1:
            params = dict()

            # Read at most 10 parameters (normally 1 or 2), stop at empty
            # line. End of message is signalled by empty paramname
            for i in range(10):
                paramname, paramdata = self.readparam()
                if paramname == "":
                    break
                params[paramname] = paramdata

            # Got message, act on it
            self.processmessage(processor, params)
# Common main routine for all python execm filters: either run the
# normal protocol engine or a local loop to test without recollindex
def main(proto, extract):
    """Run the execm protocol engine, or a local test loop.

    proto: protocol engine object; its mainloop() is run when the program
        has no command line argument, and its mimetype attribute is shown
        in test mode.
    extract: document extractor providing openfile(), getipath() and
        getnext(); the last two return (ok, data, ipath, eof).

    With command line arguments (file name, optional ipath) the extractor
    is exercised directly, for testing without an execm parent, and the
    results are printed on standard output. In that mode the process exits
    with status 1 if the file cannot be opened or the requested ipath
    cannot be extracted, and with status 0 after a successful ipath lookup.
    """
    if len(sys.argv) == 1:
        proto.mainloop(extract)
        return

    out = sys.stdout

    # Got a file name parameter: TESTING without an execm parent
    # Loop on all entries or get specific ipath
    params = {'filename:': sys.argv[1]}
    if not extract.openfile(params):
        out.write("Open error\n")
        sys.exit(1)

    ipath = ""
    if len(sys.argv) == 3:
        ipath = sys.argv[2]

    if ipath != "":
        params['ipath:'] = ipath
        ok, data, ipath, eof = extract.getipath(params)
        if ok:
            out.write("== Found entry for ipath %s (mimetype [%s]):\n" %
                      (ipath, proto.mimetype))
            out.write("%s\n" % (data,))
            out.write("\n")
            sys.exit(0)
        out.write("Got error, eof %d\n" % eof)
        # Report the failure to the caller, as done for an open error
        sys.exit(1)

    ecnt = 0
    while 1:
        ok, data, ipath, eof = extract.getnext(params)
        if not ok:
            out.write("Not ok, eof %d\n" % eof)
            break
        ecnt = ecnt + 1
        out.write("== Entry %d ipath %s (mimetype [%s]):\n" %
                  (ecnt, ipath, proto.mimetype))
        out.write("%s\n" % (data,))
        out.write("\n")
        if eof != RclExecM.noteof:
            break