|
a/src/filters/rcltxtlines.py |
|
b/src/filters/rcltxtlines.py |
|
... |
|
... |
25 |
def __init__(self, em):
|
25 |
def __init__(self, em):
|
26 |
# Store a ref to our execm object so that we can use its services.
|
26 |
# Store a ref to our execm object so that we can use its services.
|
27 |
self.em = em
|
27 |
self.em = em
|
28 |
|
28 |
|
29 |
# This is called once for every processed file during indexing, or
|
29 |
# This is called once for every processed file during indexing, or
|
30 |
# query preview. It usually creates some kind of table of
|
30 |
# query preview. For multi-document files, it usually creates some
|
31 |
# contents, and resets the current index in it, because we don't
|
31 |
# kind of table of contents, and resets the current index in it,
|
32 |
# know at this point if this is for indexing (will walk all
|
32 |
# because we don't know at this point if this is for indexing
|
33 |
# entries) or previewing (will request one). Actually we could
|
33 |
# (will walk all entries) or previewing (will request
|
34 |
# know from the environment but it's just simpler this way in
|
34 |
# one). Actually we could know from the environment but it's just
|
35 |
# general. Note that there is no close call, openfile() will just
|
35 |
# simpler this way in general. Note that there is no close call,
|
36 |
# be called repeatedly during indexing, and should clear any
|
36 |
# openfile() will just be called repeatedly during indexing, and
|
37 |
# existing state
|
37 |
# should clear any existing state
|
38 |
def openfile(self, params):
|
38 |
def openfile(self, params):
|
39 |
"""Open the text file, create a contents array"""
|
39 |
"""Open the text file, create a contents array"""
|
40 |
self.currentindex = -1
|
40 |
self.currentindex = -1
|
41 |
try:
|
41 |
try:
|
42 |
f = open(params["filename:"].decode('UTF-8'), "r")
|
42 |
f = open(params["filename:"].decode('UTF-8'), "r")
|
|
... |
|
... |
44 |
self.em.rclog("openfile: open failed: [%s]" % err)
|
44 |
self.em.rclog("openfile: open failed: [%s]" % err)
|
45 |
return False
|
45 |
return False
|
46 |
self.lines = f.readlines()
|
46 |
self.lines = f.readlines()
|
47 |
return True
|
47 |
return True
|
48 |
|
48 |
|
49 |
# This is called for query preview to request one specific
|
|
|
50 |
# entry. Here our internal paths are stringified line numbers, but
|
|
|
51 |
# they could be tar archive paths or whatever we returned during
|
|
|
52 |
# indexing.
|
|
|
53 |
def getipath(self, params):
|
|
|
54 |
return self.extractone(int(params["ipath:"]))
|
|
|
55 |
|
|
|
56 |
# This is called during indexing to walk the contents. The first
|
49 |
# This is called during indexing to walk the contents. The first
|
57 |
# time, we return a 'self' document, which may be empty (e.g. for
|
50 |
# time, we return a 'self' document, which may be empty (e.g. for
|
58 |
# a tar file), or might contain data (e.g. for an email body,
|
51 |
# a tar file), or might contain data (e.g. for an email body,
|
59 |
# further docs being the attachments).
|
52 |
# further docs being the attachments), and may also be the only
|
|
|
53 |
# document returned (for single document files).
|
60 |
def getnext(self, params):
|
54 |
def getnext(self, params):
|
61 |
|
55 |
|
62 |
# Self doc. Here empty.
|
56 |
# Self doc. Here empty.
|
|
|
57 |
#
|
|
|
58 |
# This could also be the only entry if this file type holds a
|
|
|
59 |
# single document. We return eofnext in this case
|
|
|
60 |
#
|
63 |
# !Note that the self doc has an *empty* ipath
|
61 |
# !Note that the self doc has an *empty* ipath
|
64 |
if self.currentindex == -1:
|
62 |
if self.currentindex == -1:
|
65 |
self.currentindex = 0
|
63 |
self.currentindex = 0
|
66 |
if len(self.lines) == 0:
|
64 |
if len(self.lines) == 0:
|
67 |
eof = rclexecm.RclExecM.eofnext
|
65 |
eof = rclexecm.RclExecM.eofnext
|
|
... |
|
... |
74 |
return (False, "", "", rclexecm.RclExecM.eofnow)
|
72 |
return (False, "", "", rclexecm.RclExecM.eofnow)
|
75 |
else:
|
73 |
else:
|
76 |
ret= self.extractone(self.currentindex)
|
74 |
ret= self.extractone(self.currentindex)
|
77 |
self.currentindex += 1
|
75 |
self.currentindex += 1
|
78 |
return ret
|
76 |
return ret
|
|
|
77 |
|
|
|
78 |
# This is called for query preview to request one specific (or the
|
|
|
79 |
# only) entry. Here our internal paths are stringified line
|
|
|
80 |
# numbers, but they could be tar archive paths or whatever we
|
|
|
81 |
# returned during indexing.
|
|
|
82 |
def getipath(self, params):
|
|
|
83 |
return self.extractone(int(params["ipath:"]))
|
79 |
|
84 |
|
80 |
# Most handlers factorize common code from getipath() and
|
85 |
# Most handlers factorize common code from getipath() and
|
81 |
# getnext() in an extractone() method, but this is not part of the
|
86 |
# getnext() in an extractone() method, but this is not part of the
|
82 |
# interface.
|
87 |
# interface.
|
83 |
def extractone(self, lno):
|
88 |
def extractone(self, lno):
|