recoll / Code / Diff of /src/filters/rcltxtlines.py

Diff of /src/filters/rcltxtlines.py [b2bd67] .. [236900]

Switch to unified view


...
    def __init__(self, em):
        """Keep a reference to the execm object passed in as em."""
        # Store a ref to our execm object so that we can use its services.
        self.em = em

    # This is called once for every processed file during indexing, or
    # query preview. For multi-document files, it usually creates some
    # kind of table of contents, and resets the current index in it,
    # because we don't know at this point if this is for indexing
    # (will walk all entries) or previewing (will request
    # one). Actually we could know from the environment but it's just
    # simpler this way in general. Note that there is no close call:
    # openfile() will just be called repeatedly during indexing, and
    # should clear any existing state.
    def openfile(self, params):
        """Open the text file, create a contents array"""
        self.currentindex = -1
        try:
            f = open(params["filename:"].decode('UTF-8'), "r")
...
            self.em.rclog("openfile: open failed: [%s]" % err)
            return False
        self.lines = f.readlines()
        return True








    # This is called during indexing to walk the contents. The first
    # time, we return a 'self' document, which may be empty (e.g. for
    # a tar file), or might contain data (e.g. for an email body,
    # further docs being the attachments), and may also be the only
    # document returned (for single document files).
    def getnext(self, params):

        # Self doc. Here empty.
        #
        # This could also be the only entry if this file type holds a
        # single document. We return eofnext in this case
        #
        # !Note that the self doc has an *empty* ipath
        if self.currentindex == -1:
            self.currentindex = 0
            if len(self.lines) == 0:
                eof = rclexecm.RclExecM.eofnext
...
            return (False, "", "", rclexecm.RclExecM.eofnow)
        else:
            ret= self.extractone(self.currentindex)
            self.currentindex += 1
            return ret

    # This is called for query preview to request one specific (or the
    # only) entry. Here our internal paths are stringified line
    # numbers, but they could be tar archive paths or whatever we
    # returned during indexing.
    def getipath(self, params):
        """Return the single entry designated by the request's ipath.

        The "ipath:" value found in params is converted with int() and
        handed to extractone(); that method's result is returned as is.
        """
        line_number = int(params["ipath:"])
        return self.extractone(line_number)

    # Most handlers factor the code shared by getipath() and getnext()
    # out into an extractone() method, but this is not part of the
    # interface.
    def extractone(self, lno):

	a/src/filters/rcltxtlines.py		b/src/filters/rcltxtlines.py
	...		...
25	def __init__(self, em):	25	def __init__(self, em):
26	# Store a ref to our execm object so that we can use its services.	26	# Store a ref to our execm object so that we can use its services.
27	self.em = em	27	self.em = em
28		28
29	# This is called once for every processed file during indexing, or	29	# This is called once for every processed file during indexing, or
30	# query preview. It usually creates some kind of table of	30	# query preview. For multi-document files, it usually creates some
31	# contents, and resets the current index in it, because we don't	31	# kind of table of contents, and resets the current index in it,
32	# know at this point if this is for indexing (will walk all	32	# because we don't know at this point if this is for indexing
33	# entries) or previewing (will request one). Actually we could	33	# (will walk all entries) or previewing (will request
34	# know from the environment but it's just simpler this way in	34	# one). Actually we could know from the environment but it's just
35	# general. Note that there is no close call, openfile() will just	35	# simpler this way in general. Note that there is no close call,
36	# be called repeatedly during indexing, and should clear any	36	# openfile() will just be called repeatedly during indexing, and
37	# existing state	37	# should clear any existing state
38	def openfile(self, params):	38	def openfile(self, params):
39	"""Open the text file, create a contents array"""	39	"""Open the text file, create a contents array"""
40	self.currentindex = -1	40	self.currentindex = -1
41	try:	41	try:
42	f = open(params["filename:"].decode('UTF-8'), "r")	42	f = open(params["filename:"].decode('UTF-8'), "r")
	...		...
44	self.em.rclog("openfile: open failed: [%s]" % err)	44	self.em.rclog("openfile: open failed: [%s]" % err)
45	return False	45	return False
46	self.lines = f.readlines()	46	self.lines = f.readlines()
47	return True	47	return True
48		48
49	# This is called for query preview to request one specific
50	# entry. Here our internal paths are stringified line numbers, but
51	# they could be tar archive paths or whatever we returned during
52	# indexing.
53	def getipath(self, params):
54	return self.extractone(int(params["ipath:"]))
55
56	# This is called during indexing to walk the contents. The first	49	# This is called during indexing to walk the contents. The first
57	# time, we return a 'self' document, which may be empty (e.g. for	50	# time, we return a 'self' document, which may be empty (e.g. for
58	# a tar file), or might contain data (e.g. for an email body,	51	# a tar file), or might contain data (e.g. for an email body,
59	# further docs being the attachments).	52	# further docs being the attachments), and may also be the only
		53	# document returned (for single document files).
60	def getnext(self, params):	54	def getnext(self, params):
61		55
62	# Self doc. Here empty.	56	# Self doc. Here empty.
		57	#
		58	# This could also be the only entry if this file type holds a
		59	# single document. We return eofnext in this case
		60	#
63	# !Note that the self doc has an empty ipath	61	# !Note that the self doc has an empty ipath
64	if self.currentindex == -1:	62	if self.currentindex == -1:
65	self.currentindex = 0	63	self.currentindex = 0
66	if len(self.lines) == 0:	64	if len(self.lines) == 0:
67	eof = rclexecm.RclExecM.eofnext	65	eof = rclexecm.RclExecM.eofnext
	...		...
74	return (False, "", "", rclexecm.RclExecM.eofnow)	72	return (False, "", "", rclexecm.RclExecM.eofnow)
75	else:	73	else:
76	ret= self.extractone(self.currentindex)	74	ret= self.extractone(self.currentindex)
77	self.currentindex += 1	75	self.currentindex += 1
78	return ret	76	return ret
		77
		78	# This is called for query preview to request one specific (or the
		79	# only) entry. Here our internal paths are stringified line
		80	# numbers, but they could be tar archive paths or whatever we
		81	# returned during indexing.
		82	def getipath(self, params):
		83	return self.extractone(int(params["ipath:"]))
79		84
80	# Most handlers factorize common code from getipath() and	85	# Most handlers factorize common code from getipath() and
81	# getnext() in an extractone() method, but this is not part of the	86	# getnext() in an extractone() method, but this is not part of the
82	# interface.	87	# interface.
83	def extractone(self, lno):	88	def extractone(self, lno):