Parent:
[683a25]
(diff)
Download this file
rcltar
91 lines (76 with data), 2.8 kB
#!/usr/bin/env python2
# Tar-file filter for Recoll
# Thanks to Recoll user Martin Ziegler
# This is a modified version of /usr/share/recoll/filters/rclzip
# It works not only for tar-files, but automatically for gzipped and
# bzipped tar-files as well.
from __future__ import print_function
import rclexecm
# tarfile is the one helper this filter cannot work without. If it is
# missing, report that on stdout with the RECFILTERROR line and exit with a
# non-zero status.
try:
    import tarfile
except ImportError:
    # sys is not imported at file level, so bring it in here; otherwise the
    # exit call below would fail with NameError instead of exiting cleanly.
    import sys
    print("RECFILTERROR HELPERNOTFOUND python:tarfile")
    sys.exit(1)
class TarExtractor:
    """Serve the regular-file members of a tar archive as Recoll documents.

    The extraction methods return ``(ok, data, ipath, eof)`` tuples, where
    ``eof`` is one of the ``rclexecm.RclExecM`` end-of-file markers
    (``noteof``, ``eofnext``, ``eofnow``).
    NOTE(review): the methods are presumably called by ``rclexecm.main`` --
    confirm against rclexecm.
    """

    def __init__(self, em):
        """Remember the protocol object and start with no archive open.

        em -- object providing ``rclog``, ``maxmembersize`` and
              ``setmimetype`` (an ``rclexecm.RclExecM`` instance in this
              script).
        """
        self.currentindex = 0
        self.em = em
        self.namen = []
        # Defined up front so the attribute exists before openfile() runs.
        self.tar = None

    def _closetar(self):
        """Close the currently open archive, if any, and drop its listing."""
        previous, self.tar = self.tar, None
        self.namen = []
        if previous is None:
            return
        try:
            previous.close()
        except Exception:
            # Best effort: failing to release the old handle must not stop
            # the next archive from being opened.
            pass

    def extractone(self, ipath):
        """Read a single member of the open archive.

        ipath -- member name as stored in the archive.

        Returns ``(ok, data, ipath, eof)``:
        ok    -- False when the member cannot be looked up or read.
        data  -- member contents; empty when ``ok`` is False or when the
                 member is larger than ``self.em.maxmembersize`` (that case
                 is logged and still reported as a success).
        ipath -- ``rclexecm.makebytes(ipath)``.
        eof   -- ``eofnext`` once ``currentindex`` has reached the last
                 listed member, ``noteof`` otherwise.
        """
        docdata = b''
        try:
            info = self.tar.getmember(ipath)
            if info.size > self.em.maxmembersize:
                # Oversized members are skipped, not treated as errors.
                self.em.rclog("extractone: entry %s size %d too big" %
                              (ipath, info.size))
            else:
                docdata = self.tar.extractfile(ipath).read()
            ok = True
        except Exception:
            # Deliberately broad: unknown names, unreadable members and a
            # missing archive are all reported to the caller as ok=False.
            ok = False

        if self.currentindex >= len(self.namen) - 1:
            iseof = rclexecm.RclExecM.eofnext
        else:
            iseof = rclexecm.RclExecM.noteof
        return (ok, docdata, rclexecm.makebytes(ipath), iseof)

    def openfile(self, params):
        """Open the archive named by ``params["filename:"]``.

        An archive left open by an earlier call is closed first, so that
        processing many files does not accumulate open file handles. On
        success ``self.namen`` holds the names of the regular-file members
        and ``currentindex`` is -1, which makes the next ``getnext`` call
        return the "self" document.

        Returns True on success, False if the archive cannot be opened or
        listed; in the failure case no archive is left open.
        """
        self.currentindex = -1
        self._closetar()
        try:
            self.tar = tarfile.open(name=params["filename:"], mode='r')
            self.namen = [member.name for member in self.tar.getmembers()
                          if member.isfile()]
            return True
        except Exception:
            self._closetar()
            return False

    def getipath(self, params):
        """Extract the member named by ``params["ipath:"]``.

        The name is tried as given; if that fails, the name returned by the
        first attempt is decoded as UTF-8 and tried once more. Returns the
        ``(ok, data, ipath, eof)`` tuple of the successful attempt, or of a
        failed one when neither works.
        """
        ipath = params["ipath:"]
        ok, data, ipath, eof = self.extractone(ipath)
        if ok:
            return (ok, data, ipath, eof)
        try:
            # ipath now holds the makebytes() result from the first attempt.
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
        except Exception:
            return (ok, data, ipath, eof)

    def getnext(self, params):
        """Return the next document of the open archive.

        The first call after ``openfile`` returns an empty "self" document
        with MIME type ``text/plain``. Later calls return the listed members
        in order. Once the list is exhausted it is cleared and
        ``(False, "", "", eofnow)`` is returned.
        """
        if self.currentindex == -1:
            # Return "self" doc
            self.currentindex = 0
            self.em.setmimetype('text/plain')
            if not self.namen:
                eof = rclexecm.RclExecM.eofnext
            else:
                eof = rclexecm.RclExecM.noteof
            return (True, "", "", eof)

        if self.currentindex >= len(self.namen):
            self.namen = []
            return (False, "", "", rclexecm.RclExecM.eofnow)

        ret = self.extractone(self.namen[self.currentindex])
        self.currentindex += 1
        return ret
# Script entry: create the rclexecm protocol object, give it to the tar
# extractor (which uses it for logging, the member size limit and the MIME
# type), then pass both to rclexecm.main.
# NOTE(review): rclexecm.main presumably runs the filter's request loop and
# calls back into the extractor -- confirm in rclexecm.
proto = rclexecm.RclExecM()
extract = TarExtractor(proto)
rclexecm.main(proto, extract)