Parent:
[b321b0]
(diff)
Child:
[5add2e]
(diff)
Download this file
rcltar
78 lines (67 with data), 2.4 kB
#!/usr/bin/env python
# Tar-file filter for Recoll
# Thanks to Recoll user Martin Ziegler
# This is a modified version of /usr/share/recoll/filters/rclzip
# It works not only for tar-files, but automatically for gzipped and
# bzipped tar-files as well.
# sys is needed by the failure path below (sys.exit).
import sys

import rclexecm

# NOTE: this deliberately rebinds the module-level name `open` to
# tarfile.open; the rest of the file relies on that binding.
try:
    from tarfile import TarFile, open
except ImportError:
    # Report the missing helper on stdout, then abort with a non-zero status.
    # The parenthesised single-argument form prints the same text under
    # Python 2 and is also valid Python 3 syntax.
    print("RECFILTERROR HELPERNOTFOUND python:tarfile")
    sys.exit(1)
class TarExtractor:
    """Expose the regular-file members of a tar archive as sub-documents.

    The handler is driven through openfile(), getipath() and getnext().
    `em` is the execution-manager object; this class reads its
    `maxmembersize` attribute and calls its `rclog()` method.
    """

    def __init__(self, em):
        """Store the execution manager and start with no archive opened."""
        self.currentindex = 0
        self.em = em
        # Names of the regular-file members of the current archive.
        self.namen = []
        # Archive currently opened by openfile(), or None.
        self.tar = None

    def extractone(self, ipath):
        """Extract the member named `ipath` from the opened archive.

        Returns a tuple (ok, docdata, ipath, iseof):
          ok      -- False if looking up or reading the member raised.
          docdata -- member contents; empty when the member is larger than
                     em.maxmembersize (still reported with ok True) or on
                     failure.
          ipath   -- the member name, UTF-8 encoded if it was unicode.
          iseof   -- RclExecM.eofnext when currentindex is at or past the
                     last listed member, RclExecM.noteof otherwise.
        """
        docdata = ""
        try:
            info = self.tar.getmember(ipath)
            if info.size > self.em.maxmembersize:
                # Oversized member: log it and return empty data rather
                # than failing the entry.
                self.em.rclog("extractone: entry %s size %d too big" %
                              (ipath, info.size))
            else:
                docdata = self.tar.extractfile(ipath).read()
            ok = True
        except Exception:
            # Best effort: any lookup/read problem just marks the entry
            # as failed.
            ok = False
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.namen) - 1:
            iseof = rclexecm.RclExecM.eofnext
        if isinstance(ipath, unicode):
            ipath = ipath.encode("utf-8")
        return (ok, docdata, ipath, iseof)

    def openfile(self, params):
        """Open the archive named by params["filename:"] and list its files.

        Resets the iteration index, releases any previously opened archive
        and records the names of the regular-file members in self.namen.
        Returns True on success, False if the archive could not be opened
        or listed.
        """
        self.currentindex = 0
        # Forget the previous archive first, so that a failed open cannot
        # leave stale member names or an old handle behind.
        self.namen = []
        if self.tar is not None:
            try:
                self.tar.close()
            except Exception:
                # Nothing useful can be done if closing the old archive
                # fails; carry on with the new one.
                pass
            self.tar = None
        try:
            # `open` is tarfile.open here (imported at file level).
            self.tar = open(name=params["filename:"], mode='r')
            self.namen = [member.name for member in self.tar.getmembers()
                          if member.isfile()]
            return True
        except Exception:
            return False

    def getipath(self, params):
        """Extract the member named by params["ipath:"].

        If the first attempt fails, the name is decoded from UTF-8 and the
        extraction is retried once; if decoding itself raises, the result
        of the first attempt is returned. Returns the same tuple as
        extractone().
        """
        ipath = params["ipath:"]
        ok, data, ipath, eof = self.extractone(ipath)
        if ok:
            return (ok, data, ipath, eof)
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
        except Exception:
            return (ok, data, ipath, eof)

    def getnext(self, params):
        """Extract the next listed member and advance the iteration index.

        Once every member has been returned, clears the member list and
        returns (False, "", "", RclExecM.eofnow).
        """
        if self.currentindex >= len(self.namen):
            self.namen = []
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(self.namen[self.currentindex])
        self.currentindex += 1
        return ret
# Script entry point: create the rclexecm protocol object, wrap it in the
# tar extractor, and hand both to rclexecm.main.
# NOTE(review): rclexecm.main presumably runs the filter's request loop and
# calls back into the extractor's openfile/getipath/getnext -- rclexecm is
# not visible here, confirm against that module.
proto = rclexecm.RclExecM()
extract = TarExtractor(proto)
rclexecm.main(proto, extract)