Parent: [683a25] (diff)

Download this file

rcltar    91 lines (76 with data), 2.8 kB

#!/usr/bin/env python2

# Tar-file filter for Recoll
# Thanks to Recoll user Martin Ziegler
# This is a modified version of /usr/share/recoll/filters/rclzip
# It works not only for tar-files, but automatically for gzipped and
# bzipped tar-files as well.

from __future__ import print_function

import rclexecm

# tarfile is required by this filter. If it cannot be imported, print the
# helper-not-found message and exit with a non-zero status.
try:
    import tarfile
except ImportError:
    # Import sys locally so that the exit call below cannot itself fail
    # with a NameError.
    import sys
    print("RECFILTERROR HELPERNOTFOUND python:tarfile")
    sys.exit(1)

class TarExtractor:
    """Expose the regular-file members of a tar archive as documents.

    An instance is driven through openfile(), then getnext() and/or
    getipath(). The latter two return a 4-tuple ``(ok, data, ipath, eof)``
    where ``eof`` is one of the rclexecm.RclExecM markers ``noteof``,
    ``eofnext`` or ``eofnow``.

    Attributes:
        em: the object passed to the constructor; this class reads its
            ``maxmembersize`` attribute and calls its ``rclog`` and
            ``setmimetype`` methods.
        tar: the currently open tarfile.TarFile, or None when no archive
            is open.
        namen: names of the regular-file members of the open archive.
        currentindex: -1 right after openfile() (the "self" document has
            not been returned yet), otherwise the index in ``namen`` of
            the next member that getnext() will return.
    """

    def __init__(self, em):
        self.currentindex = 0
        self.em = em
        self.namen = []
        self.tar = None

    def _closetar(self):
        """Close the currently open archive, if any, and forget it."""
        tar = getattr(self, "tar", None)
        self.tar = None
        if tar is not None:
            try:
                tar.close()
            except Exception:
                # Best effort: a failing close must not prevent the next
                # archive from being opened.
                pass

    def extractone(self, ipath):
        """Read the content of the archive member named ``ipath``.

        A member larger than ``self.em.maxmembersize`` is not read: the
        event is logged through ``self.em.rclog`` and the member is
        returned with empty data and ``ok`` set to True.

        Returns:
            ``(ok, data, ipath_bytes, eof)``. ``ok`` is False when the
            member could not be looked up or read (including when no
            archive is open); ``data`` is then empty. ``eof`` is
            ``eofnext`` when ``currentindex`` designates the last entry of
            ``namen`` (or is beyond it), else ``noteof``.
        """
        docdata = b''
        try:
            info = self.tar.getmember(ipath)
            if info.size > self.em.maxmembersize:
                self.em.rclog("extractone: entry %s size %d too big" %
                              (ipath, info.size))
            else:
                # Reuse the TarInfo found above instead of making tarfile
                # look the member up by name a second time.
                docdata = self.tar.extractfile(info).read()
            ok = True
        except Exception:
            ok = False
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.namen) - 1:
            iseof = rclexecm.RclExecM.eofnext
        return (ok, docdata, rclexecm.makebytes(ipath), iseof)

    def openfile(self, params):
        """Open the archive named by ``params["filename:"]``.

        Any archive left open by a previous call is closed first so that
        its file handle is not leaked. On success ``namen`` holds the
        names of the regular-file members; on failure no archive stays
        open and ``namen`` is empty.

        Returns:
            True if the archive was opened and its member list read,
            False otherwise.
        """
        self.currentindex = -1
        self._closetar()
        self.namen = []
        try:
            self.tar = tarfile.open(name=params["filename:"], mode='r')
            self.namen = [member.name for member in self.tar.getmembers()
                          if member.isfile()]
            return True
        except Exception:
            # Do not keep a half-usable archive around after a failure.
            self._closetar()
            return False

    def getipath(self, params):
        """Extract the single member designated by ``params["ipath:"]``.

        The lookup is first attempted with the value as received. If that
        fails, the byte form returned by extractone() is decoded as UTF-8
        and the lookup is retried; if the decoding or the retry raises,
        the result of the first attempt is returned.
        """
        ipath = params["ipath:"]
        ok, data, ipath, eof = self.extractone(ipath)
        if ok:
            return (ok, data, ipath, eof)
        try:
            ipath = ipath.decode("utf-8")
            return self.extractone(ipath)
        except Exception:
            return (ok, data, ipath, eof)

    def getnext(self, params):
        """Return the next document of the open archive.

        The first call after openfile() returns an empty "self" document
        after setting the MIME type to text/plain. Later calls return the
        members listed in ``namen`` in order. Once they are exhausted,
        ``namen`` is cleared and ``(False, "", "", eofnow)`` is returned.
        """
        if self.currentindex == -1:
            # Return "self" doc
            self.currentindex = 0
            self.em.setmimetype('text/plain')
            if not self.namen:
                eof = rclexecm.RclExecM.eofnext
            else:
                eof = rclexecm.RclExecM.noteof
            return (True, "", "", eof)

        if self.currentindex >= len(self.namen):
            self.namen = []
            return (False, "", "", rclexecm.RclExecM.eofnow)

        # extractone() uses currentindex to compute its eof marker, so the
        # index must only be advanced after the call.
        ret = self.extractone(self.namen[self.currentindex])
        self.currentindex += 1
        return ret


# Script entry: create the rclexecm protocol object, give it to a
# TarExtractor (stored as the extractor's `em`), and pass both to
# rclexecm.main.
# NOTE(review): rclexecm.main presumably runs the request loop that calls
# openfile/getnext/getipath on the extractor -- confirm in rclexecm.
proto = rclexecm.RclExecM()
extract = TarExtractor(proto)
rclexecm.main(proto, extract)