a/src/filters/rclxls.py b/src/filters/rclxls.py
...
...
13
import xml.sax
13
import xml.sax
14
14
15
class XLSProcessData:
    """Collect the lines produced by the conversion command and build HTML.

    Two modes, selected by ``ishtml``:

    * ``ishtml`` true: every line received is copied to the output with a
      trailing newline, and ``wrapData()`` returns that copy unchanged.
    * ``ishtml`` false: lines are accumulated as an XML document, which
      ``wrapData()`` parses with ``xlsxmltocsv.XlsXmlHandler`` and wraps
      inside an HTML ``<pre>`` element.

    All buffers are ``bytes``.
    NOTE(review): lines are presumably handed over as bytes by the
    caller -- confirm against the code driving takeLine().
    """

    def __init__(self, em, ishtml = False):
        """Set up empty buffers.

        em     -- helper object; this class only calls its htmlescape().
        ishtml -- True when the input is already HTML and must be passed
                  through instead of being parsed as XML.
        """
        self.em = em
        # HTML output being built.
        self.out = b""
        # Becomes true once the HTML header was emitted (XML mode only).
        self.gotdata = False
        # Raw XML accumulated for the SAX parser (XML mode only).
        self.xmldata = b""
        self.ishtml = ishtml

    def takeLine(self, line):
        """Consume one line of command output."""
        if self.ishtml:
            # The separator must be bytes like the buffer: adding a str
            # "\n" to a bytes line raises TypeError on Python 3.
            self.out += line + b"\n"
            return
        if not self.gotdata:
            # First XML line: emit the HTML preamble exactly once.
            self.out += (b'<html><head>'
                         b'<meta http-equiv="Content-Type" '
                         b'content="text/html;charset=UTF-8">'
                         b'</head><body><pre>')
            self.gotdata = True
        # NOTE(review): lines are concatenated without a separator here,
        # unlike the HTML path -- verify this is what the parser expects.
        self.xmldata += line

    def wrapData(self):
        """Return the final HTML document.

        In HTML mode this is just the accumulated input. Otherwise the
        accumulated XML is parsed and the handler's output is HTML-escaped
        and enclosed in the <pre> element opened by takeLine().
        """
        if self.ishtml:
            return self.out
        handler = xlsxmltocsv.XlsXmlHandler()
        # parseString() feeds the handler; its return value is not used.
        xml.sax.parseString(self.xmldata, handler)
        # NOTE(review): assumes htmlescape() returns bytes for this input,
        # as self.out is bytes -- confirm against its implementation.
        self.out += self.em.htmlescape(handler.output)
        return self.out + b'</pre></body></html>'
42
42
43
class XLSFilter:
43
class XLSFilter:
44
    def __init__(self, em):
44
    def __init__(self, em):
45
        self.em = em
45
        self.em = em
46
        self.ntry = 0
46
        self.ntry = 0
...
...
54
            return ([], None)
54
            return ([], None)
55
        self.ntry = 1
55
        self.ntry = 1
56
        # Some HTML files masquerade as XLS
56
        # Some HTML files masquerade as XLS
57
        try:
57
        try:
58
            data = open(fn, 'rb').read(512)
58
            data = open(fn, 'rb').read(512)
59
            if data.find('html') != -1 or data.find('HTML') != -1:
59
            if data.find(b'html') != -1 or data.find(b'HTML') != -1:
60
                return ("cat", XLSProcessData(self.em, True))
60
                return ("cat", XLSProcessData(self.em, True))
61
        except Exception as err:
61
        except Exception as err:
62
            self.em.rclog("Error reading %s:%s" % (fn, str(err)))
62
            self.em.rclog("Error reading %s:%s" % (fn, str(err)))
63
            pass
63
            pass
64
        cmd = rclexecm.which("xls-dump.py")
64
        cmd = rclexecm.which("xls-dump.py")