|
a/src/filters/rclxls.py |
|
b/src/filters/rclxls.py |
|
... |
|
... |
13 |
import xml.sax
|
13 |
import xml.sax
|
14 |
|
14 |
|
15 |
class XLSProcessData:
    """Accumulate the output lines of a helper command and wrap them as HTML.

    Two modes, selected by ``ishtml``:

    * ``ishtml`` true: every line is copied to the output unchanged, with a
      newline appended, and returned as is by ``wrapData``.
    * ``ishtml`` false: lines are accumulated as XML, then parsed with
      ``xlsxmltocsv.XlsXmlHandler`` and the handler output is emitted inside
      an HTML ``<pre>`` element.

    All buffers are ``bytes``, so ``takeLine`` must be given ``bytes`` lines.
    """

    # Opening of the HTML document produced in XML mode.
    _HTML_HEADER = (
        b'''<html><head>'''
        b'''<meta http-equiv="Content-Type" '''
        b'''content="text/html;charset=UTF-8">'''
        b'''</head><body><pre>'''
    )

    def __init__(self, em, ishtml = False):
        """Store the execm helper object and reset the buffers.

        em: object providing ``htmlescape()`` (used by ``wrapData``).
        ishtml: true when the incoming data is already HTML.
        """
        self.em = em
        self.out = b""
        # Set once the HTML header has been emitted (XML mode only).
        self.gotdata = False
        self.xmldata = b""
        self.ishtml = ishtml

    def takeLine(self, line):
        """Consume one line (bytes) of command output."""
        if self.ishtml:
            # The buffer is bytes: the separator must be bytes too, a str
            # "\n" raises TypeError under Python 3.
            self.out += line + b"\n"
            return
        if not self.gotdata:
            self.out += self._HTML_HEADER
            self.gotdata = True
        # Lines are concatenated without a separator before XML parsing.
        self.xmldata += line

    def wrapData(self):
        """Return the complete output document as bytes."""
        if self.ishtml:
            return self.out
        handler = xlsxmltocsv.XlsXmlHandler()
        xml.sax.parseString(self.xmldata, handler)
        # NOTE(review): assumes em.htmlescape() returns bytes here, since it
        # is appended to a bytes buffer -- confirm against rclexecm.
        self.out += self.em.htmlescape(handler.output)
        return self.out + b'''</pre></body></html>'''
|
42 |
|
42 |
|
43 |
class XLSFilter:
|
43 |
class XLSFilter:
|
44 |
def __init__(self, em):
|
44 |
def __init__(self, em):
|
45 |
self.em = em
|
45 |
self.em = em
|
46 |
self.ntry = 0
|
46 |
self.ntry = 0
|
|
... |
|
... |
54 |
return ([], None)
|
54 |
return ([], None)
|
55 |
self.ntry = 1
|
55 |
self.ntry = 1
|
56 |
# Some HTML files masquerade as XLS
|
56 |
# Some HTML files masquerade as XLS
|
57 |
try:
|
57 |
try:
|
58 |
data = open(fn, 'rb').read(512)
|
58 |
data = open(fn, 'rb').read(512)
|
59 |
if data.find('html') != -1 or data.find('HTML') != -1:
|
59 |
if data.find(b'html') != -1 or data.find(b'HTML') != -1:
|
60 |
return ("cat", XLSProcessData(self.em, True))
|
60 |
return ("cat", XLSProcessData(self.em, True))
|
61 |
except Exception as err:
|
61 |
except Exception as err:
|
62 |
self.em.rclog("Error reading %s:%s" % (fn, str(err)))
|
62 |
self.em.rclog("Error reading %s:%s" % (fn, str(err)))
|
63 |
pass
|
63 |
pass
|
64 |
cmd = rclexecm.which("xls-dump.py")
|
64 |
cmd = rclexecm.which("xls-dump.py")
|