recoll / Code / Diff of /src/filters/rclpdf.py

Diff of /src/filters/rclpdf.py [41eb89] .. [123d5b]

Switch to side-by-side view

--- a/src/filters/rclpdf.py
+++ b/src/filters/rclpdf.py
@@ -400,23 +400,20 @@
 
         emf = EMF.MetaFixer() if EMF else None
 
+        # Execute pdfinfo and extract the XML packet
         all = subprocess.check_output([self.pdfinfo, "-meta", self.filename])
-
-        # Extract the XML packet
         res = self.re_xmlpacket.search(all)
-        xml = ''
-        if res:
-            xml = res.group(1)
+        xml = res.group(1) if res else ''
         #self.em.rclog("extrameta: XML: [%s]" % xml)
         if not xml:
             return html
 
+        # Process the XML data
+        root = ET.fromstring(xml)
+        # Sometimes the root tag is <x:xmpmeta>, sometimes <rdf:RDF>
         # The namespace thing is a drag. Can't do it from the top. See
         # the stackoverflow ref above. Maybe we'd be better off just
         # walking the full tree and building the namespaces dict.
-        root = ET.fromstring(xml)
-
-        # Sometimes the root tag is <x:xmpmeta>, sometimes <rdf:RDF>
         if root.tag.endswith('RDF'):
             rdf = root
         else:
@@ -441,13 +438,21 @@
                 if elt is not None:
                     text = self._xmltreetext(elt).encode('UTF-8')
                     if emf:
-                        text = emf.metafix(metanm, text)
+                        try:
+                            text = emf.metafix(metanm, text)
+                        except:
+                            pass
                     # Should we set empty values ?
                     if text:
                         # Can't use setfield as it only works for
                         # text/plain output at the moment.
                         metaheaders.append((rclnm, text))
         if metaheaders:
+            if emf:
+                try:
+                    emf.wrapup(metaheaders)
+                except:
+                    pass
             return self._injectmeta(html, metaheaders)
         else:
             return html