More filter cleanup: factor out the code shared by the plain XSLT filters, and move a few more filters to Python.

Jean-Francois Dockes Jean-Francois Dockes 2018-06-04

changed src/desktop/hotrecoll.py
changed src/filters/rclabw.py
changed src/filters/rcldvi
changed src/filters/rclsvg.py
changed src/filters/rclxml.py
changed src/filters/rclxslt.py
changed src/sampleconf/mimeconf
changed src/windows/mimeconf
changed src/Makefile.am
copied src/filters/rclfb2 -> src/filters/rclgnm.py
copied src/filters/rclgnm -> src/filters/rclfb2.py
copied src/filters/rclokulnote -> src/filters/rclokulnote.py
copied src/filters/rclsiduxman -> src/filters/rclgenxslt.py
src/desktop/hotrecoll.py Diff Switch to side-by-side view
Loading...
src/filters/rclabw.py Diff Switch to side-by-side view
Loading...
src/filters/rcldvi Diff Switch to side-by-side view
Loading...
src/filters/rclsvg.py Diff Switch to side-by-side view
Loading...
src/filters/rclxml.py Diff Switch to side-by-side view
Loading...
src/filters/rclxslt.py Diff Switch to side-by-side view
Loading...
src/sampleconf/mimeconf Diff Switch to side-by-side view
Loading...
src/windows/mimeconf Diff Switch to side-by-side view
Loading...
src/Makefile.am Diff Switch to side-by-side view
Loading...
src/filters/rclfb2 to src/filters/rclgnm.py
--- a/src/filters/rclfb2
+++ b/src/filters/rclgnm.py
@@ -1,139 +1,112 @@
-#!/bin/sh
-# @(#$Id: rclopxml,v 1.3 2008-10-08 08:27:34 dockes Exp $  (C) 2004 J.F.Dockes
-#================================================================
-# Extract text from an fb2 ebook (xml)
-#================================================================
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
 
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname=rclfb2
-filetype=fb2
+from __future__ import print_function
+
+import sys
+import rclexecm
+import rclgenxslt
 
 
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc
-
-xsltproc --nonet --novalid - "$infile" <<EOF
-<?xml version="1.0"?>
+stylesheet_all = '''<?xml version="1.0"?>
 <xsl:stylesheet version="1.0"
   xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-  xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0"
-  exclude-result-prefixes="fb"
+  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
+  xmlns:xlink="http://www.w3.org/1999/xlink" 
+  xmlns:dc="http://purl.org/dc/elements/1.1/" 
+  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" 
+  xmlns:ooo="http://openoffice.org/2004/office"
+  xmlns:gnm="http://www.gnumeric.org/v10.dtd"
+
+  exclude-result-prefixes="office xlink meta ooo dc"
   >
 
 <xsl:output method="html" encoding="UTF-8"/>
 
-<xsl:template match="/fb:FictionBook">
- <html>
-  <xsl:apply-templates select="fb:description"/>
-  <xsl:apply-templates select="fb:body"/>
- </html>
+<xsl:template match="/">
+<html>
+  <head>
+   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
+   <xsl:apply-templates select="//office:document-meta/office:meta"/>
+  </head>
+
+  <body>
+    <xsl:apply-templates select="//gnm:Cells"/>
+    <xsl:apply-templates select="//gnm:Objects"/>
+  </body>
+</html>
 </xsl:template>
 
-<xsl:template match="fb:description">
-  <head>
-    <xsl:apply-templates select="fb:title-info"/>
-  </head><xsl:text>
-</xsl:text>
+<xsl:template match="//dc:date">
+   <meta>
+     <xsl:attribute name="name">date</xsl:attribute>
+     <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+   </meta>
 </xsl:template>
 
-<xsl:template match="fb:description/fb:title-info">
-    <xsl:apply-templates select="fb:book-title"/>
-    <xsl:apply-templates select="fb:author"/>
-</xsl:template>
-
-<xsl:template match="fb:description/fb:title-info/fb:book-title">
-<title> <xsl:value-of select="."/> </title>
-</xsl:template>
-
-<xsl:template match="fb:description/fb:title-info/fb:author">
+<xsl:template match="//dc:description">
   <meta>
-  <xsl:attribute name="name">author</xsl:attribute>
-  <xsl:attribute name="content">
-     <xsl:value-of select="fb:first-name"/><xsl:text> </xsl:text>
-     <xsl:value-of select="fb:middle-name"/><xsl:text> </xsl:text>
-     <xsl:value-of select="fb:last-name"/>
-  </xsl:attribute>
+    <xsl:attribute name="name">abstract</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
   </meta>
 </xsl:template>
 
-<xsl:template match="fb:body">
- <body>
- <xsl:apply-templates select="fb:section"/>
- </body>
+<xsl:template match="//meta:keyword">
+  <meta>
+    <xsl:attribute name="name">keywords</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
 </xsl:template>
 
-<xsl:template match="fb:body/fb:section">
-  <xsl:for-each select="fb:p">
+<xsl:template match="//dc:subject">
+  <meta>
+    <xsl:attribute name="name">keywords</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="//dc:title">
+  <title> <xsl:value-of select="."/> </title>
+</xsl:template>
+
+<xsl:template match="//meta:initial-creator">
+  <meta>
+    <xsl:attribute name="name">author</xsl:attribute>
+    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  </meta>
+</xsl:template>
+
+<xsl:template match="office:meta/*"/>
+
+<xsl:template match="gnm:Cell">
   <p><xsl:value-of select="."/></p>
-  </xsl:for-each>
+</xsl:template>
+
+<xsl:template match="gnm:CellComment">
+  <blockquote><xsl:value-of select="@Text"/></blockquote>
 </xsl:template>
 
 </xsl:stylesheet>
-EOF
+'''
+
+
+if __name__ == '__main__':
+    proto = rclexecm.RclExecM()
+    extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all, gzip=True)
+    rclexecm.main(proto, extract)
+
src/filters/rclgnm to src/filters/rclfb2.py
--- a/src/filters/rclgnm
+++ b/src/filters/rclfb2.py
@@ -1,191 +1,87 @@
-#!/bin/sh
-# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from a gnumeric spreadsheet
-#================================================================
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
 
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclgnumeric"
-filetype=gnumeric
+from __future__ import print_function
 
+import sys
+import rclexecm
+import rclxslt
+import rclgenxslt
 
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc gunzip
-
-# We need a temporary file
-if test z"$RECOLL_TMPDIR" != z; then
-   ttdir=$RECOLL_TMPDIR
-elif test z"$TMPDIR" != z ; then
-   ttdir=$TMPDIR
-else
-   ttdir=/tmp
-fi
-tmpfile=$ttdir/rclgnm.XXXXXX
-
-tmpfile=`mktemp "$tmpfile"`
-if [ $? -ne 0 ]; then
-   senderror "$0: Can't create temp file, exiting..."
-fi
-
-cleanup()
-{
-    rm -f $tmpfile
-}
-    
-trap cleanup EXIT HUP QUIT INT TERM
-
-gunzip < $1 > $tmpfile || senderror "Cant uncompress input"
-xsltproc --novalid --nonet - $tmpfile <<EOF
-<?xml version="1.0"?>
+stylesheet_all = '''<?xml version="1.0"?>
 <xsl:stylesheet version="1.0"
   xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-  xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" 
-  xmlns:xlink="http://www.w3.org/1999/xlink" 
-  xmlns:dc="http://purl.org/dc/elements/1.1/" 
-  xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" 
-  xmlns:ooo="http://openoffice.org/2004/office"
-  xmlns:gnm="http://www.gnumeric.org/v10.dtd"
-
-  exclude-result-prefixes="office xlink meta ooo dc"
+  xmlns:fb="http://www.gribuser.ru/xml/fictionbook/2.0"
+  exclude-result-prefixes="fb"
   >
 
 <xsl:output method="html" encoding="UTF-8"/>
 
-<xsl:template match="/">
-<html>
-  <head>
-   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
-   <xsl:apply-templates select="//office:document-meta/office:meta"/>
-  </head>
-
-  <body>
-    <xsl:apply-templates select="//gnm:Cells"/>
-    <xsl:apply-templates select="//gnm:Objects"/>
-  </body>
-</html>
+<xsl:template match="/fb:FictionBook">
+ <html>
+  <xsl:apply-templates select="fb:description"/>
+  <xsl:apply-templates select="fb:body"/>
+ </html>
 </xsl:template>
 
-<xsl:template match="//dc:date">
-   <meta>
-     <xsl:attribute name="name">date</xsl:attribute>
-     <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-   </meta>
+<xsl:template match="fb:description">
+  <head>
+    <xsl:apply-templates select="fb:title-info"/>
+  </head><xsl:text>
+</xsl:text>
 </xsl:template>
 
-<xsl:template match="//dc:description">
+<xsl:template match="fb:description/fb:title-info">
+    <xsl:apply-templates select="fb:book-title"/>
+    <xsl:apply-templates select="fb:author"/>
+</xsl:template>
+
+<xsl:template match="fb:description/fb:title-info/fb:book-title">
+<title> <xsl:value-of select="."/> </title>
+</xsl:template>
+
+<xsl:template match="fb:description/fb:title-info/fb:author">
   <meta>
-    <xsl:attribute name="name">abstract</xsl:attribute>
-    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
+  <xsl:attribute name="name">author</xsl:attribute>
+  <xsl:attribute name="content">
+     <xsl:value-of select="fb:first-name"/><xsl:text> </xsl:text>
+     <xsl:value-of select="fb:middle-name"/><xsl:text> </xsl:text>
+     <xsl:value-of select="fb:last-name"/>
+  </xsl:attribute>
   </meta>
 </xsl:template>
 
-<xsl:template match="//meta:keyword">
-  <meta>
-    <xsl:attribute name="name">keywords</xsl:attribute>
-    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-  </meta>
+<xsl:template match="fb:body">
+ <body>
+ <xsl:apply-templates select="fb:section"/>
+ </body>
 </xsl:template>
 
-<xsl:template match="//dc:subject">
-  <meta>
-    <xsl:attribute name="name">keywords</xsl:attribute>
-    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-  </meta>
-</xsl:template>
-
-<xsl:template match="//dc:title">
-  <title> <xsl:value-of select="."/> </title>
-</xsl:template>
-
-<xsl:template match="//meta:initial-creator">
-  <meta>
-    <xsl:attribute name="name">author</xsl:attribute>
-    <xsl:attribute name="content"><xsl:value-of select="."/></xsl:attribute>
-  </meta>
-</xsl:template>
-
-<xsl:template match="office:meta/*"/>
-
-<xsl:template match="gnm:Cell">
+<xsl:template match="fb:body/fb:section">
+  <xsl:for-each select="fb:p">
   <p><xsl:value-of select="."/></p>
-</xsl:template>
-
-<xsl:template match="gnm:CellComment">
-  <blockquote><xsl:value-of select="@Text"/></blockquote>
+  </xsl:for-each>
 </xsl:template>
 
 </xsl:stylesheet>
-EOF
+'''
 
+if __name__ == '__main__':
+    proto = rclexecm.RclExecM()
+    extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all)
+    rclexecm.main(proto, extract)
src/filters/rclokulnote to src/filters/rclokulnote.py
--- a/src/filters/rclokulnote
+++ b/src/filters/rclokulnote.py
@@ -1,97 +1,32 @@
-#!/bin/sh
-# @(#$Id: rclsoff,v 1.12 2008-10-08 08:27:34 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Extract text from a gnumeric spreadsheet
-#================================================================
+#!/usr/bin/env python3
+# Copyright (C) 2014 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+from __future__ import print_function
 
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclgnumeric"
-filetype=gnumeric
+import sys
+import rclexecm
+import rclgenxslt
 
-
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
-
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
-
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
-
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds xsltproc 
-    
-xsltproc --novalid --nonet - "$infile" <<EOF
-<?xml version="1.0"?>
+stylesheet_all = '''<?xml version="1.0"?>
 <xsl:stylesheet version="1.0"
   xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 
 <xsl:output method="html" encoding="UTF-8"/>
 <xsl:strip-space elements="*" />
-
 
 <xsl:template match="/">
 <html>
@@ -126,5 +61,10 @@
 <xsl:template match="@*"/>
 
 </xsl:stylesheet>
-EOF
+'''
 
+if __name__ == '__main__':
+   proto = rclexecm.RclExecM()
+   extract = rclgenxslt.XSLTExtractor(proto, stylesheet_all)
+   rclexecm.main(proto, extract)
+
src/filters/rclsiduxman to src/filters/rclgenxslt.py
--- a/src/filters/rclsiduxman
+++ b/src/filters/rclgenxslt.py
@@ -1,92 +1,65 @@
-#!/bin/sh
-# @(#$Id: rclsiduxman,v 1.1 2008-06-09 09:12:05 dockes Exp $  (C) 2004 J.F.Dockes
-# Parts taken from Estraier:
-#================================================================
-# Estraier: a personal full-text search system
-# Copyright (C) 2003-2004 Mikio Hirabayashi
-#================================================================
-#================================================================
-# Strip the menu part from sidux manual pages to improve search precision
-#================================================================
+#!/usr/bin/env python3
+# Copyright (C) 2018 J.F.Dockes
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the
+#   Free Software Foundation, Inc.,
+#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+######################################
+from __future__ import print_function
 
-# set variables
-LANG=C ; export LANG
-LC_ALL=C ; export LC_ALL
-progname="rclsiduxman"
-filetype="sidux manual htm"
+import sys
+import rclexecm
+import rclxslt
+import gzip
+
+class XSLTExtractor:
+    def __init__(self, em, stylesheet, gzip=False):
+        self.em = em
+        self.currentindex = 0
+        self.stylesheet = stylesheet
+        self.dogz = gzip
 
 
-#RECFILTCOMMONCODE
-##############################################################################
-# !! Leave the previous line unmodified!! Code imported from the
-# recfiltcommon file
+    def extractone(self, params):
+        if "filename:" not in params:
+            self.em.rclog("extractone: no mime or file name")
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        fn = params["filename:"]
+        try:
+            if self.dogz:
+                data = gzip.open(fn, 'rb').read()
+            else:
+                data = open(fn, 'rb').read()
+            docdata = rclxslt.apply_sheet_data(self.stylesheet, data)
+        except Exception as err:
+            self.em.rclog("%s: bad data: %s" % (fn, err))
+            return (False, "", "", rclexecm.RclExecM.eofnow)
 
-# Utility code common to all shell filters. This could be sourced at run
-# time, but it's slightly more efficient to include the code in the
-# filters at build time (with a sed script).
+        return (True, docdata, "", rclexecm.RclExecM.eofnext)
+    
 
-# Describe error in a way that can be interpreted by our caller
-senderror()
-{
-    echo RECFILTERROR $*
-    # Also alert on stderr just in case
-    echo ":2:$progname::: $*" 1>&2
-    exit 1
-}
+    ###### File type handler api, used by rclexecm ---------->
+    def openfile(self, params):
+        self.currentindex = 0
+        return True
 
-iscmd()
-{
-    cmd=$1
-    case $cmd in
-    */*)
-	if test -x $cmd -a ! -d $cmd ; then return 0; else return 1; fi ;;
-    *)
-      oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs
-      for d in $*;do test -x $d/$cmd -a ! -d $d/$cmd && return 0;done
-      return 1 ;;
-    esac
-}
-
-checkcmds()
-{
-    for cmd in $*;do
-      if iscmd $cmd 
-      then 
-        a=1
-      else 
-        senderror HELPERNOTFOUND $cmd
-      fi
-    done
-}
-
-# show help message
-if test $# -ne 1 -o "$1" = "--help" 
-then
-  echo "Convert a $filetype file to HTML text for Recoll indexing."
-  echo "Usage: $progname [infile]"
-  exit 1
-fi
-
-infile="$1"
-
-# check the input file existence (may be '-' for stdin)
-if test "X$infile" != X- -a ! -f "$infile"
-then
-  senderror INPUTNOSUCHFILE "$infile"
-fi
-
-# protect access to our temp files and directories
-umask 77
-
-##############################################################################
-# !! Leave the following line unmodified !
-#ENDRECFILTCOMMONCODE
-
-checkcmds sed
-# Delete everything from <div id="menu"> to <div id="main-page">
-# This prints an additional blank line at top which does not matter
-sed -n -e '1,/<div id="menu">/{x;p' -e '}' \
-    -e '/<div id="main-page">/,$p' < "$infile"
-
-# exit normally
-exit 0
+    def getipath(self, params):
+        return self.extractone(params)
+        
+    def getnext(self, params):
+        if self.currentindex >= 1:
+            return (False, "", "", rclexecm.RclExecM.eofnow)
+        else:
+            ret= self.extractone(params)
+            self.currentindex += 1
+            return ret