removed | website/filters/rclimg |
removed | website/filters/rclkwd |
removed | website/filters/rcllyx |
removed | website/filters/rclopxml |
removed | website/filters/rclscribus |
removed | website/filters/rclsoff |
removed | website/filters/rclsvg |
removed | website/filters/rcltex |
removed | website/filters/rclwpd |
changed | website/filters/filters.html |
changed | website/filters/mimeconf |
changed | website/filters/mimemap |
changed | website/filters/mimeview |
copied | website/filters/rclabw -> website/filters/rclics |
website/filters/rclimg
File was removed.
website/filters/rclkwd
File was removed.
website/filters/rcllyx
File was removed.
website/filters/rclopxml
File was removed.
website/filters/rclscribus
File was removed.
website/filters/rclsoff
File was removed.
website/filters/rclsvg
File was removed.
website/filters/rcltex
File was removed.
website/filters/rclwpd
File was removed.
website/filters/rclabw to website/filters/rclics
--- a/website/filters/rclabw +++ b/website/filters/rclics @@ -1,175 +1,180 @@ -#!/bin/sh -# @(#$Id: rclabw,v 1.2 2007/06/15 11:41:50 dockes Exp $ (C) 2004 J.F.Dockes -# Parts taken from Estraier: -#================================================================ -# Estraier: a personal full-text search system -# Copyright (C) 2003-2004 Mikio Hirabayashi -#================================================================ -#================================================================ -# Extract text from an abiword file -#================================================================ +#!/usr/bin/env python -# set variables -LANG=C ; export LANG -LC_ALL=C ; export LC_ALL -progname="rclabw" -filetype=abiword +# Read an ICS file, break it into "documents" which are events, todos, +# or journal entries, and interface with recoll execm +# +# For historical reasons, this can use either the icalendar or the +# vobject Python modules, or an internal splitter. The default is now +# to use the internal splitter, the other modules are more trouble +# than they're worth (to us and until we will want to get into date +# computations etc.) -#RECFILTCOMMONCODE -############################################################################## -# !! Leave the previous line unmodified!! Code imported from the -# recfiltcommon file +import rclexecm +import sys -# Utility code common to all shell filters. This could be sourced at run -# time, but it's slightly more efficient to include the code in the -# filters at build time (with a sed script). 
- -# Describe error in a way that can be interpreted by our caller -senderror() -{ - echo RECFILTERROR $* - # Also alert on stderr just in case - echo ":2:$progname::: $*" 1>&2 - exit 1 -} - -iscmd() -{ - cmd=$1 - case $cmd in - */*) - if test -x $cmd ; then return 0; else return 1; fi ;; - *) - oldifs=$IFS; IFS=":"; set -- $PATH; IFS=$oldifs - for d in $*;do test -x $d/$cmd && return 0;done - return 1 ;; - esac -} - -checkcmds() -{ - for cmd in $*;do - if iscmd $cmd - then - a=1 - else - senderror HELPERNOTFOUND $cmd - fi - done -} - -# show help message -if test $# -ne 1 -o "$1" = "--help" -then - echo "Convert a $filetype file to HTML text for Recoll indexing." - echo "Usage: $progname [infile]" - exit 1 -fi - -infile="$1" - -# check the input file existence (may be '-' for stdin) -if test "X$infile" != X- -a ! -f "$infile" -then - senderror INPUTNOSUCHFILE "$infile" -fi - -# protect access to our temp files and directories -umask 77 - -############################################################################## -# !! Leave the following line unmodified ! -#ENDRECFILTCOMMONCODE - -checkcmds iconv sed - -# check the input file existence -if test ! -f "$infile" -then - printf '%s: %s: no such file\n' "$progname" "$infile" - exit 1 -fi - -encoding=`sed -e '/<?xml version=/s/"?>$//' \ - -e '/^<?xml version=/s/.*encoding="//p;D;q' \ - -e D \ -< $infile` -if test X$encoding = X ; then encoding=UTF-8;fi - -# Note: there can be newlines inside the description field, we don't want -# them... Have 2 use 2 different selectors for the single-line and -# multiple-line cases because of the generic tag end (</m> for all meta -# tags) -descsedprog=' -/<m key="dc.description">\([^<]*\)<\/m>/ { -s//\1/ -p -q -} -/<m key="dc.description">/,/<\/m>/ { -s!.*<m key="dc.description">!! -s!</m>.*!! -H -} -${ -g -s/\n/ /g -p -} -' - -description=`sed -n -e "$descsedprog" < "$infile"` -#echo description: "$description" - -# Set program for the single line meta elements. 
Takes element name as -# parameter -setmetasedprog() { -metasedprog='/<m key="'$1'">/{ -s/.*<m key="'$1'">\([^<]*\).*/\1/ -'"s/\"/'/g"' -p -}' -} - -setmetasedprog dc.subject -subject=`sed -n -e "$metasedprog" "$infile"` -#echo subject: "$subject" - -setmetasedprog dc.title -title=`sed -n -e "$metasedprog" "$infile"` -#echo titre: "$title" - -setmetasedprog abiword.keywords -keywords=`sed -n -e "$metasedprog" "$infile"` -#echo keywords: "$keywords" - -setmetasedprog dc.creator -creator=`sed -n -e "$metasedprog" "$infile"` -#echo creator: "$creator" - -# Note: next expr supposes that paragraphs are always all by themselves on -# a single line in the xml (no multiple <p> per line, no embedded newlines -# in text). -contentsedprog=' -/<p[ >]/{ -s/<[^>]*>/ /g -p -} -' -content=`sed -n -e "$contentsedprog" "$infile"` -#echo content: "$content" - -# output the result -(echo '<html><head><title>' "$title" '</title>' -echo '<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">' -echo '<meta name="description" content="' "$description $subject" '">' -echo '<meta name="keywords" content="' "$keywords" '">' -echo '<meta name="author" content="' "$creator" '">' -echo '</head><body><pre>' -echo "$content" -echo '</pre></body></html>') \ -| iconv -f $encoding -t UTF-8 -c -s +# Decide how we'll process the file. 
# Decide how the calendar file will be parsed: the built-in splitter
# (default) or one of the optional third-party parsers. The syntax below
# is kept valid for both Python 2.6+ and Python 3.
modules = ('internal', 'icalendar', 'vobject')
usemodule = 'internal'
forcevobject = 0
if usemodule != 'internal':
    try:
        if forcevobject:
            # Skip icalendar on purpose so that vobject gets tried.
            raise ImportError("icalendar bypassed by forcevobject")
        from icalendar import Calendar, Event
        usemodule = 'icalendar'
    except Exception:
        # Any failure to load icalendar falls back to vobject.
        try:
            import vobject
            usemodule = 'vobject'
        except Exception:
            print("RECFILTERROR HELPERNOTFOUND python:icalendar")
            print("RECFILTERROR HELPERNOTFOUND python:vobject")
            sys.exit(1)


class IcalExtractor:
    """Present the entries of one ICS file as a list of sub-documents.

    Each VEVENT / VTODO / VJOURNAL found in the file becomes one text
    document, addressed by its position in the list (the "ipath").
    The openfile / getipath / getnext methods are the handler API
    called by rclexecm.
    """

    def __init__(self, em):
        """Remember the execm object and declare our output MIME type."""
        self.file = ""
        self.contents = []
        # Cursor used by getnext(); also set here so that the object is
        # usable before the first openfile() call.
        self.currentindex = 0
        self.em = em
        self.em.setmimetype("text/plain")

    def extractone(self, index):
        """Return the (ok, data, ipath, eof) tuple for entry *index*.

        An index past the end of the entry list yields
        (False, "", "", True).
        """
        if index >= len(self.contents):
            return (False, "", "", True)
        docdata = self.contents[index]

        # NOTE(review): the EOF flag is derived from the sequential
        # cursor (self.currentindex), not from *index*; for getnext()
        # both are equal. Confirm what rclexecm expects for getipath().
        iseof = rclexecm.RclExecM.noteof
        if self.currentindex >= len(self.contents) - 1:
            iseof = rclexecm.RclExecM.eofnext
        return (True, docdata, str(index), iseof)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Open params["filename:"] and split it into entries.

        Returns True on success. On failure the error is logged through
        the execm object and False is returned.
        """
        self.file = params["filename:"]
        # Start from a clean state: the vobject branch only appends, so
        # entries from a previously opened file must not be kept.
        self.contents = []
        self.currentindex = 0

        try:
            calstr = open(self.file, 'rb')
        except Exception as e:
            self.em.rclog("Openfile: open: %s" % str(e))
            return False

        # The handle is only needed while parsing: always release it.
        try:
            return self._readentries(calstr)
        finally:
            calstr.close()

    def _readentries(self, calstr):
        """Fill self.contents from the open file, using the chosen parser."""
        if usemodule == 'internal':
            # NOTE(review): the internal splitter works on native str
            # lines, which is what a Python 2 'rb' handle yields; a
            # Python 3 binary handle would yield bytes lines instead.
            self.contents = ICalSimpleSplitter().splitcalendar(calstr)
        elif usemodule == 'icalendar':
            try:
                cal = Calendar.from_string(calstr.read())
            except Exception as e:
                self.em.rclog("Openfile: read or parse error: %s" % str(e))
                return False
            self.contents = [item.as_string() for item in cal.walk()
                             if item.name in ICalSimpleSplitter.interesting]
        else:
            try:
                cal = vobject.readOne(calstr)
            except Exception as e:
                self.em.rclog("Openfile: cant parse object: %s" % str(e))
                return False
            for lstnm in ('vevent_list', 'vtodo_list', 'vjournal_list'):
                for ev in getattr(cal, lstnm, []):
                    self.contents.append(ev.serialize())
        return True

    def getipath(self, params):
        """Return the entry designated by params["ipath:"].

        A missing or non-numeric ipath yields the same failure tuple as
        an out-of-range index, so the result is always a 4-tuple.
        """
        try:
            index = int(params["ipath:"])
        except (KeyError, TypeError, ValueError):
            return (False, "", "", True)
        return self.extractone(index)

    def getnext(self, params):
        """Return the entry under the cursor, then advance the cursor."""
        if self.currentindex >= len(self.contents):
            self.em.rclog("getnext: EOF hit")
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(self.currentindex)
        self.currentindex += 1
        return ret


# Trivial splitter: cut objects on BEGIN/END (only for 'interesting' objects)
# ignore all other syntax
class ICalSimpleSplitter:
    """Cut a calendar into the text of its events, todos and journals."""

    # Note that if an 'interesting' element is nested inside another one,
    # it will not be extracted (it stays as text in the enclosing entry).
    interesting = ('VTODO', 'VEVENT', 'VJOURNAL')

    def splitcalendar(self, fin):
        """Return the list of interesting blocks found in *fin*.

        *fin* is any iterable of text lines. Each returned block is one
        string holding the lines from BEGIN:<name> to the matching
        END:<name>, right-stripped and each terminated by a newline.
        Blank lines are dropped. A block still open at end of input is
        returned as well.
        """
        entries = []
        curname = ''
        curlines = []

        for line in fin:
            line = line.rstrip()
            if line == '':
                continue

            # Inside a block every line is kept, whatever its syntax.
            if curname:
                curlines.append(line)

            fields = line.split(":")
            if len(fields) < 2:
                continue
            keyword = fields[0].upper()
            value = fields[1].upper()

            if not curname:
                # Not inside a block: an 'interesting' BEGIN starts one.
                if keyword == "BEGIN" and value in self.interesting:
                    curname = value
                    curlines.append(line)
            elif keyword == "END" and value == curname:
                # Matching END: the block is complete.
                entries.append("\n".join(curlines) + "\n")
                curname = ''
                curlines = []

        # Unterminated last block: keep what was accumulated.
        if curlines:
            entries.append("\n".join(curlines) + "\n")

        return entries


##### Main program: either talk to the parent or execute test loop
def main():
    """Run under an execm parent, or dump all entries of argv[1]."""
    e = rclexecm.RclExecM()
    ical = IcalExtractor(e)

    if len(sys.argv) == 1:
        e.mainloop(ical)
        return

    # Got a file name parameter: testing without an execm parent
    # Loop on all entries
    if not ical.openfile({'filename:': sys.argv[1]}):
        print("Open error")
        sys.exit(1)

    ecnt = 0
    while 1:
        ok, data, ipath, eof = ical.getnext("")
        if not ok:
            print("Got error, eof %d" % eof)
            break
        ecnt = ecnt + 1
        print("=========== ENTRY %d =================" % ecnt)
        print(data)
        print("")


if __name__ == "__main__":
    main()