rclimg
188 lines (171 with data), 5.5 kB
#!/usr/bin/env perl -w
# @(#$Id: rclimg,v 1.5 2008-10-09 06:41:21 dockes Exp $ (C) 2007 Cedric Scott
#######################################################
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
######################################################
#
# Extract image tags with exiftool and convert the data to html for
# recoll indexing.
#
#
# maps image file tags to xapian tags
#
$tagMap = {
'subject' => 'subject',
'title' => 'title',
'headline' => 'title',
'caption' => 'caption',
'caption-abstract' => 'caption',
'author' => 'author',
'creator' => 'creator',
'from' => 'from',
'keywords' => 'keywords',
'keyword' => 'keyword',
'tag' => 'tag',
};
# set to non-zero if tags which map to xapian tags are to output
# in the body as well as the header
#
$headAndBody = 1;
# xapianTag
# returns a xapian tag to be used for this tag
#
sub xapianTag {
my $imgtag = shift;
while ( ( $tagre, $xapiantag) = each %{$tagMap} ) {
return $xapiantag if $imgtag =~ /^$tagre$/i;
}
return undef;
}
sub imgTagsToHtml {
my $imageFile = shift;
my $output = "";
$imageFile = '-' if $imageFile eq '';
unless ( open(IMGF, $imageFile) ) {
print STDERR "$0: can't open file $imageFile\n";
return $output; # file doesn't exist or can't be read
}
$info = ImageInfo(\*IMGF);
return $output unless $info;
$fields = [];
$other = [];
$titleHtmlTag = "";
foreach $tagname ( sort keys %{$info} ) {
$xapiantag = xapianTag($tagname);
if (defined $xapiantag ) {
push @{$fields}, [ $xapiantag, $info->{$tagname} ];
if ($xapiantag eq 'title') {
$titleHtmlTag = "<title>$info->{$tagname}</title>";
}
push @{$other}, [ $tagname, $info->{$tagname} ] if $headAndBody;
} else {
push @{$other}, [ $tagname, $info->{$tagname} ];
}
}
$output = "<html>\n<head>\n$titleHtmlTag\n" .
"<meta http-equiv=\"Content-Type\" content=\"text/html;charset=UTF-8\">\n";
foreach $tagpair ( @{$fields} ) {
($tagname, $value) = @{$tagpair};
$output = $output . "<meta name=\"$tagname\" content=\"$value\">\n";
}
$output = $output . "</head><body>\n";
foreach $tagpair (@{$other} ) {
($tagname, $value) = @{$tagpair};
$output = $output . sprintf("%30s : %s<br>\n", $tagname, $value);
}
$output = $output . "</body>\n</html>\n";
return $output;
}
####################################################################
# Code for the rclexecm filter<->indexer protocol from here
# Get one line from stdin (from recollindex), exit on eof
sub readlineorexit {
my $s = <STDIN>;
unless ($s) {
# print STDERR "RCLIMG: EOF\n";
exit 0;
}
return $s
}
# Read one named parameter
sub readparam {
my $s = readlineorexit();
if ($s eq "\n") {
return ("","");
}
my @l = split(' ', $s);
if (scalar(@l) != 2) {
print STDERR "RCLIMG: bad line:", $s;
exit 1;
}
my $paramname = lc $l[0];
my $paramsize = $l[1];
if ($paramsize > 0) {
my $n = read STDIN, $paramdata, $paramsize;
if ($n != $paramsize) {
print STDERR "RCLIMG: [$paramname] expected $paramsize, got $n\n";
exit 1;
}
}
# print STDERR "RCLIMG: [$paramname] $paramsize bytes: [$paramdata]\n";
return ($paramname, $paramdata);
}
#
# Main program starts here. Talks the rclexecm protocol
#
# JFD: replaced the "use" call with a runtime load with error checking,
# for compat with the missing filter detection code.
#use Image::ExifTool qw(:Public);
eval {require Image::ExifTool; Image::ExifTool->import(qw(:Public));};
if ($@) {
print "RECFILTERROR HELPERNOTFOUND Perl::Image::ExifTool\n";
exit(1);
}
binmode(STDIN) || die "cannot binmode STDIN";
binmode(STDOUT) || die "cannot binmode STDOUT";
#print STDERR "RCLIMG: Starting\n";
$| = 1;
while (1) {
# print STDERR "RCLIMG: waiting for command\n";
my %params = ();
# Read at most 10 parameters (we only actually use one), stop at empty line
for($i = 1; $i < 10; $i++) {
my ($name, $value) = readparam;
if ($name eq "") {
last;
}
$params{$name} = $value;
}
unless (defined $params{"filename:"}) {
print STDERR "RCLIMG: no filename ??\n";
# Recoll is requesting next subdocument (it shouldn't cause we
# returned eofnext last time), but we have none, just say so:
print "Eofnow:0\nDocument: 0\n\n";
next;
}
print "Mimetype: 9\ntext/html";
my $data = imgTagsToHtml($params{"filename:"});
my $l = length($data);
print "Document: $l\n";
# print STDERR "RCLIMG: writing $l bytes of data\n";
print $data;
# Say we have no further documents for this file
print "Eofnext: 0\n";
# End of output parameters: print empty line
print "\n";
# print STDERR "RCLIMG: done writing data\n";
}
#print STDERR "RCLIMG: Exiting\n";