Parent: [f344e8] (diff)

Download this file

rclimg.py    128 lines (106 with data), 4.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python2
# Python-based Image Tag extractor for Recoll. This is less thorough
# than the Perl-based rclimg script, but useful if you don't want to
# have to install Perl (e.g. on Windows).
#
# Uses pyexiv2. Also tried Pillow, found it useless for tags.
#
from __future__ import print_function
import sys
import os
import rclexecm
import re
# pyexiv2 is mandatory for this handler: if it cannot be imported, report
# the missing helper on stdout and exit with a failure status.
try:
    import pyexiv2
except Exception:
    # Any import-time failure makes the handler unusable. Unlike a bare
    # "except:", this does not swallow KeyboardInterrupt or SystemExit.
    print("RECFILTERROR HELPERNOTFOUND python:pyexiv2")
    sys.exit(1)
# Matches tag keys whose last dot-separated component is a hexadecimal
# number (e.g. "Exif.Sony1.0x2001"). Written as a raw string so that the
# "\." escape reaches the re module unchanged instead of being an invalid
# string-literal escape sequence.
khexre = re.compile(r'.*\.0[xX][0-9a-fA-F]+$')

# Keys whose values are concatenated to build the HTML <title>.
pyexiv2_titles = {
    'Xmp.dc.subject',
    'Xmp.lr.hierarchicalSubject',
    'Xmp.MicrosoftPhoto.LastKeywordXMP',
}

# Keys for which we set meta tags
# NOTE(review): not referenced by the code visible in this file -- confirm
# whether it is still needed.
meta_pyexiv2_keys = {
    'Xmp.dc.subject',
    'Xmp.lr.hierarchicalSubject',
    'Xmp.MicrosoftPhoto.LastKeywordXMP',
    'Xmp.digiKam.TagsList',
    'Exif.Photo.DateTimeDigitized',
    'Exif.Photo.DateTimeOriginal',
    'Exif.Image.DateTime',
}

# Date keys in order of preference: the first one present in an image
# supplies the "date" meta tag.
exiv2_dates = [
    'Exif.Photo.DateTimeOriginal',
    'Exif.Image.DateTime',
    'Exif.Photo.DateTimeDigitized',
]
class ImgTagExtractor:
    """Image tag extractor driven by an rclexecm protocol object.

    Reads the Exif, IPTC and XMP tags of an image with pyexiv2 and renders
    them as one small HTML document: selected tags go into <title> and
    <meta> elements, and every retained tag is listed in the body.
    """

    def __init__(self, em):
        """Remember the protocol object.

        em is used for logging (rclog), HTML escaping (htmlescape) and
        setting the output MIME type (setmimetype).
        """
        self.em = em
        # Number of getnext() extractions attempted since openfile().
        self.currentindex = 0

    def _read_tags(self, filename):
        """Return a {key: str(raw value)} dict of the tags of filename."""
        metadata = pyexiv2.ImageMetadata(filename)
        metadata.read()
        keys = metadata.exif_keys + metadata.iptc_keys + metadata.xmp_keys
        # We skip numeric keys and undecoded makernote data.
        return {
            k: str(metadata[k].raw_value)
            for k in keys
            if k != 'Exif.Photo.MakerNote' and not khexre.match(k)
        }

    def _render_html(self, mdic):
        """Build the HTML document (as bytes) describing the tags in mdic."""
        escape = self.em.htmlescape
        parts = [b'<html><head>\n']

        # Title: the distinct values of the title keys. The set removes
        # duplicates; sorting makes the resulting order deterministic.
        titles = {escape(mdic[k]) for k in pyexiv2_titles if k in mdic}
        if titles:
            title = ""
            for value in sorted(titles):
                # Drop brackets and quotes (presumably left over from the
                # str() of a list-valued tag -- confirm).
                value = value.replace('[', '').replace(']', '')
                title += value.replace("'", "") + " "
            parts.append(rclexecm.makebytes("<title>" + title + "</title>\n"))

        # Date: only the first available key in exiv2_dates is used.
        for k in exiv2_dates:
            if k in mdic:
                # Recoll wants: %Y-%m-%d %H:%M:%S.
                # We get 2014:06:27 14:58:47
                dt = mdic[k].replace(":", "-", 2)
                parts.append(b'<meta name="date" content="' +
                             rclexecm.makebytes(dt) + b'">\n')
                break

        if 'Xmp.digiKam.TagsList' in mdic:
            parts.append(
                b'<meta name="keywords" content="' +
                rclexecm.makebytes(escape(mdic['Xmp.digiKam.TagsList'])) +
                b'">\n')

        parts.append(b'</head><body>\n')
        # Body: one "key : value" line per retained tag.
        for k, v in mdic.items():
            parts.append(rclexecm.makebytes(k + " : " + escape(v) +
                                            "<br />\n"))
        parts.append(b'</body></html>')
        return b''.join(parts)

    def extractone(self, params):
        """Extract the tags of the file named by params["filename:"].

        Returns a 4-tuple (ok, data, "", eof). On success ok is True, data
        is the HTML document as bytes and eof is RclExecM.eofnext. When the
        file name is missing or the tags cannot be read, the problem is
        logged through em.rclog and (False, "", "", RclExecM.eofnow) is
        returned.
        """
        #self.em.rclog("extractone %s" % params["filename:"])
        if "filename:" not in params:
            self.em.rclog("extractone: no file name")
            # No document has been built at this point: return empty data.
            return (False, "", "", rclexecm.RclExecM.eofnow)
        filename = params["filename:"]

        try:
            mdic = self._read_tags(filename)
        except Exception as err:
            # Best effort: an unreadable image is logged and reported as a
            # failed extraction, it must not crash the handler.
            self.em.rclog("extractone: extract failed: [%s]" % err)
            return (False, "", "", rclexecm.RclExecM.eofnow)

        docdata = self._render_html(mdic)
        self.em.setmimetype("text/html")
        return (True, docdata, "", rclexecm.RclExecM.eofnext)

    ###### File type handler api, used by rclexecm ---------->
    def openfile(self, params):
        """Reset the per-file document counter; always returns True."""
        self.currentindex = 0
        return True

    def getipath(self, params):
        """Return the document for the file in params (same as extractone)."""
        return self.extractone(params)

    def getnext(self, params):
        """Return the file's single document, then signal end of file.

        The first call after openfile() returns the extractone() result;
        later calls return (False, "", "", RclExecM.eofnow).
        """
        if self.currentindex >= 1:
            return (False, "", "", rclexecm.RclExecM.eofnow)
        ret = self.extractone(params)
        self.currentindex += 1
        return ret
# Script entry point: create the rclexecm protocol object, attach an
# extractor to it and pass both to rclexecm.main().
if __name__ == '__main__':
    protocol = rclexecm.RclExecM()
    rclexecm.main(protocol, ImgTagExtractor(protocol))