Parent: [7ebbdb] (diff)

Child: [c2a515] (diff)

Download this file

uprclsearch.py    166 lines (140 with data), 4.5 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env python
from __future__ import print_function
import uprclfolders
from uprclutils import *
from recoll import recoll
def _getchar(s, i):
if i < len(s):
return i+1,s[i]
else:
return i,None
def _readword(s, i):
w = ''
for j in range(i, len(s)):
if s[j].isspace():
return j,w
w += s[j]
return j,w
# Called with '"' already read:
def _readstring(s, i):
str = '"'
escape = False
for j in range(i, len(s)):
#print("s[j] [%s] out now [%s]" % (s[j],out))
if s[j] == '\\':
if not escape:
escape = True
str += '\\'
continue
if s[j] == '"':
str += '"'
if not escape:
return j+1, str
else:
str += s[j]
escape = False
return len(s), str
def upnpsearchtorecoll(s):
    """Translate an UPnP search criteria string into a recoll query string.

    The input is scanned left to right and converted into a list of
    tokens, which are finally concatenated, each followed by one space:

    - '(', ')', '>', '<' and '=' are copied as is.
    - A double-quoted string is copied, quotes included.
    - 'contains', 'derivedFrom' and 'exists' become ':'.
    - 'doesNotContain' becomes ':' and a '-' is inserted before the
      previously emitted token.
    - 'true' becomes '*' and 'false' becomes a nonsense placeholder term
      (presumably chosen so that it matches nothing).
    - 'and' is dropped and 'or' becomes 'OR'.
    - Any other word or string is copied. The first such token after a
      'derivedFrom' gets a '*' appended (inside the closing quote for a
      quoted string).
    - A lone '*' produces the single token 'mime:*'.

    Raises Exception if '*' is not the only input, if a quoted string is
    not terminated, or if 'doesNotContain' comes before any other token.
    """
    uplog("upnpsearchtorecoll:in: <%s>" % s)

    # Turn all the other whitespace characters into plain spaces
    for ch in '\t\n\r\f':
        s = s.replace(ch, ' ')

    out = []
    hadDerived = False
    i = 0
    while True:
        i, c = _getchar(s, i)
        if not c:
            break

        if c.isspace():
            continue

        if c == "*":
            if (len(out) > 1 or (len(out) == 1 and not out[-1].isspace())) or \
               (len(s[i:]) and not s[i:].isspace()):
                raise Exception("If * is used it must be the only input")
            out = ["mime:*"]
            break

        if c == '(' or c == ')' or c == '>' or c == '<' or c == '=':
            out.append(c)
            continue

        if c == '"':
            i, w = _readstring(s, i)
            # A result reduced to the opening quote means that the input
            # ended right after it: this is unterminated too.
            if len(w) < 2 or not w.endswith('"'):
                raise Exception("Unterminated string in [%s]" % out)
        else:
            # c is the first character of a bare word: read it again as
            # part of the word.
            start = i - 1
            _, w = _readword(s, start)
            # Resume just past the word. Computing the position from the
            # word length guarantees that the scan always advances, even
            # for a word which runs up to the end of the input.
            i = start + len(w)

        #print("Got word [%s]" % w)
        if w == 'contains' or w == 'exists':
            out.append(':')
        elif w == 'doesNotContain':
            if len(out) < 1:
                raise Exception("doesNotContain can't be the first word")
            # Negate by prefixing the previous token (the field name)
            out.insert(-1, "-")
            out.append(':')
        elif w == 'derivedFrom':
            # The next value will be turned into a prefix match
            hadDerived = True
            out.append(':')
        elif w == 'true':
            out.append('*')
        elif w == 'false':
            out.append('xxxjanzocsduochterrrrm')
        elif w == 'and':
            # Recoll has implied AND, but see next
            pass
        elif w == 'or':
            # Does not work because OR/AND priorities are reversed
            # between recoll and upnp. This would be very
            # difficult to correct, let's hope that the callers
            # use parentheses
            out.append('OR')
        else:
            if hadDerived:
                hadDerived = False
                # Append the wildcard, inside the closing quote if any
                if len(w) >= 1 and w[-1] == '"':
                    w = w[:-1] + '*' + '"'
                else:
                    w += '*'
            out.append(w)

    # Every token is followed by a space, the last one included
    ostr = "".join([tok + " " for tok in out])
    uplog("upnpsearchtorecoll:out: <%s>" % ostr)
    return ostr
def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
    """Run the UPnP search upnps against the recoll index.

    The UPnP search string is translated with upnpsearchtorecoll() and a
    dir: clause built from uprclfolders.dirpath(objid) is appended to
    it. The query is executed on the index found in rclconfdir, and
    every resulting document is converted with rcldoctoentry().

    Returns the list of converted entries. An empty list is returned if
    executing the query raises, or if it reports no results.
    """
    rcls = upnpsearchtorecoll(upnps) + \
        " dir:\"" + uprclfolders.dirpath(objid) + "\""
    uplog("Search: recoll search: %s" % rcls)

    rcldb = recoll.connect(confdir=rclconfdir)
    try:
        rclq = rcldb.query()
        rclq.execute(rcls)
    except Exception as err:
        uplog("Search: recoll query raised: %s" % err)
        return []

    uplog("Estimated query results: %d" % (rclq.rowcount))
    if rclq.rowcount == 0:
        return []

    # maxcnt == 0 means no limit on the number of results
    maxcnt = 0
    totcnt = 0
    entries = []
    while True:
        batch = rclq.fetchmany()
        for doc in batch:
            # All the entries get the same placeholder object id
            docid = idprefix + '$' + 'seeyoulater'
            entries.append(
                rcldoctoentry(docid, objid, httphp, pathprefix, doc))
            totcnt += 1
        limitreached = maxcnt > 0 and totcnt >= maxcnt
        # A batch which is not exactly arraysize long ends the fetching
        if limitreached or len(batch) != rclq.arraysize:
            break

    uplog("Search retrieved %d docs" % (totcnt,))
    return entries
if __name__ == '__main__':
    # Manual check: translate a sample search string, which includes
    # escaped characters inside a quoted value, and show the result.
    sample = '(upnp:artist derivedFrom "abc\\"def\\g") or (dc:title:xxx) '
    print("INPUT: %s" % sample)
    print("OUTPUT: %s" % upnpsearchtorecoll(sample))