|
a/src/mediaserver/cdplugins/uprcl/uprclsearch.py |
|
b/src/mediaserver/cdplugins/uprcl/uprclsearch.py |
|
... |
|
... |
19 |
if s[j].isspace():
|
19 |
if s[j].isspace():
|
20 |
return j,w
|
20 |
return j,w
|
21 |
w += s[j]
|
21 |
w += s[j]
|
22 |
return j,w
|
22 |
return j,w
|
23 |
|
23 |
|
24 |
# Called with '"' already read:
|
24 |
# Called with '"' already read.
|
|
|
25 |
|
|
|
26 |
# Upnp search term strings are double quoted, but we should not take
|
|
|
27 |
# them as recoll phrases. We separate parts which are internally
|
|
|
28 |
# quoted, and become phrases, and lists of words which we interpret as
|
|
|
29 |
# an AND search (comma-separated). Internal quotes come backslash-escaped.
|
25 |
def _readstring(s, i):
|
30 |
def _parsestring(s, i=0):
    """Parse a double-quoted UPnP search value, opening quote already read.

    Scans s from index i up to the closing (unescaped) double quote,
    undoing the backslash escaping on the way, then hands the resulting
    text to stringToStrings() to be split into tokens.

    Escape handling, as implemented below:
      - outside an inner quoted part, backslash + double quote emits a
        double quote and opens an inner quoted part; backslash + any other
        character emits that character without the backslash;
      - inside an inner quoted part, backslash + double quote emits a
        double quote and closes the part; backslash + any other character
        is emitted unchanged (backslash kept); an unescaped double quote
        is emitted as plain text.

    Returns a (position, tokens) pair: the index at which the caller
    should resume reading s, and the value returned by stringToStrings().
    If no closing quote is found (or there is nothing left to scan), the
    position is len(s), so that the caller sees the end of input instead
    of re-reading the last character or hitting an unbound index.
    """
    uplog("parseString: input: <%s>" % s[i:])
    # First change '''"hello \"one phrase\"''' world" into
    #  '''hello "one phrase" world'''
    # Note that we can't handle quoted dquotes inside string
    pieces = []
    escape = False
    instring = False
    # Default resume position: everything consumed. Only overridden when
    # the closing quote is actually found.
    endpos = len(s)
    for j in range(i, len(s)):
        c = s[j]
        if instring:
            if escape:
                if c == '"':
                    # Escaped quote closes the inner quoted part
                    pieces.append('"')
                    instring = False
                else:
                    pieces.append('\\' + c)
                escape = False
            elif c == '\\':
                escape = True
            else:
                pieces.append(c)
        elif escape:
            pieces.append(c)
            escape = False
            if c == '"':
                # Escaped quote opens an inner quoted part
                instring = True
        elif c == '\\':
            escape = True
        elif c == '"':
            # Closing quote of the whole value.
            # NOTE(review): the resume position is two past the quote, as
            # before, which also skips the character following the quote
            # (presumably a space) - confirm against _getchar().
            endpos = j + 2
            break
        else:
            pieces.append(c)

    tokens = stringToStrings(''.join(pieces))
    return endpos, tokens
|
|
|
70 |
|
|
|
71 |
def _appendterms(out, v, field, oper):
    """Append the parsed search terms in v to the output token list.

    Terms which split() into exactly one word are gathered into a single
    comma-separated element, which is always appended to out (even when
    empty). Every other term is then appended as three elements: field,
    oper, and the term wrapped in double quotes.
    """
    uplog("_appendterms: v %s field <%s> oper <%s>" % (v,field,oper))
    singles = []
    quoted = []
    for term in v:
        # One whitespace-separated word: plain term. Anything else: phrase.
        target = singles if len(term.split()) == 1 else quoted
        target.append(term)
    out.append(",".join(singles))
    for phrase in quoted:
        out.extend([field, oper, '"' + phrase + '"'])
|
42 |
|
87 |
|
43 |
escape = False
|
|
|
44 |
|
|
|
45 |
return len(s), str
|
|
|
46 |
|
|
|
47 |
def upnpsearchtorecoll(s):
|
88 |
def upnpsearchtorecoll(s):
|
48 |
uplog("upnpsearchtorecoll:in: <%s>" % s)
|
89 |
uplog("upnpsearchtorecoll:in: <%s>" % s)
|
49 |
|
90 |
|
50 |
s = re.sub('[\t\n\r\f ]+', ' ', s)
|
91 |
s = re.sub('[\t\n\r\f ]+', ' ', s)
|
51 |
|
92 |
|
52 |
out = []
|
93 |
out = []
|
53 |
hadDerived = False
|
94 |
hadDerived = False
|
54 |
i = 0
|
95 |
i = 0
|
|
|
96 |
field = ""
|
|
|
97 |
oper = ""
|
55 |
while True:
|
98 |
while True:
|
56 |
i,c = _getchar(s, i)
|
99 |
i,c = _getchar(s, i)
|
57 |
if not c:
|
100 |
if not c:
|
58 |
break
|
101 |
break
|
59 |
|
102 |
|
|
... |
|
... |
65 |
(len(s[i:]) and not s[i:].isspace()):
|
108 |
(len(s[i:]) and not s[i:].isspace()):
|
66 |
raise Exception("If * is used it must be the only input")
|
109 |
raise Exception("If * is used it must be the only input")
|
67 |
out = ["mime:*"]
|
110 |
out = ["mime:*"]
|
68 |
break
|
111 |
break
|
69 |
|
112 |
|
70 |
if c == '(' or c == ')' or c == '>' or c == '<' or c == '=':
|
113 |
if c == '(' or c == ')':
|
71 |
out.append(c)
|
114 |
out.append(c)
|
|
|
115 |
elif c == '>' or c == '<' or c == '=':
|
|
|
116 |
oper += c
|
72 |
else:
|
117 |
else:
|
73 |
if c == '"':
|
118 |
if c == '"':
|
74 |
i,w = _readstring(s, i)
|
119 |
i,v = _parsestring(s, i)
|
75 |
if not w.endswith('"'):
|
120 |
uplog("_parsestring ret: %s" % v)
|
76 |
raise Exception("Unterminated string in [%s]" % out)
|
121 |
_appendterms(out, v, field, oper)
|
|
|
122 |
oper = ""
|
|
|
123 |
field = ""
|
|
|
124 |
continue
|
77 |
else:
|
125 |
else:
|
78 |
i -= 1
|
126 |
i -= 1
|
79 |
i,w = _readword(s, i)
|
127 |
i,w = _readword(s, i)
|
80 |
|
128 |
|
81 |
#print("Got word [%s]" % w)
|
129 |
#print("Got word [%s]" % w)
|
82 |
if w == 'contains':
|
130 |
if w == 'contains':
|
83 |
out.append(':')
|
131 |
out.append(':')
|
|
|
132 |
oper = ':'
|
84 |
elif w == 'doesNotContain':
|
133 |
elif w == 'doesNotContain':
|
85 |
if len(out) < 1:
|
134 |
if len(out) < 1:
|
86 |
raise Exception("doesNotContain can't be the first word")
|
135 |
raise Exception("doesNotContain can't be the first word")
|
87 |
out.insert(-1, "-")
|
136 |
out.insert(-1, "-")
|
88 |
out.append(':')
|
137 |
out.append(':')
|
|
|
138 |
oper = ':'
|
89 |
elif w == 'derivedFrom':
|
139 |
elif w == 'derivedFrom':
|
90 |
hadDerived = True
|
140 |
hadDerived = True
|
91 |
out.append(':')
|
141 |
out.append(':')
|
|
|
142 |
oper = ':'
|
92 |
elif w == 'true':
|
143 |
elif w == 'true':
|
93 |
out.append('*')
|
144 |
out.append('*')
|
|
|
145 |
oper = ""
|
94 |
elif w == 'false':
|
146 |
elif w == 'false':
|
95 |
out.append('xxxjanzocsduochterrrrm')
|
147 |
out.append('xxxjanzocsduochterrrrm')
|
96 |
elif w == 'exists':
|
148 |
elif w == 'exists':
|
97 |
out.append(':')
|
149 |
out.append(':')
|
|
|
150 |
oper = ':'
|
98 |
elif w == 'and':
|
151 |
elif w == 'and':
|
99 |
# Recoll has implied AND, but see next
|
152 |
# Recoll has implied AND, but see next
|
100 |
pass
|
153 |
pass
|
101 |
elif w == 'or':
|
154 |
elif w == 'or':
|
102 |
# Does not work because OR/AND priorities are reversed
|
155 |
# Does not work because OR/AND priorities are reversed
|
103 |
# between recoll and upnp. This would be very
|
156 |
# between recoll and upnp. This would be very
|
104 |
# difficult to correct, let's hope that the callers
|
157 |
# difficult to correct, let's hope that the callers
|
105 |
# use parentheses
|
158 |
# use parentheses
|
106 |
out.append('OR')
|
159 |
out.append('OR')
|
107 |
else:
|
160 |
else:
|
108 |
if hadDerived:
|
161 |
field = upnp2rclfields[w]
|
109 |
hadDerived = False
|
|
|
110 |
if len(w) >= 1 and w[-1] == '"':
|
|
|
111 |
w = w[:-1] + '*' + '"'
|
|
|
112 |
else:
|
|
|
113 |
w += '*'
|
|
|
114 |
out.append(w)
|
162 |
out.append(field)
|
|
|
163 |
oper = ""
|
115 |
|
164 |
|
116 |
ostr = ""
|
165 |
ostr = ""
|
117 |
for tok in out:
|
166 |
for tok in out:
|
118 |
ostr += tok + " "
|
167 |
ostr += tok + " "
|
119 |
uplog("upnpsearchtorecoll:out: <%s>" % ostr)
|
168 |
uplog("upnpsearchtorecoll:out: <%s>" % ostr)
|
|
... |
|
... |
122 |
|
171 |
|
123 |
def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
|
172 |
def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
|
124 |
rcls = upnpsearchtorecoll(upnps)
|
173 |
rcls = upnpsearchtorecoll(upnps)
|
125 |
|
174 |
|
126 |
filterdir = uprclfolders.dirpath(objid)
|
175 |
filterdir = uprclfolders.dirpath(objid)
|
|
|
176 |
if filterdir and filterdir != "/":
|
127 |
rcls += " dir:\"" + filterdir + "\""
|
177 |
rcls += " dir:\"" + filterdir + "\""
|
128 |
|
178 |
|
129 |
uplog("Search: recoll search: %s" % rcls)
|
179 |
uplog("Search: recoll search: <%s>" % rcls)
|
130 |
|
180 |
|
131 |
rcldb = recoll.connect(confdir=rclconfdir)
|
181 |
rcldb = recoll.connect(confdir=rclconfdir)
|
132 |
try:
|
182 |
try:
|
133 |
rclq = rcldb.query()
|
183 |
rclq = rcldb.query()
|
134 |
rclq.execute(rcls)
|
184 |
rclq.execute(rcls)
|
|
... |
|
... |
140 |
if rclq.rowcount == 0:
|
190 |
if rclq.rowcount == 0:
|
141 |
return []
|
191 |
return []
|
142 |
|
192 |
|
143 |
entries = []
|
193 |
entries = []
|
144 |
maxcnt = 0
|
194 |
maxcnt = 0
|
145 |
totcnt = 0
|
|
|
146 |
while True:
|
195 |
while True:
|
147 |
docs = rclq.fetchmany()
|
196 |
docs = rclq.fetchmany()
|
148 |
for doc in docs:
|
197 |
for doc in docs:
|
149 |
id = idprefix + '$' + 'seeyoulater'
|
198 |
id = idprefix + '$' + 'seeyoulater'
|
150 |
e = rcldoctoentry(id, objid, httphp, pathprefix, doc)
|
199 |
e = rcldoctoentry(id, objid, httphp, pathprefix, doc)
|
|
|
200 |
if e:
|
151 |
entries.append(e)
|
201 |
entries.append(e)
|
152 |
totcnt += 1
|
202 |
if (maxcnt > 0 and len(entries) >= maxcnt) or \
|
153 |
if (maxcnt > 0 and totcnt >= maxcnt) or len(docs) != rclq.arraysize:
|
203 |
len(docs) != rclq.arraysize:
|
154 |
break
|
204 |
break
|
155 |
uplog("Search retrieved %d docs" % (totcnt,))
|
205 |
uplog("Search retrieved %d docs" % (len(entries),))
|
156 |
|
206 |
|
157 |
return entries
|
207 |
return sorted(entries, cmp=cmpentries)
|
158 |
|
208 |
|
159 |
|
209 |
|
160 |
|
210 |
|
161 |
if __name__ == '__main__':
|
211 |
if __name__ == '__main__':
|
162 |
s = '(upnp:artist derivedFrom "abc\\"def\\g") or (dc:title:xxx) '
|
212 |
s = '(upnp:artist derivedFrom "abc\\"def\\g") or (dc:title:xxx) '
|