|
a/src/mediaserver/cdplugins/uprcl/uprclfolders.py |
|
b/src/mediaserver/cdplugins/uprcl/uprclfolders.py |
|
... |
|
... |
16 |
|
16 |
|
17 |
import os
|
17 |
import os
|
18 |
import shlex
|
18 |
import shlex
|
19 |
import urllib
|
19 |
import urllib
|
20 |
import sys
|
20 |
import sys
|
|
|
21 |
import time
|
|
|
22 |
from timeit import default_timer as timer
|
21 |
|
23 |
|
22 |
from uprclutils import uplog, docarturi, audiomtypes, rcldirentry, \
|
24 |
from uprclutils import uplog, docarturi, audiomtypes, rcldirentry, \
|
23 |
rcldoctoentry, cmpentries
|
25 |
rcldoctoentry, cmpentries
|
24 |
from recoll import recoll
|
26 |
from recoll import recoll
|
25 |
from recoll import rclconfig
|
27 |
from recoll import rclconfig
|
26 |
|
28 |
|
27 |
g_foldersIdPrefix = '0$uprcl$folders'
|
29 |
_foldersIdPfx = '0$uprcl$folders'
|
28 |
|
30 |
|
29 |
# Debug : limit processed recoll entries for speed
|
31 |
# Debug : limit processed recoll entries for speed
|
30 |
g_maxrecollcnt = 0
|
32 |
_maxrclcnt = 0
|
|
|
33 |
|
|
|
34 |
_dirvec = []
|
31 |
|
35 |
|
32 |
# Internal init: create the directory tree (folders view) from the doc
|
36 |
# Internal init: create the directory tree (folders view) from the doc
|
33 |
# array by splitting the url in each doc.
|
37 |
# array by splitting the url in each doc.
|
34 |
#
|
38 |
#
|
35 |
# The dirvec vector has one entry for each directory. Each entry is a
|
39 |
# The dirvec vector has one entry for each directory. Each entry is a
|
|
... |
|
... |
52 |
return len(dirvec) - 1
|
56 |
return len(dirvec) - 1
|
53 |
|
57 |
|
54 |
def _rcl2folders(docs, confdir, httphp, pathprefix):
|
58 |
def _rcl2folders(docs, confdir, httphp, pathprefix):
|
55 |
global dirvec
|
59 |
global dirvec
|
56 |
dirvec = []
|
60 |
dirvec = []
|
|
|
61 |
start = timer()
|
57 |
|
62 |
|
58 |
rclconf = rclconfig.RclConfig(confdir)
|
63 |
rclconf = rclconfig.RclConfig(confdir)
|
59 |
topdirs = [os.path.expanduser(d) for d in
|
64 |
topdirs = [os.path.expanduser(d) for d in
|
60 |
shlex.split(rclconf.getConfParam('topdirs'))]
|
65 |
shlex.split(rclconf.getConfParam('topdirs'))]
|
61 |
topdirs = [d.rstrip('/') for d in topdirs]
|
66 |
topdirs = [d.rstrip('/') for d in topdirs]
|
|
... |
|
... |
147 |
|
152 |
|
148 |
if False:
|
153 |
if False:
|
149 |
for ent in dirvec:
|
154 |
for ent in dirvec:
|
150 |
uplog("%s" % ent)
|
155 |
uplog("%s" % ent)
|
151 |
|
156 |
|
|
|
157 |
end = timer()
|
|
|
158 |
uplog("_rcl2folders took %.2f Seconds" % (end - start))
|
152 |
return dirvec
|
159 |
return dirvec
|
153 |
|
160 |
|
154 |
# Internal init: fetch all the docs by querying Recoll with [mime:*],
|
161 |
# Internal init: fetch all the docs by querying Recoll with [mime:*],
|
155 |
# which is guaranteed to match every doc without overflowing the query
|
162 |
# which is guaranteed to match every doc without overflowing the query
|
156 |
# size (because the number of mime types is limited). Something like
|
163 |
# size (because the number of mime types is limited). Something like
|
157 |
# title:* would overflow.
|
164 |
# title:* would overflow.
|
158 |
def _fetchalldocs(confdir):
|
165 |
def _fetchalldocs(confdir):
|
|
|
166 |
start = timer()
|
159 |
allthedocs = []
|
167 |
allthedocs = []
|
160 |
|
168 |
|
161 |
rcldb = recoll.connect(confdir=confdir)
|
169 |
rcldb = recoll.connect(confdir=confdir)
|
162 |
rclq = rcldb.query()
|
170 |
rclq = rcldb.query()
|
163 |
rclq.execute("mime:*", stemming=0)
|
171 |
rclq.execute("mime:*", stemming=0)
|
|
... |
|
... |
167 |
while True:
|
175 |
while True:
|
168 |
docs = rclq.fetchmany()
|
176 |
docs = rclq.fetchmany()
|
169 |
for doc in docs:
|
177 |
for doc in docs:
|
170 |
allthedocs.append(doc)
|
178 |
allthedocs.append(doc)
|
171 |
totcnt += 1
|
179 |
totcnt += 1
|
172 |
if (g_maxrecollcnt > 0 and totcnt >= g_maxrecollcnt) or \
|
180 |
if (_maxrclcnt > 0 and totcnt >= _maxrclcnt) or \
|
173 |
len(docs) != rclq.arraysize:
|
181 |
len(docs) != rclq.arraysize:
|
174 |
break
|
182 |
break
|
|
|
183 |
time.sleep(0)
|
|
|
184 |
end = timer()
|
175 |
uplog("Retrieved %d docs" % (totcnt,))
|
185 |
uplog("Retrieved %d docs in %.2f Seconds" % (totcnt,end - start))
|
176 |
return allthedocs
|
186 |
return allthedocs
|
177 |
|
187 |
|
178 |
|
188 |
|
179 |
# Initialize (read recoll data and build tree). This is called by
# uprcl-app init
#
# Fetches all the documents through _fetchalldocs() and hands them to
# _rcl2folders() to build the directory vector. Both results are kept
# in module globals (g_alldocs, _dirvec) for the browsing functions.
#
# confdir is passed to both helpers; httphp and pathprefix are
# forwarded unchanged to _rcl2folders().
#
# Returns the document list (the same object as g_alldocs).
def inittree(confdir, httphp, pathprefix):
    global g_alldocs, _dirvec

    g_alldocs = _fetchalldocs(confdir)
    _dirvec = _rcl2folders(g_alldocs, confdir, httphp, pathprefix)
    return g_alldocs
|
186 |
|
197 |
|
187 |
|
198 |
|
188 |
|
199 |
|
189 |
##############
|
200 |
##############
|
190 |
# Browsing the initialized [folders] hierarchy
|
201 |
# Browsing the initialized [folders] hierarchy
|
191 |
|
202 |
|
192 |
|
203 |
|
193 |
# Extract dirvec index from objid, according to the way we generate them.
#
# Directory object ids are built as _foldersIdPfx + '$d' + <index>. The
# bare prefix (nothing after it) designates index 0.
#
# Raises Exception if pid does not start with the folders prefix, if no
# documents are loaded, if pid does not designate a directory, or if
# the index is not a valid position in _dirvec.
def _objidtodiridx(pid):
    if not pid.startswith(_foldersIdPfx):
        raise Exception("folders.browse: bad pid %s" % pid)

    if len(g_alldocs) == 0:
        raise Exception("folders:browse: no docs")

    suffix = pid[len(_foldersIdPfx):]
    if not suffix:
        diridx = 0
    else:
        # Guard the length before looking at the type letter, so that a
        # truncated id is reported like any other non-directory id
        # instead of escaping as an IndexError.
        if len(suffix) < 2 or suffix[1] != 'd':
            raise Exception("folders:browse: called on non dir objid %s" % pid)
        try:
            diridx = int(suffix[2:])
        except ValueError:
            raise Exception("folders:browse: bad pid %s" % pid)

    # A negative value would silently index _dirvec from the end, so it
    # is rejected together with out-of-range values.
    if diridx < 0 or diridx >= len(_dirvec):
        raise Exception("folders:browse: bad pid %s" % pid)

    return diridx
|
213 |
|
224 |
|
214 |
|
225 |
|
|
... |
|
... |
218 |
|
229 |
|
219 |
|
230 |
|
220 |
# Look at all the non-directory docs inside the directory, and return
# the cover art URI of the first one (in dict iteration order) which
# has one.
#
# Each value in a _dirvec entry is indexed here as ids[1] = index into
# g_alldocs, with a negative value meaning that there is no doc.
def _arturifordir(diridx):
    # values() instead of iteritems(): the entry name is not needed, and
    # values() exists on both Python 2 and Python 3 dicts.
    for ids in _dirvec[diridx].values():
        docidx = ids[1]
        if docidx < 0:
            continue
        doc = g_alldocs[docidx]
        if doc.mtype != 'inode/directory' and doc.albumarturi:
            return doc.albumarturi
|
228 |
|
239 |
|
|
... |
|
... |
243 |
|
254 |
|
244 |
entries = []
|
255 |
entries = []
|
245 |
|
256 |
|
246 |
# The basename call is just for diridx==0 (topdirs). Remove it if
|
257 |
# The basename call is just for diridx==0 (topdirs). Remove it if
|
247 |
# this proves a performance issue
|
258 |
# this proves a performance issue
|
248 |
for nm,ids in g_dirvec[diridx].iteritems():
|
259 |
for nm,ids in _dirvec[diridx].iteritems():
|
249 |
if nm == "..":
|
260 |
if nm == "..":
|
250 |
continue
|
261 |
continue
|
251 |
thisdiridx = ids[0]
|
262 |
thisdiridx = ids[0]
|
252 |
thisdocidx = ids[1]
|
263 |
thisdocidx = ids[1]
|
253 |
if thisdocidx >= 0:
|
264 |
if thisdocidx >= 0:
|
|
... |
|
... |
258 |
|
269 |
|
259 |
if thisdiridx >= 0:
|
270 |
if thisdiridx >= 0:
|
260 |
# Skip empty directories
|
271 |
# Skip empty directories
|
261 |
if len(dirvec[thisdiridx]) == 1:
|
272 |
if len(dirvec[thisdiridx]) == 1:
|
262 |
continue
|
273 |
continue
|
263 |
id = g_foldersIdPrefix + '$' + 'd' + str(thisdiridx)
|
274 |
id = _foldersIdPfx + '$' + 'd' + str(thisdiridx)
|
264 |
if doc and doc.albumarturi:
|
275 |
if doc and doc.albumarturi:
|
265 |
arturi = doc.albumarturi
|
276 |
arturi = doc.albumarturi
|
266 |
else:
|
277 |
else:
|
267 |
arturi = _arturifordir(thisdiridx)
|
278 |
arturi = _arturifordir(thisdiridx)
|
268 |
entries.append(rcldirentry(id, pid, os.path.basename(nm),
|
279 |
entries.append(rcldirentry(id, pid, os.path.basename(nm),
|
|
... |
|
... |
271 |
# Not a directory. docidx had better been set
|
282 |
# Not a directory. docidx had better been set
|
272 |
if thisdocidx == -1:
|
283 |
if thisdocidx == -1:
|
273 |
uplog("folders:docidx -1 for non-dir entry %s"%nm)
|
284 |
uplog("folders:docidx -1 for non-dir entry %s"%nm)
|
274 |
continue
|
285 |
continue
|
275 |
doc = g_alldocs[thisdocidx]
|
286 |
doc = g_alldocs[thisdocidx]
|
276 |
id = g_foldersIdPrefix + '$i' + str(thisdocidx)
|
287 |
id = _foldersIdPfx + '$i' + str(thisdocidx)
|
277 |
e = rcldoctoentry(id, pid, httphp, pathprefix, doc)
|
288 |
e = rcldoctoentry(id, pid, httphp, pathprefix, doc)
|
278 |
if e:
|
289 |
if e:
|
279 |
entries.append(e)
|
290 |
entries.append(e)
|
280 |
|
291 |
|
281 |
return sorted(entries, cmp=cmpentries)
|
292 |
return sorted(entries, cmp=cmpentries)
|
|
... |
|
... |
294 |
if diridx == 0:
|
305 |
if diridx == 0:
|
295 |
return "/"
|
306 |
return "/"
|
296 |
|
307 |
|
297 |
lpath = []
|
308 |
lpath = []
|
298 |
while True:
|
309 |
while True:
|
299 |
fathidx = g_dirvec[diridx][".."][0]
|
310 |
fathidx = _dirvec[diridx][".."][0]
|
300 |
for nm, ids in g_dirvec[fathidx].iteritems():
|
311 |
for nm, ids in _dirvec[fathidx].iteritems():
|
301 |
if ids[0] == diridx:
|
312 |
if ids[0] == diridx:
|
302 |
lpath.append(nm)
|
313 |
lpath.append(nm)
|
303 |
break
|
314 |
break
|
304 |
diridx = fathidx
|
315 |
diridx = fathidx
|
305 |
if diridx == 0:
|
316 |
if diridx == 0:
|