--- a/src/mediaserver/cdplugins/uprcl/uprclfolders.py
+++ b/src/mediaserver/cdplugins/uprcl/uprclfolders.py
@@ -46,7 +46,6 @@
# configuration. The entries are paths instead of simple names, and
# the docidx is 0. The diridx points to a dirvec entry.
-
import os
import shlex
import urllib
@@ -60,296 +59,288 @@
from recoll import recoll
from recoll import rclconfig
-_foldersIdPfx = '0$uprcl$folders'
-
-# Debug : limit processed recoll entries for speed
-_maxrclcnt = 0
-
-_dirvec = []
-
-
-# Create new directory entry: insert in father and append dirvec slot
-# (with ".." entry)
-def _createdir(dirvec, fathidx, docidx, nm):
- dirvec.append({})
- dirvec[fathidx][nm] = (len(dirvec) - 1, docidx)
- dirvec[-1][".."] = (fathidx, -1)
- return len(dirvec) - 1
-
-
-# Walk the recoll docs array and split the URLs paths to build the
-# [folders] data structure
-def _rcl2folders(docs, confdir, httphp, pathprefix):
- global dirvec
- dirvec = []
- start = timer()
-
- rclconf = rclconfig.RclConfig(confdir)
- topdirs = [os.path.expanduser(d) for d in
- shlex.split(rclconf.getConfParam('topdirs'))]
- topdirs = [d.rstrip('/') for d in topdirs]
-
- # Create the 1st entry. This is special because it holds the
- # recoll topdirs, which are paths instead of simple names. There
- # does not seem any need to build the tree between a topdir and /
- dirvec.append({})
- dirvec[0][".."] = (0, -1)
- for d in topdirs:
- dirvec.append({})
- dirvec[0][d] = (len(dirvec)-1, -1)
- dirvec[-1][".."] = (0, -1)
-
- # Walk the doc list and update the directory tree according to the
- # url: create intermediary directories if needed, create leaf
- # entry.
- for docidx in range(len(docs)):
- doc = docs[docidx]
+class Folders(object):
+
+ # Initialize (read recoll data and build tree).
+ def __init__(self, confdir, httphp, pathprefix):
+ self._idprefix = '0$uprcl$folders'
+ self._httphp = httphp
+ self._pprefix = pathprefix
+ # Debug : limit processed recoll entries for speed
+ self._maxrclcnt = 0
+ self._fetchalldocs(confdir)
+ self._rcl2folders(confdir)
+
+ def rcldocs(self):
+ return self._rcldocs
+
+ # Create new directory entry: insert in father and append dirvec slot
+ # (with ".." entry)
+ def _createdir(self, fathidx, docidx, nm):
+ self._dirvec.append({})
+ self._dirvec[fathidx][nm] = (len(self._dirvec) - 1, docidx)
+ self._dirvec[-1][".."] = (fathidx, -1)
+ return len(self._dirvec) - 1
+
+
+ # Walk the recoll docs array and split the URLs paths to build the
+ # [folders] data structure
+ def _rcl2folders(self, confdir):
+ self._dirvec = []
+ start = timer()
+
+ rclconf = rclconfig.RclConfig(confdir)
+ topdirs = [os.path.expanduser(d) for d in
+ shlex.split(rclconf.getConfParam('topdirs'))]
+ topdirs = [d.rstrip('/') for d in topdirs]
+
+ # Create the 1st entry. This is special because it holds the
+ # recoll topdirs, which are paths instead of simple names. There
+ # does not seem to be any need to build the tree between a topdir and /
+ self._dirvec.append({})
+ self._dirvec[0][".."] = (0, -1)
+ for d in topdirs:
+ self._dirvec.append({})
+ self._dirvec[0][d] = (len(self._dirvec)-1, -1)
+ self._dirvec[-1][".."] = (0, -1)
+
+ # Walk the doc list and update the directory tree according to the
+ # url: create intermediary directories if needed, create leaf
+ # entry.
+ for docidx in range(len(self._rcldocs)):
+ doc = self._rcldocs[docidx]
- # Possibly enrich the doc entry with a cover art uri.
- arturi = docarturi(doc, httphp, pathprefix)
- if arturi:
- # The uri is quoted, so it's ascii and we can just store
- # it as a doc attribute
- doc.albumarturi = arturi
-
- # No need to include non-audio types in the visible tree.
- if doc.mtype not in audiomtypes:
- continue
-
- url = doc.getbinurl()
- url = url[7:]
+ # Possibly enrich the doc entry with a cover art uri.
+ arturi = docarturi(doc, self._httphp, self._pprefix)
+ if arturi:
+ # The uri is quoted, so it's ascii and we can just store
+ # it as a doc attribute
+ doc.albumarturi = arturi
+
+ # No need to include non-audio types in the visible tree.
+ if doc.mtype not in audiomtypes:
+ continue
+
+ url = doc.getbinurl()
+ url = url[7:]
+ try:
+ decoded = url.decode('utf-8')
+ except:
+ decoded = urllib.quote(url).decode('utf-8')
+
+ # Determine the root entry (topdirs element). Special because
+ # its path is not a simple name.
+ fathidx = -1
+ for rtpath,idx in self._dirvec[0].iteritems():
+ if url.startswith(rtpath):
+ fathidx = idx[0]
+ break
+ if fathidx == -1:
+ uplog("No parent in topdirs: %s" % decoded)
+ continue
+
+ # Compute rest of path
+ url1 = url[len(rtpath):]
+ if len(url1) == 0:
+ continue
+
+ # If there is a contentgroup field, just add it as a virtual
+ # directory in the path. This only affects the visible tree,
+ # not the 'real' URLs of course.
+ if doc.contentgroup:
+ a = os.path.dirname(url1).decode('utf-8', errors='replace')
+ b = os.path.basename(url1).decode('utf-8', errors='replace')
+ url1 = os.path.join(a, doc.contentgroup, b)
+
+ # Split path, then walk the vector, possibly creating
+ # directory entries as needed
+ path = url1.split('/')[1:]
+ #uplog("%s"%path, file=sys.stderr)
+ for idx in range(len(path)):
+ elt = path[idx]
+ if elt in self._dirvec[fathidx]:
+ # This path element was already seen
+ # If this is the last entry in the path, maybe update
+ # the doc idx (previous entries were created for
+ # intermediate elements without a Doc).
+ if idx == len(path) -1:
+ self._dirvec[fathidx][elt] = (self._dirvec[fathidx][elt][0], docidx)
+ #uplog("updating docidx for %s" % decoded)
+ # Update fathidx for next iteration
+ fathidx = self._dirvec[fathidx][elt][0]
+ else:
+ # Element has no entry in father directory (hence no
+ # self._dirvec entry either).
+ if idx != len(path) -1:
+ # This is an intermediate element. Create a
+ # Doc-less directory
+ fathidx = self._createdir(fathidx, -1, elt)
+ else:
+ # Last element. If directory, needs a self._dirvec entry
+ if doc.mtype == 'inode/directory':
+ fathidx = self._createdir(fathidx, docidx, elt)
+ #uplog("Setting docidx for %s" % decoded)
+ else:
+ self._dirvec[fathidx][elt] = (-1, docidx)
+
+ if False:
+ for ent in self._dirvec:
+ uplog("%s" % ent)
+
+ end = timer()
+ uplog("_rcl2folders took %.2f Seconds" % (end - start))
+
+ # Fetch all the docs by querying Recoll with [mime:*], which is
+ # guaranteed to match every doc without overflowing the query size
+ # (because the number of mime types is limited). Something like
+ # title:* would overflow. This creates the main doc array, which is
+ # then used by all modules.
+ def _fetchalldocs(self, confdir):
+ start = timer()
+
+ rcldb = recoll.connect(confdir=confdir)
+ rclq = rcldb.query()
+ rclq.execute("mime:*", stemming=0)
+ uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
+
+ totcnt = 0
+ self._rcldocs = []
+ while True:
+ docs = rclq.fetchmany()
+ for doc in docs:
+ self._rcldocs.append(doc)
+ totcnt += 1
+ if (self._maxrclcnt > 0 and totcnt >= self._maxrclcnt) or \
+ len(docs) != rclq.arraysize:
+ break
+ time.sleep(0)
+ end = timer()
+ uplog("Retrieved %d docs in %.2f Seconds" % (totcnt,end - start))
+
+
+ ##############
+ # Browsing the initialized [folders] hierarchy
+
+ # Extract dirvec index from objid, according to the way we generate them.
+ def _objidtodiridx(self, pid):
+ if not pid.startswith(self._idprefix):
+ raise Exception("folders.browse: bad pid %s" % pid)
+
+ if len(self._rcldocs) == 0:
+ raise Exception("folders:browse: no docs")
+
+ diridx = pid[len(self._idprefix):]
+ if not diridx:
+ diridx = 0
+ else:
+ if diridx[1] != 'd':
+ raise Exception("folders:browse: called on non dir objid %s" %
+ pid)
+ diridx = int(diridx[2:])
+
+ if diridx >= len(self._dirvec):
+ raise Exception("folders:browse: bad pid %s" % pid)
+
+ return diridx
+
+
+ # Tell the top module what entries we define in the root
+ def rootentries(self, pid):
+ return [rcldirentry(pid + 'folders', pid, '[folders]'),]
+
+
+ # Look at all non-directory docs inside directory, and return the cover
+ # art we find.
+ def _arturifordir(self, diridx):
+ for nm,ids in self._dirvec[diridx].iteritems():
+ if ids[1] >= 0:
+ doc = self._rcldocs[ids[1]]
+ if doc.mtype != 'inode/directory' and doc.albumarturi:
+ return doc.albumarturi
+
+
+ # Folder hierarchy browse method.
+ # pid is like 0$uprcl$folders (root) or 0$uprcl$folders$d<index>
+ # flag is meta or children.
+ def browse(self, pid, flag):
+
+ diridx = self._objidtodiridx(pid)
+
+ # If there is only one entry in root, skip it. This means that 0
+ # and 1 point to the same dir, but this does not seem to be an
+ # issue
+ if diridx == 0 and len(self._dirvec[0]) == 2:
+ diridx = 1
+
+ entries = []
+
+ # The basename call is just for diridx==0 (topdirs). Remove it if
+ # this proves a performance issue
+ for nm,ids in self._dirvec[diridx].iteritems():
+ if nm == "..":
+ continue
+ thisdiridx = ids[0]
+ thisdocidx = ids[1]
+ if thisdocidx >= 0:
+ doc = self._rcldocs[thisdocidx]
+ else:
+ uplog("No doc for %s" % pid)
+ doc = None
+
+ if thisdiridx >= 0:
+ # Skip empty directories
+ if len(self._dirvec[thisdiridx]) == 1:
+ continue
+ id = self._idprefix + '$' + 'd' + str(thisdiridx)
+ if doc and doc.albumarturi:
+ arturi = doc.albumarturi
+ else:
+ arturi = self._arturifordir(thisdiridx)
+ entries.append(rcldirentry(id, pid, os.path.basename(nm),
+ arturi=arturi))
+ else:
+ # Not a directory. docidx had better be set
+ if thisdocidx == -1:
+ uplog("folders:docidx -1 for non-dir entry %s"%nm)
+ continue
+ doc = self._rcldocs[thisdocidx]
+ id = self._idprefix + '$i' + str(thisdocidx)
+ e = rcldoctoentry(id, pid, self._httphp, self._pprefix, doc)
+ if e:
+ entries.append(e)
+
+ return sorted(entries, cmp=cmpentries)
+
+ # Return path for objid, which has to be a container. This is good old
+ # pwd... It is called from the search module for generating a 'dir:'
+ # recoll filtering directive.
+ def dirpath(self, objid):
+ # We may get called from search, on the top dir (above
+ # [folders]). Return empty in this case
try:
- decoded = url.decode('utf-8')
+ diridx = self._objidtodiridx(objid)
except:
- decoded = urllib.quote(url).decode('utf-8')
-
- # Determine the root entry (topdirs element). Special because
- # its path is not a simple name.
- fathidx = -1
- for rtpath,idx in dirvec[0].iteritems():
- if url.startswith(rtpath):
- fathidx = idx[0]
- break
- if fathidx == -1:
- uplog("No parent in topdirs: %s" % decoded)
- continue
-
- # Compute rest of path
- url1 = url[len(rtpath):]
- if len(url1) == 0:
- continue
-
- # If there is a contentgroup field, just add it as a virtual
- # directory in the path. This only affects the visible tree,
- # not the 'real' URLs of course.
- if doc.contentgroup:
- a = os.path.dirname(url1).decode('utf-8', errors='replace')
- b = os.path.basename(url1).decode('utf-8', errors='replace')
- url1 = os.path.join(a, doc.contentgroup, b)
-
- # Split path, then walk the vector, possibly creating
- # directory entries as needed
- path = url1.split('/')[1:]
- #uplog("%s"%path, file=sys.stderr)
- for idx in range(len(path)):
- elt = path[idx]
- if elt in dirvec[fathidx]:
- # This path element was already seen
- # If this is the last entry in the path, maybe update
- # the doc idx (previous entries were created for
- # intermediate elements without a Doc).
- if idx == len(path) -1:
- dirvec[fathidx][elt] = (dirvec[fathidx][elt][0], docidx)
- #uplog("updating docidx for %s" % decoded)
- # Update fathidx for next iteration
- fathidx = dirvec[fathidx][elt][0]
- else:
- # Element has no entry in father directory (hence no
- # dirvec entry either).
- if idx != len(path) -1:
- # This is an intermediate element. Create a
- # Doc-less directory
- fathidx = _createdir(dirvec, fathidx, -1, elt)
- else:
- # Last element. If directory, needs a dirvec entry
- if doc.mtype == 'inode/directory':
- fathidx = _createdir(dirvec, fathidx, docidx, elt)
- #uplog("Setting docidx for %s" % decoded)
- else:
- dirvec[fathidx][elt] = (-1, docidx)
-
- if False:
- for ent in dirvec:
- uplog("%s" % ent)
-
- end = timer()
- uplog("_rcl2folders took %.2f Seconds" % (end - start))
- return dirvec
-
-# Fetch all the docs by querying Recoll with [mime:*], which is
-# guaranteed to match every doc without overflowing the query size
-# (because the number of mime types is limited). Something like
-# title:* would overflow. This creates the main doc array, which is
-# then used by all modules.
-def _fetchalldocs(confdir):
- start = timer()
- allthedocs = []
-
- rcldb = recoll.connect(confdir=confdir)
- rclq = rcldb.query()
- rclq.execute("mime:*", stemming=0)
- uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
-
- totcnt = 0
- while True:
- docs = rclq.fetchmany()
- for doc in docs:
- allthedocs.append(doc)
- totcnt += 1
- if (_maxrclcnt > 0 and totcnt >= _maxrclcnt) or \
- len(docs) != rclq.arraysize:
- break
- time.sleep(0)
- end = timer()
- uplog("Retrieved %d docs in %.2f Seconds" % (totcnt,end - start))
- return allthedocs
-
-
-# Initialize (read recoll data and build tree). This is called by
-# uprcl-app init
-def inittree(confdir, httphp, pathprefix):
- global g_alldocs, _dirvec
+ return ""
+
+ if diridx == 0:
+ return "/"
- g_alldocs = _fetchalldocs(confdir)
- _dirvec = _rcl2folders(g_alldocs, confdir, httphp, pathprefix)
- return g_alldocs
-
-
-
-##############
-# Browsing the initialized [folders] hierarchy
-
-
-# Extract dirvec index from objid, according to the way we generate them.
-def _objidtodiridx(pid):
- if not pid.startswith(_foldersIdPfx):
- raise Exception("folders.browse: bad pid %s" % pid)
-
- if len(g_alldocs) == 0:
- raise Exception("folders:browse: no docs")
-
- diridx = pid[len(_foldersIdPfx):]
- if not diridx:
- diridx = 0
- else:
- if diridx[1] != 'd':
- raise Exception("folders:browse: called on non dir objid %s" % pid)
- diridx = int(diridx[2:])
-
- if diridx >= len(_dirvec):
- raise Exception("folders:browse: bad pid %s" % pid)
-
- return diridx
-
-
-# Tell the top module what entries we define in the root
-def rootentries(pid):
- return [rcldirentry(pid + 'folders', pid, '[folders]'),]
-
-
-# Look all non-directory docs inside directory, and return the cover
-# art we find.
-def _arturifordir(diridx):
- for nm,ids in _dirvec[diridx].iteritems():
- if ids[1] >= 0:
- doc = g_alldocs[ids[1]]
- if doc.mtype != 'inode/directory' and doc.albumarturi:
- return doc.albumarturi
-
-
-# Folder hierarchy browse method.
-# objid is like folders$index
-# flag is meta or children.
-# httphp and pathprefix are used to generate URIs
-def browse(pid, flag, httphp, pathprefix):
-
- diridx = _objidtodiridx(pid)
-
- # If there is only one entry in root, skip it. This means that 0
- # and 1 point to the same dir, but this does not seem to be an
- # issue
- if diridx == 0 and len(dirvec[0]) == 2:
- diridx = 1
-
- entries = []
-
- # The basename call is just for diridx==0 (topdirs). Remove it if
- # this proves a performance issue
- for nm,ids in _dirvec[diridx].iteritems():
- if nm == "..":
- continue
- thisdiridx = ids[0]
- thisdocidx = ids[1]
- if thisdocidx >= 0:
- doc = g_alldocs[thisdocidx]
+ lpath = []
+ while True:
+ fathidx = self._dirvec[diridx][".."][0]
+ for nm, ids in self._dirvec[fathidx].iteritems():
+ if ids[0] == diridx:
+ lpath.append(nm)
+ break
+ diridx = fathidx
+ if diridx == 0:
+ break
+
+ if not lpath:
+ path = "/"
else:
- uplog("No doc for %s" % pid)
- doc = None
-
- if thisdiridx >= 0:
- # Skip empty directories
- if len(dirvec[thisdiridx]) == 1:
- continue
- id = _foldersIdPfx + '$' + 'd' + str(thisdiridx)
- if doc and doc.albumarturi:
- arturi = doc.albumarturi
- else:
- arturi = _arturifordir(thisdiridx)
- entries.append(rcldirentry(id, pid, os.path.basename(nm),
- arturi=arturi))
- else:
- # Not a directory. docidx had better been set
- if thisdocidx == -1:
- uplog("folders:docidx -1 for non-dir entry %s"%nm)
- continue
- doc = g_alldocs[thisdocidx]
- id = _foldersIdPfx + '$i' + str(thisdocidx)
- e = rcldoctoentry(id, pid, httphp, pathprefix, doc)
- if e:
- entries.append(e)
-
- return sorted(entries, cmp=cmpentries)
-
-# Return path for objid, which has to be a container.This is good old
-# pwd... It is called from the search module for generating a 'dir:'
-# recoll filtering directive.
-def dirpath(objid):
- # We may get called from search, on the top dir (above [folders]). Return
- # empty in this case
- try:
- diridx = _objidtodiridx(objid)
- except:
- return ""
-
- if diridx == 0:
- return "/"
-
- lpath = []
- while True:
- fathidx = _dirvec[diridx][".."][0]
- for nm, ids in _dirvec[fathidx].iteritems():
- if ids[0] == diridx:
- lpath.append(nm)
- break
- diridx = fathidx
- if diridx == 0:
- break
-
- if not lpath:
- path = "/"
- else:
- path = ""
- for elt in reversed(lpath):
- path += elt + "/"
-
- return path
+ path = ""
+ for elt in reversed(lpath):
+ path += elt + "/"
+
+ return path