u/joerg-krause / upmpdcli - Code / Diff of /src/mediaserver/cdplugins/uprcl/uprclfolders.py

Diff of /src/mediaserver/cdplugins/uprcl/uprclfolders.py [4c66fb] .. [63b8dd]

Switch to side-by-side view

--- a/src/mediaserver/cdplugins/uprcl/uprclfolders.py
+++ b/src/mediaserver/cdplugins/uprcl/uprclfolders.py
@@ -46,7 +46,6 @@
 # configuration. The entries are paths instead of simple names, and
 # the docidx is 0. The diridx points to a dirvec entry.
 
-
 import os
 import shlex
 import urllib
@@ -60,296 +59,288 @@
 from recoll import recoll
 from recoll import rclconfig
 
-_foldersIdPfx = '0$uprcl$folders'
-
-# Debug : limit processed recoll entries for speed
-_maxrclcnt = 0
-
-_dirvec = []
-
-
-# Create new directory entry: insert in father and append dirvec slot
-# (with ".." entry)
-def _createdir(dirvec, fathidx, docidx, nm):
-    dirvec.append({})
-    dirvec[fathidx][nm] = (len(dirvec) - 1, docidx)
-    dirvec[-1][".."] = (fathidx, -1)
-    return len(dirvec) - 1
-
-
-# Walk the recoll docs array and split the URLs paths to build the
-# [folders] data structure
-def _rcl2folders(docs, confdir, httphp, pathprefix):
-    global dirvec
-    dirvec = []
-    start = timer()
-
-    rclconf = rclconfig.RclConfig(confdir)
-    topdirs = [os.path.expanduser(d) for d in
-               shlex.split(rclconf.getConfParam('topdirs'))]
-    topdirs = [d.rstrip('/') for d in topdirs]
-
-    # Create the 1st entry. This is special because it holds the
-    # recoll topdirs, which are paths instead of simple names. There
-    # does not seem any need to build the tree between a topdir and /
-    dirvec.append({})
-    dirvec[0][".."] = (0, -1)
-    for d in topdirs:
-        dirvec.append({})
-        dirvec[0][d] = (len(dirvec)-1, -1)
-        dirvec[-1][".."] = (0, -1)
-
-    # Walk the doc list and update the directory tree according to the
-    # url: create intermediary directories if needed, create leaf
-    # entry.
-    for docidx in range(len(docs)):
-        doc = docs[docidx]
+class Folders(object):
+
+    # Initialize (read recoll data and build tree).
+    def __init__(self, confdir, httphp, pathprefix):
+        """Fetch all the recoll docs, then build the folders tree.
+
+        confdir is passed to the recoll query and configuration code.
+        httphp and pathprefix are only stored here; they are later
+        handed to docarturi() and rcldoctoentry().
+        """
+        # Debug : limit processed recoll entries for speed. Has to be
+        # set before the fetch below, which reads it.
+        self._maxrclcnt = 0
+        self._idprefix = '0$uprcl$folders'
+        self._pprefix, self._httphp = pathprefix, httphp
+        # Order matters: the tree is built from the fetched doc array.
+        self._fetchalldocs(confdir)
+        self._rcl2folders(confdir)
+
+    def rcldocs(self):
+        """Return the doc array filled by _fetchalldocs() (not a copy)."""
+        docs = self._rcldocs
+        return docs
+    
+    # Create new directory entry: insert in father and append dirvec slot
+    # (with ".." entry)
+    def _createdir(self, fathidx, docidx, nm):
+        """Create directory nm inside the directory at index fathidx.
+
+        Appends a new dirvec slot, records (new index, docidx) under nm
+        in the father, gives the new slot its ".." entry pointing back
+        at the father, and returns the index of the new slot.
+        """
+        newdir = {}
+        self._dirvec.append(newdir)
+        newidx = len(self._dirvec) - 1
+        self._dirvec[fathidx][nm] = (newidx, docidx)
+        newdir[".."] = (fathidx, -1)
+        return newidx
+
+
+    # Walk the recoll docs array and split the URLs paths to build the
+    # [folders] data structure
+    def _rcl2folders(self, confdir):
+        """Build self._dirvec, the folders tree, from self._rcldocs.
+
+        Each dirvec slot is a dict describing one directory: it maps an
+        entry name to a (dirvec index, doc index) pair, where the dirvec
+        index is -1 for non-directory entries and the doc index is -1
+        when no doc is attached. Slot 0 is the root and holds the recoll
+        topdirs read from the configuration in confdir.
+
+        As a side effect, docs for which docarturi() returns a value get
+        an albumarturi attribute. Docs whose mtype is not in audiomtypes
+        are left out of the tree. The elapsed time is logged.
+        """
+        self._dirvec = []
+        start = timer()
+
+        rclconf = rclconfig.RclConfig(confdir)
+        topdirs = [os.path.expanduser(d) for d in
+                   shlex.split(rclconf.getConfParam('topdirs'))]
+        topdirs = [d.rstrip('/') for d in topdirs]
+
+        # Create the 1st entry. This is special because it holds the
+        # recoll topdirs, which are paths instead of simple names. There
+        # does not seem to be any need to build the tree between a
+        # topdir and /
+        self._dirvec.append({})
+        self._dirvec[0][".."] = (0, -1)
+        for d in topdirs:
+            self._dirvec.append({})
+            self._dirvec[0][d] = (len(self._dirvec)-1, -1)
+            self._dirvec[-1][".."] = (0, -1)
+
+        # Walk the doc list and update the directory tree according to the
+        # url: create intermediary directories if needed, create leaf
+        # entry.
+        for docidx in range(len(self._rcldocs)):
+            doc = self._rcldocs[docidx]
             
-        # Possibly enrich the doc entry with a cover art uri.
-        arturi = docarturi(doc, httphp, pathprefix)
-        if arturi:
-            # The uri is quoted, so it's ascii and we can just store
-            # it as a doc attribute
-            doc.albumarturi = arturi
-
-        # No need to include non-audio types in the visible tree.
-        if doc.mtype not in audiomtypes:
-            continue
-
-        url = doc.getbinurl()
-        url = url[7:]
+            # Possibly enrich the doc entry with a cover art uri.
+            arturi = docarturi(doc, self._httphp, self._pprefix)
+            if arturi:
+                # The uri is quoted, so it's ascii and we can just store
+                # it as a doc attribute
+                doc.albumarturi = arturi
+
+            # No need to include non-audio types in the visible tree.
+            if doc.mtype not in audiomtypes:
+                continue
+
+            url = doc.getbinurl()
+            # Drop the first 7 characters -- presumably the 'file://'
+            # scheme prefix, TODO confirm.
+            url = url[7:]
+            # 'decoded' is only used for the log message below.
+            try:
+                decoded = url.decode('utf-8')
+            except:
+                decoded = urllib.quote(url).decode('utf-8')
+
+            # Determine the root entry (topdirs element). Special because
+            # its path is not a simple name.
+            # NOTE(review): this is a plain string prefix test, so a
+            # topdir /a would presumably also claim /ab/x -- confirm
+            # whether topdirs can share a prefix this way.
+            fathidx = -1
+            for rtpath,idx in self._dirvec[0].iteritems():
+                if url.startswith(rtpath):
+                    fathidx = idx[0]
+                    break
+            if fathidx == -1:
+                uplog("No parent in topdirs: %s" % decoded)
+                continue
+
+            # Compute rest of path. rtpath is still bound to the topdir
+            # which matched in the loop above.
+            url1 = url[len(rtpath):]
+            if len(url1) == 0:
+                continue
+
+            # If there is a contentgroup field, just add it as a virtual
+            # directory in the path. This only affects the visible tree,
+            # not the 'real' URLs of course.
+            if doc.contentgroup:
+                a = os.path.dirname(url1).decode('utf-8', errors='replace')
+                b = os.path.basename(url1).decode('utf-8', errors='replace')
+                url1 = os.path.join(a, doc.contentgroup, b)
+            
+            # Split path, then walk the vector, possibly creating
+            # directory entries as needed. The [1:] drops the element
+            # before the first '/'.
+            path = url1.split('/')[1:]
+            #uplog("%s"%path, file=sys.stderr)
+            for idx in range(len(path)):
+                elt = path[idx]
+                if elt in self._dirvec[fathidx]:
+                    # This path element was already seen
+                    # If this is the last entry in the path, maybe update
+                    # the doc idx (previous entries were created for
+                    # intermediate elements without a Doc).
+                    if idx == len(path) -1:
+                        self._dirvec[fathidx][elt] = (self._dirvec[fathidx][elt][0], docidx)
+                        #uplog("updating docidx for %s" % decoded)
+                    # Update fathidx for next iteration
+                    fathidx = self._dirvec[fathidx][elt][0]
+                else:
+                    # Element has no entry in father directory (hence no
+                    # self._dirvec entry either).
+                    if idx != len(path) -1:
+                        # This is an intermediate element. Create a
+                        # Doc-less directory
+                        fathidx = self._createdir(fathidx, -1, elt)
+                    else:
+                        # Last element. If directory, needs a self._dirvec entry
+                        if doc.mtype == 'inode/directory':
+                            fathidx = self._createdir(fathidx, docidx, elt)
+                            #uplog("Setting docidx for %s" % decoded)
+                        else:
+                            # Plain file: no dirvec slot, hence the -1
+                            self._dirvec[fathidx][elt] = (-1, docidx)
+
+        # Debug: switch to True to dump the whole tree to the log.
+        if False:
+            for ent in self._dirvec:
+                uplog("%s" % ent)
+
+        end = timer()
+        uplog("_rcl2folders took %.2f Seconds" % (end - start))
+
+    # Fetch all the docs by querying Recoll with [mime:*], which is
+    # guaranteed to match every doc without overflowing the query size
+    # (because the number of mime types is limited). Something like
+    # title:* would overflow. This creates the main doc array, which is
+    # then used by all modules.
+    def _fetchalldocs(self, confdir):
+        """Run the [mime:*] query and store all its results in self._rcldocs.
+
+        Results are read batch by batch with fetchmany(). Reading stops
+        on the first batch whose size differs from the query arraysize,
+        or as soon as the debug limit self._maxrclcnt (when above 0) is
+        reached. The estimated and actual counts are logged.
+        """
+        start = timer()
+
+        rcldb = recoll.connect(confdir=confdir)
+        rclq = rcldb.query()
+        rclq.execute("mime:*", stemming=0)
+        uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
+
+        self._rcldocs = []
+        totcnt = 0
+        while True:
+            batch = rclq.fetchmany()
+            self._rcldocs.extend(batch)
+            totcnt += len(batch)
+            limited = self._maxrclcnt > 0 and totcnt >= self._maxrclcnt
+            if limited or len(batch) != rclq.arraysize:
+                break
+            # Zero-length sleep, kept between batches as in the
+            # original loop.
+            time.sleep(0)
+
+        end = timer()
+        uplog("Retrieved %d docs in %.2f Seconds" % (totcnt,end - start))
+
+
+    ##############
+    # Browsing the initialized [folders] hierarchy
+
+    # Extract dirvec index from objid, according to the way we generate them.
+    def _objidtodiridx(self, pid):
+        """Return the dirvec index encoded in the container object id pid.
+
+        pid is either the id prefix alone (the root, index 0), or the
+        prefix followed by one separator character, 'd', and the decimal
+        index, which is how browse() generates directory ids.
+
+        Raises Exception when pid does not start with the id prefix,
+        when there are no docs, when pid does not designate a directory,
+        and when the index is malformed, negative or out of range.
+        """
+        if not pid.startswith(self._idprefix):
+            raise Exception("folders.browse: bad pid %s" % pid)
+
+        if len(self._rcldocs) == 0:
+            raise Exception("folders:browse: no docs")
+
+        suffix = pid[len(self._idprefix):]
+        if not suffix:
+            diridx = 0
+        else:
+            # Slice instead of indexing: a one character suffix has to
+            # be reported like any other bad id, not as an IndexError.
+            if suffix[1:2] != 'd':
+                raise Exception("folders:browse: called on non dir objid %s" %
+                                pid)
+            try:
+                diridx = int(suffix[2:])
+            except ValueError:
+                raise Exception("folders:browse: bad pid %s" % pid)
+
+        # A negative value would silently index dirvec from its end.
+        if diridx < 0 or diridx >= len(self._dirvec):
+            raise Exception("folders:browse: bad pid %s" % pid)
+
+        return diridx
+
+
+    # Tell the top module what entries we define in the root
+    def rootentries(self, pid):
+        """Return the entries which this module defines in the root.
+
+        This is a one-element list: the '[folders]' directory entry,
+        with id pid + 'folders' and parent pid.
+        """
+        foldersentry = rcldirentry(pid + 'folders', pid, '[folders]')
+        return [foldersentry]
+
+
+    # Look at all the non-directory docs inside the directory, and return
+    # the first cover art we find.
+    def _arturifordir(self, diridx):
+        """Return a cover art uri found among the entries of dir diridx.
+
+        Only entries which have a doc, and whose doc is not itself a
+        directory, are considered. Returns None when none of them
+        carries an albumarturi.
+        """
+        for dirent in self._dirvec[diridx].itervalues():
+            docidx = dirent[1]
+            if docidx < 0:
+                # No doc attached to this entry
+                continue
+            doc = self._rcldocs[docidx]
+            if doc.mtype == 'inode/directory':
+                continue
+            if doc.albumarturi:
+                return doc.albumarturi
+        return None
+              
+
+    # Folder hierarchy browse method.
+    # objid is like folders$index
+    # flag is meta or children.
+    def browse(self, pid, flag):
+        """Return the sorted list of entries for the folder with id pid.
+
+        pid is a container id as decoded by _objidtodiridx(), which
+        raises for anything else. flag (meta or children) is accepted
+        but not used here. Sub-directories holding nothing but ".." are
+        left out, as are plain entries without a doc.
+        """
+        diridx = self._objidtodiridx(pid)
+
+        # If there is only one entry in root (besides ".."), skip it.
+        # This means that 0 and 1 point to the same dir, but this does
+        # not seem to be an issue
+        if diridx == 0 and len(self._dirvec[0]) == 2:
+            diridx = 1
+
+        entries = []
+        for nm, (subdiridx, docidx) in self._dirvec[diridx].iteritems():
+            if nm == "..":
+                continue
+
+            doc = None
+            if docidx >= 0:
+                doc = self._rcldocs[docidx]
+            else:
+                uplog("No doc for %s" % pid)
+
+            if subdiridx < 0:
+                # Not a directory. docidx had better been set
+                if docidx == -1:
+                    uplog("folders:docidx -1 for non-dir entry %s"%nm)
+                    continue
+                doc = self._rcldocs[docidx]
+                objid = self._idprefix + '$i' + str(docidx)
+                e = rcldoctoentry(objid, pid, self._httphp, self._pprefix,
+                                  doc)
+                if e:
+                    entries.append(e)
+                continue
+
+            # Directory. One holding only its ".." entry is empty: skip
+            if len(self._dirvec[subdiridx]) == 1:
+                continue
+            objid = self._idprefix + '$d' + str(subdiridx)
+            if doc and doc.albumarturi:
+                arturi = doc.albumarturi
+            else:
+                arturi = self._arturifordir(subdiridx)
+            # The basename call is just for diridx==0 (topdirs). Remove
+            # it if this proves a performance issue
+            entries.append(rcldirentry(objid, pid, os.path.basename(nm),
+                                       arturi=arturi))
+
+        return sorted(entries, cmp=cmpentries)
+
+    # Return path for objid, which has to be a container. This is good old
+    # pwd... It is called from the search module for generating a 'dir:'
+    # recoll filtering directive.
+    def dirpath(self, objid):
+        # We may get called from search, on the top dir (above
+        # [folders]). Return empty in this case
         try:
-            decoded = url.decode('utf-8')
+            diridx = self._objidtodiridx(objid)
         except:
-            decoded = urllib.quote(url).decode('utf-8')
-
-        # Determine the root entry (topdirs element). Special because
-        # its path is not a simple name.
-        fathidx = -1
-        for rtpath,idx in dirvec[0].iteritems():
-            if url.startswith(rtpath):
-                fathidx = idx[0]
-                break
-        if fathidx == -1:
-            uplog("No parent in topdirs: %s" % decoded)
-            continue
-
-        # Compute rest of path
-        url1 = url[len(rtpath):]
-        if len(url1) == 0:
-            continue
-
-        # If there is a contentgroup field, just add it as a virtual
-        # directory in the path. This only affects the visible tree,
-        # not the 'real' URLs of course.
-        if doc.contentgroup:
-            a = os.path.dirname(url1).decode('utf-8', errors='replace')
-            b = os.path.basename(url1).decode('utf-8', errors='replace')
-            url1 = os.path.join(a, doc.contentgroup, b)
-            
-        # Split path, then walk the vector, possibly creating
-        # directory entries as needed
-        path = url1.split('/')[1:]
-        #uplog("%s"%path, file=sys.stderr)
-        for idx in range(len(path)):
-            elt = path[idx]
-            if elt in dirvec[fathidx]:
-                # This path element was already seen
-                # If this is the last entry in the path, maybe update
-                # the doc idx (previous entries were created for
-                # intermediate elements without a Doc).
-                if idx == len(path) -1:
-                    dirvec[fathidx][elt] = (dirvec[fathidx][elt][0], docidx)
-                    #uplog("updating docidx for %s" % decoded)
-                # Update fathidx for next iteration
-                fathidx = dirvec[fathidx][elt][0]
-            else:
-                # Element has no entry in father directory (hence no
-                # dirvec entry either).
-                if idx != len(path) -1:
-                    # This is an intermediate element. Create a
-                    # Doc-less directory
-                    fathidx = _createdir(dirvec, fathidx, -1, elt)
-                else:
-                    # Last element. If directory, needs a dirvec entry
-                    if doc.mtype == 'inode/directory':
-                        fathidx = _createdir(dirvec, fathidx, docidx, elt)
-                        #uplog("Setting docidx for %s" % decoded)
-                    else:
-                        dirvec[fathidx][elt] = (-1, docidx)
-
-    if False:
-        for ent in dirvec:
-            uplog("%s" % ent)
-
-    end = timer()
-    uplog("_rcl2folders took %.2f Seconds" % (end - start))
-    return dirvec
-
-# Fetch all the docs by querying Recoll with [mime:*], which is
-# guaranteed to match every doc without overflowing the query size
-# (because the number of mime types is limited). Something like
-# title:* would overflow. This creates the main doc array, which is
-# then used by all modules.
-def _fetchalldocs(confdir):
-    start = timer()
-    allthedocs = []
-
-    rcldb = recoll.connect(confdir=confdir)
-    rclq = rcldb.query()
-    rclq.execute("mime:*", stemming=0)
-    uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
-
-    totcnt = 0
-    while True:
-        docs = rclq.fetchmany()
-        for doc in docs:
-            allthedocs.append(doc)
-            totcnt += 1
-        if (_maxrclcnt > 0 and totcnt >= _maxrclcnt) or \
-               len(docs) != rclq.arraysize:
-            break
-        time.sleep(0)
-    end = timer()
-    uplog("Retrieved %d docs in %.2f Seconds" % (totcnt,end - start))
-    return allthedocs
-
-
-# Initialize (read recoll data and build tree). This is called by
-# uprcl-app init
-def inittree(confdir, httphp, pathprefix):
-    global g_alldocs, _dirvec
+            return ""
+
+        if diridx == 0:
+            return "/"
     
-    g_alldocs = _fetchalldocs(confdir)
-    _dirvec = _rcl2folders(g_alldocs, confdir, httphp, pathprefix)
-    return g_alldocs
-
-
-
-##############
-# Browsing the initialized [folders] hierarchy
-
-
-# Extract dirvec index from objid, according to the way we generate them.
-def _objidtodiridx(pid):
-    if not pid.startswith(_foldersIdPfx):
-        raise Exception("folders.browse: bad pid %s" % pid)
-
-    if len(g_alldocs) == 0:
-        raise Exception("folders:browse: no docs")
-
-    diridx = pid[len(_foldersIdPfx):]
-    if not diridx:
-        diridx = 0
-    else:
-        if diridx[1] != 'd':
-            raise Exception("folders:browse: called on non dir objid %s" % pid)
-        diridx = int(diridx[2:])
-    
-    if diridx >= len(_dirvec):
-        raise Exception("folders:browse: bad pid %s" % pid)
-
-    return diridx
-
-
-# Tell the top module what entries we define in the root
-def rootentries(pid):
-    return [rcldirentry(pid + 'folders', pid, '[folders]'),]
-
-
-# Look all non-directory docs inside directory, and return the cover
-# art we find.
-def _arturifordir(diridx):
-    for nm,ids in _dirvec[diridx].iteritems():
-        if ids[1] >= 0:
-            doc = g_alldocs[ids[1]]
-            if doc.mtype != 'inode/directory' and doc.albumarturi:
-                return doc.albumarturi
-              
-
-# Folder hierarchy browse method.
-# objid is like folders$index
-# flag is meta or children.
-# httphp and pathprefix are used to generate URIs
-def browse(pid, flag, httphp, pathprefix):
-
-    diridx = _objidtodiridx(pid)
-
-    # If there is only one entry in root, skip it. This means that 0
-    # and 1 point to the same dir, but this does not seem to be an
-    # issue
-    if diridx == 0 and len(dirvec[0]) == 2:
-        diridx = 1
-        
-    entries = []
-
-    # The basename call is just for diridx==0 (topdirs). Remove it if
-    # this proves a performance issue
-    for nm,ids in _dirvec[diridx].iteritems():
-        if nm == "..":
-            continue
-        thisdiridx = ids[0]
-        thisdocidx = ids[1]
-        if thisdocidx >= 0:
-            doc = g_alldocs[thisdocidx]
+        # Climb from the target directory up to the root, collecting at
+        # each level the name under which the father knows the current
+        # directory. lpath ends up ordered from leaf to root.
+        lpath = []
+        while True:
+            fathidx = self._dirvec[diridx][".."][0]
+            for nm, ids in self._dirvec[fathidx].iteritems():
+                if ids[0] == diridx:
+                    lpath.append(nm)
+                    break
+            # Moving up and testing for the root are steps of the while
+            # loop, not of the name search: when run inside the for
+            # loop, the climb never terminates.
+            diridx = fathidx
+            if diridx == 0:
+                break
+
+        if not lpath:
+            path = "/"
         else:
-            uplog("No doc for %s" % pid)
-            doc = None
-            
-        if thisdiridx >= 0:
-            # Skip empty directories
-            if len(dirvec[thisdiridx]) == 1:
-                continue
-            id = _foldersIdPfx + '$' + 'd' + str(thisdiridx)
-            if doc and doc.albumarturi:
-                arturi = doc.albumarturi
-            else:
-                arturi = _arturifordir(thisdiridx)
-            entries.append(rcldirentry(id, pid, os.path.basename(nm),
-                                       arturi=arturi))
-        else:
-            # Not a directory. docidx had better been set
-            if thisdocidx == -1:
-                uplog("folders:docidx -1 for non-dir entry %s"%nm)
-                continue
-            doc = g_alldocs[thisdocidx]
-            id = _foldersIdPfx + '$i' + str(thisdocidx)
-            e = rcldoctoentry(id, pid, httphp, pathprefix, doc)
-            if e:
-                entries.append(e)
-
-    return sorted(entries, cmp=cmpentries)
-
-# Return path for objid, which has to be a container.This is good old
-# pwd... It is called from the search module for generating a 'dir:'
-# recoll filtering directive.
-def dirpath(objid):
-    # We may get called from search, on the top dir (above [folders]). Return
-    # empty in this case
-    try:
-        diridx = _objidtodiridx(objid)
-    except:
-        return ""
-
-    if diridx == 0:
-        return "/"
-    
-    lpath = []
-    while True:
-        fathidx = _dirvec[diridx][".."][0]
-        for nm, ids in _dirvec[fathidx].iteritems():
-            if ids[0] == diridx:
-                lpath.append(nm)
-                break
-        diridx = fathidx
-        if diridx == 0:
-            break
-
-    if not lpath:
-        path = "/"
-    else:
-        path = ""
-    for elt in reversed(lpath):
-        path += elt + "/"
-
-    return path
+            path = ""
+        for elt in reversed(lpath):
+            path += elt + "/"
+
+        return path