upmpdcli / Code / Diff of /src/mediaserver/cdplugins/uprcl/uprclfolders.py

Diff of /src/mediaserver/cdplugins/uprcl/uprclfolders.py [a1b68f] .. [4d3794]

Switch to unified view


...
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Manage the [folders] section of the tree.
#
# Object Id prefix: 0$uprcl$folders
# 
# Object id inside the section.
#    Container: $d<diridx> where <diridx> indexes into our directory vector.
#    Item: $i<docidx> where <docidx> indexes into the docs vector.
#
# Data structure:
#
# The _dirvec vector has one entry for each directory. Each entry is a
# dictionary, mapping the names inside the directory to a pair
# (diridx,docidx), where:
#  - diridx is an index into dirvec if the name is a directory, else -1
#  - docidx is the index of the doc inside the doc array, or -1 if:
#     - There is no doc entry, which could possibly happen if there is
#       no result for an intermediary element in a path,
#       because of some recoll issue, or because this is a synthetic
#       'contentgroup' entry.
#     - Or if the doc was not yet seen, the index will then be updated
#       when we see it.
#
# Each directory has a special ".." entry with a diridx pointing to
# the parent directory. This allows building a path from a container
# id (aka pwd).
#
# No need has emerged for a "." entry.
# 
# Entry 0 in _dirvec is special: it holds the 'topdirs' from the recoll
# configuration. The entries are paths instead of simple names, and
# the docidx is initialized to -1. The diridx points to a dirvec entry.


import os
import shlex
import urllib
import sys
...
# Debug : limit processed recoll entries for speed
_maxrclcnt = 0

_dirvec = []















# Create new directory entry: insert in father and append dirvec slot
# (with ".." entry)
def _createdir(dirvec, fathidx, docidx, nm):
    dirvec.append({})
    dirvec[fathidx][nm] = (len(dirvec) - 1, docidx)
    dirvec[-1][".."] = (fathidx, -1)
    return len(dirvec) - 1


# Walk the recoll docs array and split the URLs paths to build the
# [folders] data structure
def _rcl2folders(docs, confdir, httphp, pathprefix):
    global dirvec
    dirvec = []
    start = timer()

    rclconf = rclconfig.RclConfig(confdir)
    topdirs = [os.path.expanduser(d) for d in
               shlex.split(rclconf.getConfParam('topdirs'))]
    topdirs = [d.rstrip('/') for d in topdirs]

    # Create the 1st entry. This is special because it holds the
    # recoll topdirs, which are paths instead of simple names. There
    # does not seem any need to build the tree between a topdir and /
    dirvec.append({})
    dirvec[0][".."] = (0, -1)
    for d in topdirs:
        dirvec.append({})
        dirvec[0][d] = (len(dirvec)-1, -1)
        dirvec[-1][".."] = (0, -1)

    # Walk the doc list and update the directory tree according to the
    # url: create intermediary directories if needed, create leaf
    # entry.
    for docidx in range(len(docs)):
        doc = docs[docidx]
            
        # Possibly enrich the doc entry with a cover art uri.
        arturi = docarturi(doc, httphp, pathprefix)
        if arturi:
            # The uri is quoted, so it's ascii and we can just store
            # it as a doc attribute
            doc.albumarturi = arturi
...
            decoded = url.decode('utf-8')
        except:
            decoded = urllib.quote(url).decode('utf-8')

        # Determine the root entry (topdirs element). Special because
        # its path is not a simple name.
        fathidx = -1
        for rtpath,idx in dirvec[0].iteritems():
            if url.startswith(rtpath):
                fathidx = idx[0]
                break
...

    end = timer()
    uplog("_rcl2folders took %.2f Seconds" % (end - start))
    return dirvec

# Fetch all the docs by querying Recoll with [mime:*], which is
# guaranteed to match every doc without overflowing the query size
# (because the number of mime types is limited). Something like
# title:* would overflow. This creates the main doc array, which is
# then used by all modules.
def _fetchalldocs(confdir):
    start = timer()
    allthedocs = []

    rcldb = recoll.connect(confdir=confdir)
...
                entries.append(e)

    return sorted(entries, cmp=cmpentries)

# Return path for objid, which has to be a container. This is good old
# pwd... It is called from the search module for generating a 'dir:'
# recoll filtering directive.
def dirpath(objid):
    # We may get called from search, on the top dir (above [folders]). Return
    # empty in this case
    try:

	a/src/mediaserver/cdplugins/uprcl/uprclfolders.py		b/src/mediaserver/cdplugins/uprcl/uprclfolders.py
	...		...
11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the	11	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12	# GNU General Public License for more details.	12	# GNU General Public License for more details.
13	#	13	#
14	# You should have received a copy of the GNU General Public License	14	# You should have received a copy of the GNU General Public License
15	# along with this program. If not, see <http://www.gnu.org/licenses/>.	15	# along with this program. If not, see <http://www.gnu.org/licenses/>.
		16
		17	# Manage the [folders] section of the tree.
		18	#
		19	# Object Id prefix: 0$uprcl$folders
		20	#
		21	# Obect id inside the section.
		22	# Container: $d<diridx> where <diridx> indexes into our directory vector.
		23	# Item: $i<docidx> where <docidx> indexex into the docs vector.
		24	#
		25	# Data structure:
		26	#
		27	# The _dirvec vector has one entry for each directory. Each entry is a
		28	# dictionary, mapping the names inside the directory to a pair
		29	# (diridx,docidx), where:
		30	# - diridx is an index into dirvec if the name is a directory, else -1
		31	# - docidx is the index of the doc inside the doc array, or -1 if:
		32	# - There is no doc entry, which could possibly happen if there is
		33	# no result for an intermediary element in a path,
		34	# because of some recoll issue, or because this is a synthetic
		35	# 'contentgroup' entry.
		36	# - Or if the doc was not yet seen, the index will then be updated
		37	# when we see it.
		38	#
		39	# Each directory has a special ".." entry with a diridx pointing to
		40	# the parent directory. This allows building a path from a container
		41	# id (aka pwd).
		42	#
		43	# No need has emerged for a "." entry.
		44	#
		45	# Entry 0 in _dirvec is special: it holds the 'topdirs' from the recoll
		46	# configuration. The entries are paths instead of simple names, and
		47	# the docidx is 0. The diridx points to a dirvec entry.
		48
16		49
17	import os	50	import os
18	import shlex	51	import shlex
19	import urllib	52	import urllib
20	import sys	53	import sys
	...		...
31	# Debug : limit processed recoll entries for speed	64	# Debug : limit processed recoll entries for speed
32	_maxrclcnt = 0	65	_maxrclcnt = 0
33		66
34	_dirvec = []	67	_dirvec = []
35		68
36	# Internal init: create the directory tree (folders view) from the doc
37	# array by splitting the url in each doc.
38	#
39	# The dirvec vector has one entry for each directory. Each entry is a
40	# dictionary, mapping the names inside the directory to a pair (i,j),
41	# where:
42	# - i is an index into dirvec if the name is a directory, else -1
43	# - j is the index of the doc inside the doc array (or -1 if there is no doc)
44	#
45	# Entry 0 in dirvec is special: it holds the 'topdirs' from the recoll
46	# configuration. The entries are paths instead of simple names, and
47	# the doc index (j) is 0. The dir index points normally to a dirvec
48	# entry.
49		69
50	# Create new directory entry: insert in father and append dirvec slot	70	# Create new directory entry: insert in father and append dirvec slot
51	# (with ".." entry)	71	# (with ".." entry)
52	def _createdir(dirvec, fathidx, docidx, nm):	72	def _createdir(dirvec, fathidx, docidx, nm):
53	dirvec.append({})	73	dirvec.append({})
54	dirvec[fathidx][nm] = (len(dirvec) - 1, docidx)	74	dirvec[fathidx][nm] = (len(dirvec) - 1, docidx)
55	dirvec[-1][".."] = (fathidx, -1)	75	dirvec[-1][".."] = (fathidx, -1)
56	return len(dirvec) - 1	76	return len(dirvec) - 1
57		77
		78
		79	# Walk the recoll docs array and split the URLs paths to build the
		80	# [folders] data structure
58	def _rcl2folders(docs, confdir, httphp, pathprefix):	81	def _rcl2folders(docs, confdir, httphp, pathprefix):
59	global dirvec	82	global dirvec
60	dirvec = []	83	dirvec = []
61	start = timer()	84	start = timer()
62		85
63	rclconf = rclconfig.RclConfig(confdir)	86	rclconf = rclconfig.RclConfig(confdir)
64	topdirs = [os.path.expanduser(d) for d in	87	topdirs = [os.path.expanduser(d) for d in
65	shlex.split(rclconf.getConfParam('topdirs'))]	88	shlex.split(rclconf.getConfParam('topdirs'))]
66	topdirs = [d.rstrip('/') for d in topdirs]	89	topdirs = [d.rstrip('/') for d in topdirs]
67		90
		91	# Create the 1st entry. This is special because it holds the
		92	# recoll topdirs, which are paths instead of simple names. There
		93	# does not seem any need to build the tree between a topdir and /
68	dirvec.append({})	94	dirvec.append({})
69	dirvec[0][".."] = (0, -1)	95	dirvec[0][".."] = (0, -1)
70	for d in topdirs:	96	for d in topdirs:
71	dirvec.append({})	97	dirvec.append({})
72	dirvec[0][d] = (len(dirvec)-1, -1)	98	dirvec[0][d] = (len(dirvec)-1, -1)
73	dirvec[-1][".."] = (0, -1)	99	dirvec[-1][".."] = (0, -1)
74		100
75	# Walk the doc list and update the directory tree according to the	101	# Walk the doc list and update the directory tree according to the
76	# url (create intermediary directories if needed, create leaf	102	# url: create intermediary directories if needed, create leaf
77	# entry	103	# entry.
78	for docidx in range(len(docs)):	104	for docidx in range(len(docs)):
79	doc = docs[docidx]	105	doc = docs[docidx]
80		106
		107	# Possibly enrich the doc entry with a cover art uri.
81	arturi = docarturi(doc, httphp, pathprefix)	108	arturi = docarturi(doc, httphp, pathprefix)
82	if arturi:	109	if arturi:
83	# The uri is quoted, so it's ascii and we can just store	110	# The uri is quoted, so it's ascii and we can just store
84	# it as a doc attribute	111	# it as a doc attribute
85	doc.albumarturi = arturi	112	doc.albumarturi = arturi
	...		...
94	decoded = url.decode('utf-8')	121	decoded = url.decode('utf-8')
95	except:	122	except:
96	decoded = urllib.quote(url).decode('utf-8')	123	decoded = urllib.quote(url).decode('utf-8')
97		124
98	# Determine the root entry (topdirs element). Special because	125	# Determine the root entry (topdirs element). Special because
99	# path not simple name	126	# its path is not a simple name.
100	fathidx = -1	127	fathidx = -1
101	for rtpath,idx in dirvec[0].iteritems():	128	for rtpath,idx in dirvec[0].iteritems():
102	if url.startswith(rtpath):	129	if url.startswith(rtpath):
103	fathidx = idx[0]	130	fathidx = idx[0]
104	break	131	break
	...		...
156		183
157	end = timer()	184	end = timer()
158	uplog("_rcl2folders took %.2f Seconds" % (end - start))	185	uplog("_rcl2folders took %.2f Seconds" % (end - start))
159	return dirvec	186	return dirvec
160		187
161	# Internal init: fetch all the docs by querying Recoll with [mime:*],	188	# Fetch all the docs by querying Recoll with [mime:*], which is
162	# which is guaranteed to match every doc without overflowing the query	189	# guaranteed to match every doc without overflowing the query size
163	# size (because the number of mime types is limited). Something like	190	# (because the number of mime types is limited). Something like
164	# title:* would overflow.	191	# title:* would overflow. This creates the main doc array, which is
		192	# then used by all modules.
165	def _fetchalldocs(confdir):	193	def _fetchalldocs(confdir):
166	start = timer()	194	start = timer()
167	allthedocs = []	195	allthedocs = []
168		196
169	rcldb = recoll.connect(confdir=confdir)	197	rcldb = recoll.connect(confdir=confdir)
	...		...
290	entries.append(e)	318	entries.append(e)
291		319
292	return sorted(entries, cmp=cmpentries)	320	return sorted(entries, cmp=cmpentries)
293		321
294	# Return path for objid, which has to be a container.This is good old	322	# Return path for objid, which has to be a container.This is good old
295	# pwd... It is called from the search module for generating a dir:	323	# pwd... It is called from the search module for generating a 'dir:'
296	# recoll filtering directive.	324	# recoll filtering directive.
297	def dirpath(objid):	325	def dirpath(objid):
298	# We may get called from search, on the top dir (above [folders]). Return	326	# We may get called from search, on the top dir (above [folders]). Return
299	# empty in this case	327	# empty in this case
300	try:	328	try: