|
a/src/mediaserver/cdplugins/uprcl/folders.py |
|
b/src/mediaserver/cdplugins/uprcl/folders.py |
1 |
from __future__ import print_function
|
|
|
2 |
|
1 |
|
3 |
import os
|
2 |
import os
|
4 |
import shlex
|
3 |
import shlex
|
5 |
import urllib
|
4 |
import urllib
|
6 |
import sys
|
5 |
import sys
|
|
|
6 |
|
|
|
7 |
from uprclutils import *
|
7 |
|
8 |
|
8 |
from recoll import recoll
|
9 |
from recoll import recoll
|
9 |
from recoll import rclconfig
|
10 |
from recoll import rclconfig
|
10 |
|
11 |
|
11 |
confdir = "/home/dockes/.recoll-mp3"
|
12 |
# Recoll configuration directory. browse() hands it to inittree() the first
# time the folder tree is needed.
# NOTE(review): hard-coded developer path -- presumably a placeholder until
# the value comes from the plugin configuration; confirm.
confdir = "/home/dockes/.recoll-mp3"
|
|
... |
|
... |
19 |
# splitting the url in each doc.
|
20 |
# splitting the url in each doc.
|
20 |
#
|
21 |
#
|
21 |
# The dirvec vector has one entry for each directory. Each entry is a
|
22 |
# The dirvec vector has one entry for each directory. Each entry is a
|
22 |
# dictionary, mapping the names inside the directory to a pair (i,j),
|
23 |
# dictionary, mapping the names inside the directory to a pair (i,j),
|
23 |
# where:
|
24 |
# where:
|
24 |
# - i is an index into dirvec if the name is a directory, else 0
|
25 |
# - i is an index into dirvec if the name is a directory, else -1
|
25 |
# - j is the index of the doc inside the doc array
|
26 |
# - j is the index of the doc inside the doc array (or -1 if there is no doc)
|
26 |
#
|
27 |
#
|
27 |
# Entry 0 in dirvec is special: it holds the 'topdirs' from the recoll
|
28 |
# Entry 0 in dirvec is special: it holds the 'topdirs' from the recoll
|
28 |
# configuration. The entries are paths instead of simple names, and
|
29 |
# configuration. The entries are paths instead of simple names, and
|
29 |
# the doc index (j) is 0. The dir index points normally to a dirvec
|
30 |
# the doc index (j) is -1 (no doc). The dir index points normally to a dirvec
|
30 |
# entry.
|
31 |
# entry.
|
|
... |
|
... |
34 |
|
35 |
|
35 |
topidx = 0
|
36 |
topidx = 0
|
36 |
dirvec.append({})
|
37 |
dirvec.append({})
|
37 |
for d in topdirs:
|
38 |
for d in topdirs:
|
38 |
topidx += 1
|
39 |
topidx += 1
|
39 |
dirvec[0][d] = (topidx, 0)
|
40 |
dirvec[0][d] = (topidx, -1)
|
40 |
dirvec.append({})
|
41 |
dirvec.append({})
|
41 |
|
42 |
|
|
|
43 |
# Walk the doc list and update the directory tree according to the
|
|
|
44 |
# url (create intermediary directories if needed, create leaf
|
|
|
45 |
# entry).
|
42 |
for docidx in range(len(docs)):
|
46 |
for docidx in range(len(docs)):
|
43 |
doc = docs[docidx]
|
47 |
doc = docs[docidx]
|
44 |
url = doc.getbinurl()
|
48 |
url = doc.getbinurl()
|
45 |
url = url[7:]
|
49 |
url = url[7:]
|
46 |
try:
|
50 |
try:
|
47 |
decoded = url.decode('utf-8')
|
51 |
decoded = url.decode('utf-8')
|
48 |
except:
|
52 |
except:
|
49 |
decoded = urllib.quote(url).decode('utf-8')
|
53 |
decoded = urllib.quote(url).decode('utf-8')
|
50 |
|
54 |
|
|
|
55 |
# Determine the root entry (topdirs element). Special because
|
|
|
56 |
# path not simple name
|
51 |
fathidx = -1
|
57 |
fathidx = -1
|
52 |
for rtpath,idx in dirvec[0].iteritems():
|
58 |
for rtpath,idx in dirvec[0].iteritems():
|
53 |
if url.startswith(rtpath):
|
59 |
if url.startswith(rtpath):
|
54 |
fathidx = idx[0]
|
60 |
fathidx = idx[0]
|
55 |
break
|
61 |
break
|
56 |
if fathidx == -1:
|
62 |
if fathidx == -1:
|
57 |
print("No parent in topdirs: %s" % decoded)
|
63 |
uplog("No parent in topdirs: %s" % decoded)
|
58 |
continue
|
64 |
continue
|
59 |
|
65 |
|
|
|
66 |
# Compute rest of path
|
60 |
url1 = url[len(rtpath):]
|
67 |
url1 = url[len(rtpath):]
|
61 |
if len(url1) == 0:
|
68 |
if len(url1) == 0:
|
62 |
continue
|
69 |
continue
|
63 |
|
70 |
|
|
|
71 |
# Split path, then walk the vector, possibly creating
|
|
|
72 |
# directory entries as needed
|
64 |
path = url1.split('/')[1:]
|
73 |
path = url1.split('/')[1:]
|
65 |
#print("%s"%path, file=sys.stderr)
|
74 |
#uplog("%s"%path, file=sys.stderr)
|
66 |
for idx in range(len(path)):
|
75 |
for idx in range(len(path)):
|
67 |
elt = path[idx]
|
76 |
elt = path[idx]
|
68 |
if elt in dirvec[fathidx]:
|
77 |
if elt in dirvec[fathidx]:
|
|
|
78 |
# This path element was already seen
|
|
|
79 |
# If this is the last entry in the path, maybe update
|
|
|
80 |
# the doc idx (previous entries were created for
|
|
|
81 |
# intermediate elements without a Doc).
|
|
|
82 |
#uplog("NEED TO UPDATE DOC")
|
|
|
83 |
dirvec[fathidx][elt] = (dirvec[fathidx][elt][0], docidx)
|
|
|
84 |
# Update fathidx for next iteration
|
69 |
fathidx = dirvec[fathidx][elt][0]
|
85 |
fathidx = dirvec[fathidx][elt][0]
|
70 |
else:
|
86 |
else:
|
71 |
if idx != len(path) -1 or doc.mtype == 'inode/directory':
|
87 |
# Element has no entry in father directory (hence no
|
|
|
88 |
# dirvec entry either).
|
|
|
89 |
if idx != len(path) -1:
|
|
|
90 |
# This is an intermediate element. Create a
|
|
|
91 |
# Doc-less directory
|
72 |
topidx += 1
|
92 |
topidx += 1
|
73 |
dirvec.append({})
|
93 |
dirvec.append({})
|
74 |
dirvec[fathidx][elt] = (topidx, docidx)
|
94 |
dirvec[fathidx][elt] = (topidx, -1)
|
75 |
fathidx = topidx
|
95 |
fathidx = topidx
|
76 |
else:
|
96 |
else:
|
|
|
97 |
# Last element. If directory, needs a dirvec entry
|
|
|
98 |
if doc.mtype == 'inode/directory':
|
|
|
99 |
topidx += 1
|
|
|
100 |
dirvec.append({})
|
77 |
dirvec[fathidx][elt] = (topidx, docidx)
|
101 |
dirvec[fathidx][elt] = (topidx, docidx)
|
|
|
102 |
fathidx = topidx
|
|
|
103 |
else:
|
|
|
104 |
dirvec[fathidx][elt] = (-1, docidx)
|
78 |
|
105 |
|
79 |
if False:
|
106 |
if False:
|
80 |
for ent in dirvec:
|
107 |
for ent in dirvec:
|
81 |
print("%s" % ent)
|
108 |
uplog("%s" % ent)
|
82 |
|
109 |
|
|
|
110 |
return dirvec
|
83 |
|
111 |
|
84 |
|
112 |
# Fetch all the docs by querying Recoll with [mime:*], which is
|
|
|
113 |
# guaranteed to match every doc without overflowing the query size
|
|
|
114 |
# (because the number of mime types is limited). Something like
|
|
|
115 |
# title:* would overflow.
|
85 |
def fetchalldocs(confdir, maxcnt=0):
    """Fetch the documents of a Recoll index and return them as a list.

    The index is queried with [mime:*], which matches every document
    without overflowing the query size (the number of mime types is
    limited, whereas something like title:* would overflow).

    confdir -- Recoll configuration directory, passed to recoll.connect().
    maxcnt  -- maximum number of documents to return; 0 (the default)
               means no limit.

    Returns the list of document objects produced by the query.
    """
    allthedocs = []

    rcldb = recoll.connect(confdir=confdir)
    rclq = rcldb.query()
    rclq.execute("mime:*", stemming=0)
    uplog("Estimated alldocs query results: %d" % (rclq.rowcount))

    while True:
        docs = rclq.fetchmany()
        allthedocs.extend(docs)
        if maxcnt > 0 and len(allthedocs) >= maxcnt:
            # Drop whatever the last batch fetched beyond the cap.
            del allthedocs[maxcnt:]
            break
        # A batch shorter than the fetch size means the results are
        # exhausted.
        if len(docs) != rclq.arraysize:
            break

    uplog("Retrieved %d docs" % (len(allthedocs),))
    return allthedocs
|
104 |
|
135 |
|
105 |
fetchalldocs(confdir)
|
136 |
def inittree(confdir):
    """Load all documents from the index and build the folder tree.

    confdir -- Recoll configuration directory, passed to fetchalldocs().

    The results are stored in the module globals g_alldocs (the document
    list) and g_dirvec (the directory vector returned by rcl2folders()).
    """
    global g_alldocs, g_dirvec

    g_alldocs = fetchalldocs(confdir)
    g_dirvec = rcl2folders(g_alldocs)
|
107 |
|
141 |
|
108 |
print("%s" % dirvec[0])
|
142 |
|
109 |
print("%s" % dirvec[1])
|
143 |
# Object id prefix for the folders tree. browse() rejects any pid which does
# not start with it, and builds the ids of child entries as
# g_myprefix + '$' + <index into the directory vector>.
g_myprefix = '0$uprcl$folders'
|
110 |
print("%s" % dirvec[2])
|
144 |
|
111 |
print("%s" % dirvec[3])
|
145 |
# objid is like folders$index
|
|
|
146 |
# flag is meta or children.
|
|
|
147 |
def browse(pid, flag):
    """Return the entries of the folder designated by object id *pid*.

    pid  -- object id: g_myprefix alone designates the root (directory
            index 0), else g_myprefix followed by a one-character
            separator and the decimal index of the directory in g_dirvec.
    flag -- 'meta' or 'children' according to the caller; currently unused.

    Returns a list with one rcldirentry() result per name found in the
    directory, or an empty list if the pid is not valid or the index holds
    no documents. The tree is built on first use by calling inittree().
    """
    if not pid.startswith(g_myprefix):
        uplog("folders.browse: bad pid %s" % pid)
        return []

    # Lazy initialisation: g_alldocs does not exist until inittree() has
    # run, so probing it raises NameError (TypeError if it was reset to
    # something without a length).
    try:
        len(g_alldocs)
    except (NameError, TypeError):
        inittree(confdir)

    if len(g_alldocs) == 0:
        uplog("folders:browse: no docs")
        return []

    suffix = pid[len(g_myprefix):]
    if not suffix:
        diridx = 0
    else:
        # Skip the separator character, the rest must be the index.
        try:
            diridx = int(suffix[1:])
        except ValueError:
            diridx = -1

    # Negative values are rejected too: they would silently wrap around
    # to the end of the vector.
    if diridx < 0 or diridx >= len(g_dirvec):
        uplog("folders:browse: bad pid %s" % pid)
        return []

    # TODO: need to treat diridx 0 special (take simple paths): the names
    # in entry 0 are the full 'topdirs' paths, not simple names.
    # NOTE(review): names which are not directories carry a directory
    # index of -1, so the id built for them is not browsable -- confirm
    # how plain documents should be returned.
    entries = []
    for nm, ids in g_dirvec[diridx].items():
        objid = g_myprefix + '$' + str(ids[0])
        entries.append(rcldirentry(objid, pid, nm))

    return entries
|