|
a |
|
b/src/mediaserver/cdplugins/uprcl/folders.py |
|
|
1 |
from __future__ import print_function
|
|
|
2 |
|
|
|
3 |
import os
|
|
|
4 |
import shlex
|
|
|
5 |
import urllib
|
|
|
6 |
import sys
|
|
|
7 |
|
|
|
8 |
from recoll import recoll
|
|
|
9 |
from recoll import rclconfig
|
|
|
10 |
|
|
|
11 |
# Recoll configuration directory. It is used both to read the
# configuration parameters below and to open the index in fetchalldocs().
confdir = "/home/dockes/.recoll-mp3"

rclconf = rclconfig.RclConfig(confdir)

# The 'topdirs' configuration value is split with shell-like quoting
# rules, and a leading '~' in each resulting path is expanded.
topdirs = list(map(os.path.expanduser,
                   shlex.split(rclconf.getConfParam('topdirs'))))
|
|
|
17 |
|
|
|
18 |
# Create the directory tree (folders view) from the doc array by
# splitting the url in each doc.
#
# The dirvec vector has one entry for each directory. Each entry is a
# dictionary, mapping the names inside the directory to a pair (i,j),
# where:
#  - i is an index into dirvec if the name is a directory, else 0
#  - j is the index of the doc inside the doc array
#
# Entry 0 in dirvec is special: it holds the 'topdirs' from the recoll
# configuration. The entries are paths instead of simple names, and
# the doc index (j) is 0. The dir index points normally to a dirvec
# entry.
def _printable_path(raw):
    """Return a text form of raw that is safe to use in a message.

    Byte strings are decoded as UTF-8; if that fails they are
    percent-quoted instead. Values that are already text are returned
    unchanged.
    """
    if not isinstance(raw, bytes):
        return raw
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        # quote() lives in different places in Python 2 and Python 3.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote
        quoted = quote(raw)
        if isinstance(quoted, bytes):
            quoted = quoted.decode('utf-8')
        return quoted


def _match_topdir(rootmap, url):
    """Find the top directory containing url.

    rootmap is entry 0 of the directory vector (topdir path -> (i, j)).
    A topdir only matches on a path element boundary, so '/music' does
    not match '/music2/x'. A trailing '/' on the topdir is ignored. When
    several topdirs match, the longest (most specific) one is chosen so
    that the result does not depend on dictionary ordering.

    Returns None if no topdir matches, else a pair (diridx, rest) where
    diridx is the topdir's index in the directory vector and rest is the
    part of url below the topdir (empty, or starting with '/').
    """
    best = None
    for rtpath, ent in rootmap.items():
        root = rtpath.rstrip('/')
        if url == root or url.startswith(root + '/'):
            if best is None or len(root) > len(best[1]):
                best = (ent[0], root)
    if best is None:
        return None
    return best[0], url[len(best[1]):]


def rcl2folders(docs, roots=None):
    """Build the folders tree for docs and store it in the global dirvec.

    Parameters:
      docs:  sequence of objects providing getbinurl() and an mtype
             attribute. The first 7 characters of each url (the length
             of a 'file://' prefix) are dropped to obtain the path.
      roots: optional list of top directory paths. Defaults to the
             module-level topdirs.

    Docs which are not located under any top directory are reported on
    stdout and skipped. A doc whose path is exactly a top directory is
    skipped silently.

    Returns the directory vector, which is also left in the global
    dirvec.
    """
    global dirvec
    if roots is None:
        roots = topdirs

    # Entry 0 maps each top directory path to its own dirvec entry.
    dirvec = [{}]
    for root in roots:
        dirvec[0][root] = (len(dirvec), 0)
        dirvec.append({})

    for docidx, doc in enumerate(docs):
        url = doc.getbinurl()[7:]

        match = _match_topdir(dirvec[0], url)
        if match is None:
            print("No parent in topdirs: %s" % _printable_path(url))
            continue
        fathidx, below = match
        if not below:
            continue

        # 'below' starts with '/', so the first split element is empty.
        path = below.split('/')[1:]
        last = len(path) - 1
        for idx, elt in enumerate(path):
            entry = dirvec[fathidx].get(elt)
            if entry is not None:
                # Already known: just walk down into it.
                # NOTE(review): a directory first created as an
                # intermediate element keeps the doc index of the doc
                # which caused its creation, even if the directory's own
                # doc is seen later.
                fathidx = entry[0]
            elif idx != last or doc.mtype == 'inode/directory':
                # Intermediate element, or a directory doc: it needs its
                # own dirvec entry.
                diridx = len(dirvec)
                dirvec.append({})
                dirvec[fathidx][elt] = (diridx, docidx)
                fathidx = diridx
            else:
                # Plain file: no dirvec entry, so the directory index is 0.
                dirvec[fathidx][elt] = (0, docidx)

    return dirvec
|
|
|
82 |
|
|
|
83 |
|
|
|
84 |
|
|
|
85 |
def fetchalldocs(confdir):
    """Load every document of the recoll index into the global allthedocs.

    Connects to the index described by the confdir configuration
    directory, runs the "mime:*" query without stemming, and fetches the
    results batch by batch. The estimated result count and the final
    number of retrieved docs are printed to stdout.
    """
    global allthedocs
    allthedocs = []

    db = recoll.connect(confdir=confdir)
    query = db.query()
    query.execute("mime:*", stemming=0)
    print("Estimated query results: %d" % (query.rowcount))

    # A limit of 0 means no limit: fetch everything.
    limit = 0
    fetched = 0
    finished = False
    while not finished:
        batch = query.fetchmany()
        allthedocs.extend(batch)
        fetched += len(batch)
        # Stop once the (optional) limit is reached, or when a batch
        # comes back with a size different from the query's arraysize.
        limit_reached = limit > 0 and fetched >= limit
        finished = limit_reached or len(batch) != query.arraysize
    print("Retrieved %d docs" % (fetched,))
|
|
|
104 |
|
|
|
105 |
# Build the doc list and the folders tree when the module is loaded.
fetchalldocs(confdir)
rcl2folders(allthedocs)

# Print the first (at most four) directory entries as a quick check.
# Slicing avoids an IndexError when the tree holds fewer than four
# directories, e.g. a single topdir without sub-directories.
for ent in dirvec[:4]:
    print("%s" % ent)
|