a b/src/mediaserver/cdplugins/uprcl/folders.py
1
from __future__ import print_function
2
3
import os
4
import shlex
5
import urllib
6
import sys
7
8
from recoll import recoll
9
from recoll import rclconfig
10
11
# Recoll configuration directory describing the index to browse.
# NOTE(review): hard-coded, user-specific path -- presumably a development
# placeholder; confirm before deploying elsewhere.
confdir = "/home/dockes/.recoll-mp3"

rclconf = rclconfig.RclConfig(confdir)

# Top-level indexed directories from the recoll configuration, with a
# leading '~' expanded. The 'or' fallback guards against an unset
# parameter: shlex.split(None) would read from stdin (or raise on recent
# Python versions) instead of yielding an empty list.
topdirs = [os.path.expanduser(d) for d in
           shlex.split(rclconf.getConfParam('topdirs') or '')]
17
18
# Create the directory tree (folders view) from the doc array by
# splitting the url in each doc.
#
# The dirvec list has one entry for each directory. Each entry is a
# dictionary, mapping the names inside the directory to a pair (i, j),
# where:
#  - i is an index into dirvec if the name is a directory, else 0
#  - j is the index of the doc inside the doc array
#
# Entry 0 in dirvec is special: it holds the 'topdirs' from the recoll
# configuration. Its keys are paths instead of simple names, and
# the doc index (j) is 0. The dir index (i) points, as usual, to a
# dirvec entry.
31
def _printable_url(url):
    """Return url as text for log messages, quoting undecodable bytes."""
    if not isinstance(url, bytes):
        return url
    try:
        return url.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: percent-quote the raw bytes so they can be shown.
        return urllib.quote(url).decode('utf-8')


def _match_topdir(path, roots):
    """Find the top directory containing path.

    roots maps each top directory path to a (dirvec index, doc index)
    pair. A root only matches on a path component boundary, so '/a/music'
    does not claim '/a/music2/x', and the longest matching root wins so
    that nested topdirs resolve the same way on every run.

    Returns (dirvec index, list of path components below the root), or
    (-1, []) when no root matches.
    """
    best_idx = -1
    best_len = -1
    for rtpath, entry in roots.items():
        # Ignore a trailing slash on the configured root so that the
        # first component below it is not lost when splitting.
        base = rtpath.rstrip('/')
        if path != base and not path.startswith(base + '/'):
            continue
        if len(base) > best_len:
            best_idx, best_len = entry[0], len(base)
    if best_idx == -1:
        return -1, []
    # Drop the empty components produced by doubled or trailing slashes.
    return best_idx, [elt for elt in path[best_len:].split('/') if elt]


def rcl2folders(docs):
    """Build the global dirvec folder tree from the urls of docs.

    dirvec is a list of dictionaries, one per directory. Each dictionary
    maps a name inside that directory to a pair (i, j): i is the dirvec
    index of the named directory, or 0 for a plain file, and j is the
    index of the corresponding doc in docs. dirvec[0] holds the entries of
    the module-level topdirs list, keyed by full path, with j set to 0.

    Each doc must provide getbinurl() and an mtype attribute. Docs whose
    path lies under none of the topdirs are reported on stdout and
    skipped. The result is stored in the global dirvec; nothing is
    returned.
    """
    global dirvec
    dirvec = [{}]

    # One dirvec entry per configured top directory, referenced from entry 0.
    topidx = 0
    for d in topdirs:
        topidx += 1
        dirvec[0][d] = (topidx, 0)
        dirvec.append({})

    for docidx, doc in enumerate(docs):
        # Drop the first 7 characters of the url; presumably every url
        # starts with the 'file://' scheme -- TODO confirm.
        url = doc.getbinurl()[7:]

        fathidx, path = _match_topdir(url, dirvec[0])
        if fathidx == -1:
            print("No parent in topdirs: %s" % _printable_url(url))
            continue

        last = len(path) - 1
        for pos, elt in enumerate(path):
            entry = dirvec[fathidx].get(elt)
            if entry is not None:
                if entry[0] == 0:
                    # Already recorded as a plain file: there is nothing
                    # to descend into.
                    break
                fathidx = entry[0]
            elif pos != last or doc.mtype == 'inode/directory':
                # Intermediate component, or the doc is itself a
                # directory: allocate a new dirvec entry and descend.
                topidx += 1
                dirvec.append({})
                dirvec[fathidx][elt] = (topidx, docidx)
                fathidx = topidx
            else:
                # Plain file: the directory index is 0 by convention.
                dirvec[fathidx][elt] = (0, docidx)
82
83
84
85
def fetchalldocs(confdir, maxcnt=0):
    """Load the docs matching the query "mime:*" into the global allthedocs.

    confdir is the recoll configuration directory to connect to.
    maxcnt, when greater than 0, caps the number of docs kept; the default
    of 0 means no limit.

    The result is stored in the global allthedocs list; nothing is
    returned. Progress messages are printed on stdout.
    """
    global allthedocs
    allthedocs = []

    rcldb = recoll.connect(confdir=confdir)
    rclq = rcldb.query()
    # "mime:*" presumably selects every document in the index -- confirm
    # against the recoll query language documentation.
    rclq.execute("mime:*", stemming=0)
    print("Estimated query results: %d" % (rclq.rowcount))

    while True:
        batch = rclq.fetchmany()
        allthedocs.extend(batch)
        if maxcnt > 0 and len(allthedocs) >= maxcnt:
            # The last batch may overshoot the requested limit: trim it.
            del allthedocs[maxcnt:]
            break
        # A batch that does not fill arraysize means the results are
        # exhausted.
        if len(batch) != rclq.arraysize:
            break
    print("Retrieved %d docs" % (len(allthedocs),))
104
105
# Module-level driver: load the docs from the index, build the folder
# tree, then print the first entries of dirvec as a quick sanity check.
fetchalldocs(confdir)
rcl2folders(allthedocs)

# Slice instead of indexing: dirvec holds fewer than four entries when the
# configuration has few top directories and no subdirectory was found.
for ent in dirvec[:4]:
    print("%s" % ent)