a/src/mediaserver/cdplugins/uprcl/folders.py b/src/mediaserver/cdplugins/uprcl/folders.py
1
from __future__ import print_function
2
1
3
import os
2
import os
4
import shlex
3
import shlex
5
import urllib
4
import urllib
6
import sys
5
import sys
6
7
from uprclutils import *
7
8
8
from recoll import recoll
9
from recoll import recoll
9
from recoll import rclconfig
10
from recoll import rclconfig
10
11
11
confdir = "/home/dockes/.recoll-mp3"
12
confdir = "/home/dockes/.recoll-mp3"
...
...
19
# splitting the url in each doc.
20
# splitting the url in each doc.
20
#
21
#
21
# The dirvec vector has one entry for each directory. Each entry is a
22
# The dirvec vector has one entry for each directory. Each entry is a
22
# dictionary, mapping the names inside the directory to a pair (i,j),
23
# dictionary, mapping the names inside the directory to a pair (i,j),
23
# where:
24
# where:
24
#  - i is an index into dirvec if the name is a directory, else 0
25
#  - i is an index into dirvec if the name is a directory, else -1
25
#  - j is the index of the doc inside the doc array
26
#  - j is the index of the doc inside the doc array (or -1 if there is no doc)
26
#
27
#
27
# Entry 0 in dirvec is special: it holds the 'topdirs' from the recoll
28
# Entry 0 in dirvec is special: it holds the 'topdirs' from the recoll
28
# configuration. The entries are paths instead of simple names, and
29
# configuration. The entries are paths instead of simple names, and
29
# the doc index (j) is 0. The dir index points normally to a dirvec
30
# the doc index (j) is -1. The dir index points normally to a dirvec
30
# entry.
31
# entry.
...
...
34
35
35
    topidx = 0
36
    topidx = 0
36
    dirvec.append({})
37
    dirvec.append({})
37
    for d in topdirs:
38
    for d in topdirs:
38
        topidx += 1
39
        topidx += 1
39
        dirvec[0][d] = (topidx, 0)
40
        dirvec[0][d] = (topidx, -1)
40
        dirvec.append({})
41
        dirvec.append({})
41
42
43
    # Walk the doc list and update the directory tree according to the
44
    # url (create intermediary directories if needed, create leaf
45
    # entry)
42
    for docidx in range(len(docs)):
46
    for docidx in range(len(docs)):
43
        doc = docs[docidx]
47
        doc = docs[docidx]
44
        url = doc.getbinurl()
48
        url = doc.getbinurl()
45
        url = url[7:]
49
        url = url[7:]
46
        try:
50
        try:
47
            decoded = url.decode('utf-8')
51
            decoded = url.decode('utf-8')
48
        except:
52
        except:
49
            decoded = urllib.quote(url).decode('utf-8')
53
            decoded = urllib.quote(url).decode('utf-8')
50
54
55
        # Determine the root entry (topdirs element). Special because
56
        # path not simple name
51
        fathidx = -1
57
        fathidx = -1
52
        for rtpath,idx in dirvec[0].iteritems():
58
        for rtpath,idx in dirvec[0].iteritems():
53
            if url.startswith(rtpath):
59
            if url.startswith(rtpath):
54
                fathidx = idx[0]
60
                fathidx = idx[0]
55
                break
61
                break
56
        if fathidx == -1:
62
        if fathidx == -1:
57
            print("No parent in topdirs: %s" % decoded)
63
            uplog("No parent in topdirs: %s" % decoded)
58
            continue
64
            continue
59
65
66
        # Compute rest of path
60
        url1 = url[len(rtpath):]
67
        url1 = url[len(rtpath):]
61
        if len(url1) == 0:
68
        if len(url1) == 0:
62
            continue
69
            continue
63
70
71
        # Split path, then walk the vector, possibly creating
72
        # directory entries as needed
64
        path = url1.split('/')[1:]
73
        path = url1.split('/')[1:]
65
        #print("%s"%path, file=sys.stderr)
74
        #uplog("%s"%path, file=sys.stderr)
66
        for idx in range(len(path)):
75
        for idx in range(len(path)):
67
            elt = path[idx]
76
            elt = path[idx]
68
            if elt in dirvec[fathidx]:
77
            if elt in dirvec[fathidx]:
78
                # This path element was already seen
79
                # If this is the last entry in the path, maybe update
80
                # the doc idx (previous entries were created for
81
                # intermediate elements without a Doc).
82
                #uplog("NEED TO UPDATE DOC")
83
                dirvec[fathidx][elt] = (dirvec[fathidx][elt][0], docidx)
84
                # Update fathidx for next iteration
69
                fathidx = dirvec[fathidx][elt][0]
85
                fathidx = dirvec[fathidx][elt][0]
70
            else:
86
            else:
71
                if idx != len(path) -1 or doc.mtype == 'inode/directory':
87
                # Element has no entry in father directory (hence no
88
                # dirvec entry either).
89
                if idx != len(path) -1:
90
                    # This is an intermediate element. Create a
91
                    # Doc-less directory
72
                    topidx += 1
92
                    topidx += 1
73
                    dirvec.append({})
93
                    dirvec.append({})
74
                    dirvec[fathidx][elt] = (topidx, docidx)
94
                    dirvec[fathidx][elt] = (topidx, -1)
75
                    fathidx = topidx
95
                    fathidx = topidx
76
                else:
96
                else:
97
                    # Last element. If directory, needs a dirvec entry
98
                    if doc.mtype == 'inode/directory':
99
                        topidx += 1
100
                        dirvec.append({})
77
                    dirvec[fathidx][elt] = (topidx, docidx)
101
                        dirvec[fathidx][elt] = (topidx, docidx)
102
                        fathidx = topidx
103
                    else:
104
                        dirvec[fathidx][elt] = (-1, docidx)
78
105
79
    if False:
106
    if False:
80
        for ent in dirvec:
107
        for ent in dirvec:
81
            print("%s" % ent)
108
            uplog("%s" % ent)
82
109
110
    return dirvec
83
111
84
112
# Fetch all the docs by querying Recoll with [mime:*], which is
113
# guaranteed to match every doc without overflowing the query size
114
# (because the number of mime types is limited). Something like
115
# title:* would overflow.
85
def fetchalldocs(confdir, maxcnt=0):
    """Return the list of all documents in the Recoll index.

    The index is queried with [mime:*] and the results are read batch
    by batch with fetchmany().

    confdir: Recoll configuration directory, passed to recoll.connect().
    maxcnt: if greater than 0, stop fetching once that many documents
        were retrieved and return at most maxcnt of them. The default
        (0) means no limit.
    """
    allthedocs = []

    rcldb = recoll.connect(confdir=confdir)
    rclq = rcldb.query()
    rclq.execute("mime:*", stemming=0)
    uplog("Estimated alldocs query results: %d" % (rclq.rowcount))

    while True:
        docs = rclq.fetchmany()
        allthedocs.extend(docs)
        # A batch shorter than arraysize means that the result set is
        # exhausted.
        if (maxcnt > 0 and len(allthedocs) >= maxcnt) or \
           len(docs) != rclq.arraysize:
            break

    if maxcnt > 0:
        # The last batch may have gone past the requested limit.
        del allthedocs[maxcnt:]

    uplog("Retrieved %d docs" % (len(allthedocs),))
    return allthedocs
104
135
105
fetchalldocs(confdir)
136
def inittree(confdir):
    """Fetch all the documents and build the directory tree.

    The document list returned by fetchalldocs(confdir) is stored in
    the module-level g_alldocs, and the result of rcl2folders() on that
    list is stored in the module-level g_dirvec.

    confdir: Recoll configuration directory, passed to fetchalldocs().
    """
    global g_alldocs, g_dirvec

    g_alldocs = fetchalldocs(confdir)
    g_dirvec = rcl2folders(g_alldocs)
107
141
108
print("%s" % dirvec[0])
142
109
print("%s" % dirvec[1])
143
g_myprefix = '0$uprcl$folders'
110
print("%s" % dirvec[2])
144
111
print("%s" % dirvec[3])
145
# pid is g_myprefix, optionally followed by '$' and a dirvec index
# (the bare prefix designates dirvec entry 0).
146
# flag is meta or children. 
147
def browse(pid, flag):
    """Return the entries of the directory designated by pid.

    pid: object id. It must start with g_myprefix. What follows is
        either nothing (dirvec entry 0) or one separator character and
        a decimal index into g_dirvec.
    flag: meta or children. Not used by this function at the moment.

    Returns a list with one rcldirentry() result for each name in the
    directory, or an empty list if pid is invalid or if there are no
    documents.
    """
    global g_alldocs, g_dirvec

    if not pid.startswith(g_myprefix):
        uplog("folders.browse: bad pid %s" % pid)
        return []

    # Build the tree on first use: g_alldocs is not usable until
    # inittree() has run.
    try:
        len(g_alldocs)
    except (NameError, TypeError):
        inittree(confdir)

    if len(g_alldocs) == 0:
        uplog("folders:browse: no docs")
        return []

    # Extract the directory index. A suffix which is not a number is
    # reported as a bad pid instead of raising ValueError.
    suffix = pid[len(g_myprefix):]
    try:
        diridx = int(suffix[1:]) if suffix else 0
    except ValueError:
        diridx = -1

    # Negative values must be rejected too: they would silently index
    # g_dirvec from its end.
    if diridx < 0 or diridx >= len(g_dirvec):
        uplog("folders:browse: bad pid %s" % pid)
        return []

    entries = []
    # TODO: need to treat diridx 0 special (take simple paths)
    for nm, ids in g_dirvec[diridx].iteritems():
        entryid = g_myprefix + '$' + str(ids[0])
        entries.append(rcldirentry(entryid, pid, nm))

    return entries