Switch to unified view

a/src/mediaserver/cdplugins/uprcl/uprcltags.py b/src/mediaserver/cdplugins/uprcl/uprcltags.py
...
...
4
import sqlite3
4
import sqlite3
5
from timeit import default_timer as timer
5
from timeit import default_timer as timer
6
6
7
from uprclutils import *
7
from uprclutils import *
8
8
9
# After initialization, this holds the list of all records out of recoll
10
g_alldocs = []
11
12
g_myprefix = '0$uprcl$'
13
9
sqconn = sqlite3.connect(':memory:')
14
sqconn = sqlite3.connect(':memory:')
10
#sqconn = sqlite3.connect('/tmp/tracks.sqlite')
15
#sqconn = sqlite3.connect('/tmp/tracks.sqlite')
11
16
17
# Tags for which we create auxiliary tables for facet descent.
18
#
19
# TBD: The list will come from the config file one day
20
#
12
# TBD All Artists, Orchestra, Group
21
# TBD: All Artists, Orchestra, Group
13
#
22
#
14
# Maybe we'd actually need a 3rd value for the recoll field name, but
23
# Maybe we'd actually need a 3rd value for the recoll field name, but
15
# it can be the same for the currently relevant fields.
24
# it can be the same for the currently relevant fields.
25
tagtables = {
16
tables = {'Artist' : 'artist',
26
    'Artist' : 'artist',
17
          'Date' : 'date',
27
    'Date' : 'date',
18
          'Genre' : 'genre',
28
    'Genre' : 'genre',
19
          'All Artists' : 'allartists',
29
#   'All Artists' : 'allartists',
20
          'Composer' : 'composer',
30
    'Composer' : 'composer',
21
          'Conductor' : 'conductor',
31
    'Conductor' : 'conductor',
22
          'Orchestra' : 'orchestra',
32
    'Orchestra' : 'orchestra',
23
          'Group' : 'grouptb',
33
#   'Group' : 'grouptb',
24
          'Comment' : 'comment'
34
    'Comment' : 'comment'
25
          }
35
    }
26
          
36
37
# Translation only used when fetching fields from the recoll record
38
coltorclfield = {
39
    # mutagen in easy mode extracts TPE2 as performer, but the id3
40
    # standard does say that TPE2 is for orchestra
41
    # TBD: we currently can't extract the common: TPXXX=Orchestra=value
42
    'orchestra' : 'performer',
43
    }
44
45
46
def colid(col):
47
    return col + '_id'
48
49
# Create the db. Each tag table has 2 columns: <tagname>_id and
50
# value. The join column in the main tracks table is also named
51
# <tagname>_id
27
def createsqdb(conn):
52
def createsqdb(conn):
28
    c = conn.cursor()
53
    c = conn.cursor()
29
    try:
54
    try:
30
        c.execute('''DROP TABLE albums''')
55
        c.execute('''DROP TABLE albums''')
31
        c.execute('''DROP TABLE tracks''')
56
        c.execute('''DROP TABLE tracks''')
...
...
36
           (albid INTEGER PRIMARY KEY, albtitle TEXT, albfolder TEXT)''')
61
           (albid INTEGER PRIMARY KEY, albtitle TEXT, albfolder TEXT)''')
37
62
38
    tracksstmt = '''CREATE TABLE tracks
63
    tracksstmt = '''CREATE TABLE tracks
39
    (docidx INT, albid INT, trackno INT, title TEXT'''
64
    (docidx INT, albid INT, trackno INT, title TEXT'''
40
65
41
    for tt,tb in tables.iteritems():
66
    for tb in tagtables.itervalues():
42
        try:
67
        try:
43
            c.execute('DROP TABLE ' + tb)
68
            c.execute('DROP TABLE ' + tb)
44
        except:
69
        except:
45
            pass
70
            pass
46
        stmt = 'CREATE TABLE ' + tb + \
71
        stmt = 'CREATE TABLE ' + tb + \
47
               ' (' + tb + '_id' + ' INTEGER PRIMARY KEY, value TEXT)'
72
               ' (' + colid(tb) + ' INTEGER PRIMARY KEY, value TEXT)'
48
        c.execute(stmt)
73
        c.execute(stmt)
49
        tracksstmt += ',' + tb + '_id INT'
74
        tracksstmt += ',' + colid(tb) + ' INT'
50
75
51
    tracksstmt += ')'
76
    tracksstmt += ')'
52
    c.execute(tracksstmt)
77
    c.execute(tracksstmt)
53
    
78
    
54
79
80
# Insert new value if not existing, return rowid of new or existing row
55
def auxtableinsert(sqconn, tb, value):
81
def auxtableinsert(sqconn, tb, value):
56
    c = sqconn.cursor()
82
    c = sqconn.cursor()
57
    col = tb + '_id'
83
    col = colid(tb)
58
    stmt = 'SELECT ' + col + ' FROM ' + tb + ' WHERE value = ?'
84
    stmt = 'SELECT ' + col + ' FROM ' + tb + ' WHERE value = ?'
59
    c.execute(stmt, (value,))
85
    c.execute(stmt, (value,))
60
    r = c.fetchone()
86
    r = c.fetchone()
61
    if r:
87
    if r:
62
        rowid = r[0]
88
        rowid = r[0]
...
...
64
        stmt = 'INSERT INTO ' + tb + '(value) VALUES(?)'
90
        stmt = 'INSERT INTO ' + tb + '(value) VALUES(?)'
65
        c.execute(stmt, (value,))
91
        c.execute(stmt, (value,))
66
        rowid = c.lastrowid
92
        rowid = c.lastrowid
67
93
68
    return rowid
94
    return rowid
69
            
95
70
g_alldocs = []
96
# Create the db and fill it up with the values we need, taken out of
97
# the recoll records list
71
def recolltosql(docs):
98
def recolltosql(docs):
72
    global g_alldocs
99
    global g_alldocs
73
    g_alldocs = docs
100
    g_alldocs = docs
74
    
101
    
75
    createsqdb(sqconn)
102
    createsqdb(sqconn)
76
103
104
    # Compute a list of table names and corresponding recoll
105
    # fields. most often they are identical
106
    tabfields = []
107
    for tb in tagtables.itervalues():
108
        if tb in coltorclfield:
109
            rclfld = coltorclfield[tb]
110
        else:
111
            rclfld = tb
112
        tabfields.append((tb, rclfld))
113
        
77
    c = sqconn.cursor()
114
    c = sqconn.cursor()
78
    maxcnt = 0
115
    maxcnt = 0
79
    totcnt = 0
116
    totcnt = 0
80
    for docidx in range(len(docs)):
117
    for docidx in range(len(docs)):
81
        doc = docs[docidx]
118
        doc = docs[docidx]
82
        totcnt += 1
119
        totcnt += 1
120
83
        album = getattr(doc, 'album', None)
121
        album = getattr(doc, 'album', None)
84
        if not album:
122
        if not album:
85
            if doc.mtype != 'inode/directory' and \
123
            if doc.mtype != 'inode/directory' and \
86
                   doc.mtype != 'image/jpeg':
124
                   doc.mtype != 'image/jpeg':
87
                pass
125
                pass
88
                #uplog("No album: mtype %s title %s" % (doc.mtype, doc.url))
126
                #uplog("No album: mtype %s title %s" % (doc.mtype, doc.url))
89
            continue
127
            continue
128
90
        folder = docfolder(doc).decode('utf-8', errors = 'replace')
129
        folder = docfolder(doc).decode('utf-8', errors = 'replace')
130
91
        try:
131
        try:
92
            l= doc.tracknumber.split('/')
132
            l= doc.tracknumber.split('/')
93
            trackno = int(l[0])
133
            trackno = int(l[0])
94
        except:
134
        except:
95
            trackno = 0
135
            trackno = 0
96
            
136
            
97
        # Create album record if needed. There is probably a
137
        # Create album record if needed. There is probably a
98
        # single-statement syntax for this
138
        # single-statement syntax for this. The albums table is
139
        # special, can't use auxtableinsert()
99
        c.execute('''SELECT albid FROM albums
140
        c.execute('''SELECT albid FROM albums
100
        WHERE albtitle = ? AND albfolder = ?''', (album, folder))
141
        WHERE albtitle = ? AND albfolder = ?''', (album, folder))
101
        r = c.fetchone()
142
        r = c.fetchone()
102
        if r:
143
        if r:
103
            albid = r[0]
144
            albid = r[0]
104
        else:
145
        else:
105
            c.execute('''INSERT INTO albums(albtitle, albfolder)
146
            c.execute('''INSERT INTO albums(albtitle, albfolder)
106
            VALUES (?,?)''', (album, folder))
147
            VALUES (?,?)''', (album, folder))
107
            albid = c.lastrowid
148
            albid = c.lastrowid
108
149
109
110
        columns = 'docidx,albid,trackno,title'
150
        columns = 'docidx,albid,trackno,title'
111
        values = [docidx, albid, trackno, doc.title]
151
        values = [docidx, albid, trackno, doc.title]
112
        placehold = '?,?,?,?'
152
        placehold = '?,?,?,?'
113
        for tt,tb in tables.iteritems():
153
        for tb,rclfld in tabfields:
114
            value = getattr(doc, tb, None)
154
            value = getattr(doc, rclfld, None)
115
            if not value:
155
            if not value:
116
                continue
156
                continue
117
            rowid = auxtableinsert(sqconn, tb, value)
157
            rowid = auxtableinsert(sqconn, tb, value)
118
            columns += ',' + tb + '_id'
158
            columns += ',' + colid(tb)
119
            values.append(rowid)
159
            values.append(rowid)
120
            placehold += ',?'
160
            placehold += ',?'
121
161
122
        stmt = 'INSERT INTO tracks(' + columns + ') VALUES(' + placehold + ')'
162
        stmt = 'INSERT INTO tracks(' + columns + ') VALUES(' + placehold + ')'
123
        c.execute(stmt, values)
163
        c.execute(stmt, values)
124
        #uplog(doc.title)
164
        #uplog(doc.title)
125
165
126
127
    sqconn.commit()
166
    sqconn.commit()
128
    uplog("recolltosql: processed %d docs" % totcnt)
167
    uplog("recolltosql: processed %d docs" % totcnt)
129
168
169
# Create our top-level directories, with fixed entries, and stuff from
170
# the tags tables
130
def rootentries(pid):
171
def rootentries(pid):
172
    c = sqconn.cursor()
173
    c.execute("SELECT COUNT(*) from albums")
174
    nalbs = str(c.fetchone()[0])
175
    c.execute("SELECT COUNT(*) from tracks")
176
    nitems = str(c.fetchone()[0])
131
    entries = [rcldirentry(pid + 'albums', pid, 'albums'),
177
    entries = [rcldirentry(pid + 'albums', pid, nalbs + ' albums'),
132
               rcldirentry(pid + 'items', pid, 'items')]
178
               rcldirentry(pid + 'items', pid, nitems + ' items')]
133
    for tt,tb in tables.iteritems():
179
    for tt in sorted(tagtables.iterkeys()):
134
        entries.append(rcldirentry(pid + '=' + tt , pid, tt))
180
        entries.append(rcldirentry(pid + '=' + tt , pid, tt))
135
    return entries
181
    return entries
136
182
137
g_myprefix = '0$uprcl$'
183
# Check what tags still have multiple values inside the selected set,
138
184
# and return their list.
139
def colid(col):
140
    return col + '_id'
141
142
def analyzesubtree(sqconn, recs):
185
def analyzesubtree(sqconn, recs):
143
    docids = ','.join([str(r[0]) for r in recs])
186
    docids = ','.join([str(r[0]) for r in recs])
144
    uplog("analyzesubtree, docids %s" % docids)
187
    uplog("analyzesubtree, docids %s" % docids)
145
    c1 = sqconn.cursor()
188
    c1 = sqconn.cursor()
146
    tags = []
189
    tags = []
147
    for tt,tb in tables.iteritems():
190
    for tt,tb in tagtables.iteritems():
148
        stmt = 'SELECT COUNT(DISTINCT ' + colid(tb) + \
191
        stmt = 'SELECT COUNT(DISTINCT ' + colid(tb) + \
149
               ') FROM tracks WHERE docidx IN (' + docids + ')'
192
               ') FROM tracks WHERE docidx IN (' + docids + ')'
150
        uplog("analyzesubtree: executing: <%s>" % stmt)
193
        uplog("analyzesubtree: executing: <%s>" % stmt)
151
        c1.execute(stmt)
194
        c1.execute(stmt)
152
        for r in c1:
195
        for r in c1:
...
...
154
            uplog("Found %d distinct values for %s" % (cnt, tb))
197
            uplog("Found %d distinct values for %s" % (cnt, tb))
155
            if cnt > 1:
198
            if cnt > 1:
156
                tags.append(tt)
199
                tags.append(tt)
157
    return tags
200
    return tags
158
201
202
# Main browsing routine. Given an objid, translate it into a select
203
# statement and return the corresponding records
159
def seltagsbrowse(pid, qpath, flag, httphp, pathprefix):
204
def seltagsbrowse(pid, qpath, flag, httphp, pathprefix):
160
    uplog("seltagsbrowse. qpath %s" % qpath)
205
    uplog("seltagsbrowse. qpath %s" % qpath)
161
    qlen = len(qpath)
206
    qlen = len(qpath)
162
    selwhat = ''
207
    selwhat = ''
163
    selwhere = ''
208
    selwhere = ''
164
    values = []
209
    values = []
165
    i = 0
210
    i = 0
166
    while i < qlen:
211
    while i < qlen:
167
        elt = qpath[i]
212
        elt = qpath[i]
168
        if elt.startswith('='):
213
        if elt.startswith('='):
169
            col = tables[elt[1:]] 
214
            col = tagtables[elt[1:]] 
170
        selwhere = selwhere + ' AND ' if selwhere else ' WHERE '
215
        selwhere = selwhere + ' AND ' if selwhere else ' WHERE '
171
        if i == qlen - 1:
216
        if i == qlen - 1:
172
            # We want to display all unique values for the column
217
            # We want to display all unique values for the column
173
            # artist.artist_id, artist.value
218
            # artist.artist_id, artist.value
174
            selwhat = col + '.' + col + '_id, ' + col + '.value'
219
            selwhat = col + '.' + col + '_id, ' + col + '.value'
175
            # tracks.artist_id = artist.artist_id
220
            # tracks.artist_id = artist.artist_id
176
            selwhere += 'tracks.'+ col + '_id = ' + col + '.' + col + '_id'
221
            selwhere += 'tracks.' + colid(col) + ' = ' + col + '.' + colid(col)
177
        else:
222
        else:
178
            # Look at the value specified for the =xx column
223
            # Look at the value specified for the =xx column
179
            selwhat = 'tracks.docidx'
224
            selwhat = 'tracks.docidx'
180
            selwhere += 'tracks.' + col + '_id =  ?'
225
            selwhere += 'tracks.' + colid(col) + ' =  ?'
181
            i += 1
226
            i += 1
182
            values.append(int(qpath[i]))
227
            values.append(int(qpath[i]))
183
        i += 1
228
        i += 1
184
            
229
            
185
    c = sqconn.cursor()
230
    c = sqconn.cursor()
...
...
187
    #    uplog("selres: %s" % r)
232
    #    uplog("selres: %s" % r)
188
    entries = []
233
    entries = []
189
    if selwhat == 'tracks.docidx':
234
    if selwhat == 'tracks.docidx':
190
        # SELECT docidx FROM tracks
235
        # SELECT docidx FROM tracks
191
        # WHERE col1_id = ? AND col2_id = ?
236
        # WHERE col1_id = ? AND col2_id = ?
192
        stmt = "SELECT docidx FROM tracks %s" % selwhere
237
        stmt = "SELECT docidx FROM tracks %s ORDER BY trackno" % selwhere
193
        uplog("seltagsbrowse: executing <%s> values %s" % (stmt, values))
238
        uplog("seltagsbrowse: executing <%s> values %s" % (stmt, values))
194
        c.execute(stmt, values)
239
        c.execute(stmt, values)
195
        recs = c.fetchall()
240
        recs = c.fetchall()
196
        subqs = analyzesubtree(sqconn, recs)
241
        subqs = analyzesubtree(sqconn, recs)
197
        if not subqs:
242
        if not subqs:
198
            for r in recs:
243
            for r in recs:
199
                docidx = r[0]
244
                docidx = r[0]
200
                id = pid + '$*i' + str(docidx)
245
                id = pid + '$*i' + str(docidx)
201
                entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
246
                entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
202
                                             g_alldocs[docidx]))
247
                                             g_alldocs[docidx]))
248
                entries = sorted(entries, cmp=cmpentries)
203
        else:
249
        else:
204
            for tt in subqs:
250
            for tt in subqs:
205
                id = pid + '$=' + tt
251
                id = pid + '$=' + tt
206
                entries.append(rcldirentry(id, pid, tt))
252
                entries.append(rcldirentry(id, pid, tt))
207
    else:
253
    else:
...
...
209
        # WHERE tracks.col_id = col.col_id
255
        # WHERE tracks.col_id = col.col_id
210
        # GROUP BY tracks.col_id
256
        # GROUP BY tracks.col_id
211
        # ORDER BY col.value
257
        # ORDER BY col.value
212
        stmt = "SELECT " + selwhat + " FROM tracks, " + col + \
258
        stmt = "SELECT " + selwhat + " FROM tracks, " + col + \
213
               selwhere + \
259
               selwhere + \
214
               " GROUP BY tracks." + col + '_id' + \
260
               " GROUP BY tracks." + colid(col) + \
215
               " ORDER BY value"
261
               " ORDER BY value"
216
        uplog("seltagsbrowse: executing <%s> values %s" % (stmt, values))
262
        uplog("seltagsbrowse: executing <%s> values %s" % (stmt, values))
217
        c.execute(stmt, values)
263
        c.execute(stmt, values)
218
        for r in c:
264
        for r in c:
219
            id = pid + '$' + str(r[0])
265
            id = pid + '$' + str(r[0])
220
            entries.append(rcldirentry(id, pid, r[1]))
266
            entries.append(rcldirentry(id, pid, r[1]))
221
    return entries
267
    return entries
222
268
269
270
def albumsbrowse(pid, qpath, flag, httphp, pathprefix):
271
    c = sqconn.cursor()
272
    entries = []
273
    if len(qpath) == 1:
274
        c.execute('''SELECT albid, albtitle FROM albums
275
        ORDER BY albtitle''')
276
        for r in c:
277
            id = pid + '$*' + str(r[0])
278
            entries.append(rcldirentry(id, pid, r[1]))
279
    elif len(qpath) == 2:
280
        e1 = qpath[1]
281
        if not e1.startswith("*"):
282
            raise Exception("Bad album id in albums tree. Pth: %s" %idpath)
283
        albid = int(e1[1:])
284
        c.execute('''SELECT docidx FROM tracks WHERE albid = ? ORDER BY
285
        trackno''', (albid,))
286
        for r in c:
287
            docidx = r[0]
288
            id = pid + '$*i' + str(docidx)
289
            entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
290
                                         g_alldocs[docidx]))
291
    else:
292
        raise Exception("Bad path in album tree (too deep): <%s>"%idpath)
293
294
    return entries
295
296
297
# Top level browse routine. Handle the special cases and call the
298
# appropriate worker routine.
223
def browse(pid, flag, httphp, pathprefix):
299
def browse(pid, flag, httphp, pathprefix):
224
    idpath = pid.replace(g_myprefix, '', 1)
300
    idpath = pid.replace(g_myprefix, '', 1)
301
    uplog('tags:browse: idpath <%s>' % idpath)
225
    entries = []
302
    entries = []
226
    uplog('tags:browse: idpath <%s>' % idpath)
227
    qpath = idpath.split('$')
303
    qpath = idpath.split('$')
228
    c = sqconn.cursor()
229
    if idpath.startswith('items'):
304
    if idpath.startswith('items'):
305
        c = sqconn.cursor()
230
        c.execute('''SELECT docidx FROM tracks ORDER BY title''')
306
        c.execute('''SELECT docidx FROM tracks ORDER BY title''')
231
        for r in c:
307
        for r in c:
232
            docidx = r[0]
308
            docidx = r[0]
233
            id = pid + '$*i' + str(docidx)
309
            id = pid + '$*i' + str(docidx)
234
            entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
310
            entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
235
                                         g_alldocs[docidx]))
311
                                         g_alldocs[docidx]))
236
    elif idpath.startswith('albums'):
312
    elif idpath.startswith('albums'):
237
        if len(qpath) == 1:
313
        entries = albumsbrowse(pid, qpath, flag, httphp, pathprefix)
238
            c.execute('''SELECT albid, albtitle FROM albums
239
            ORDER BY albtitle''')
240
            for r in c:
241
                id = pid + '$*' + str(r[0])
242
                entries.append(rcldirentry(id, pid, r[1]))
243
        elif len(qpath) == 2:
244
            e1 = qpath[1]
245
            if not e1.startswith("*"):
246
                raise Exception("Bad album id in albums tree. Pth: %s" %idpath)
247
            albid = int(e1[1:])
248
            c.execute('''SELECT docidx FROM tracks WHERE albid = ? ORDER BY
249
            trackno''', (albid,))
250
            for r in c:
251
                docidx = r[0]
252
                id = pid + '$*i' + str(docidx)
253
                entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
254
                                             g_alldocs[docidx]))
255
        else:
256
            raise Exception("Bad path in album tree (too deep): <%s>"%idpath)
257
    elif idpath.startswith('='):
314
    elif idpath.startswith('='):
258
        entries = seltagsbrowse(pid, qpath, flag, httphp, pathprefix)
315
        entries = seltagsbrowse(pid, qpath, flag, httphp, pathprefix)
259
    else:
316
    else:
260
        raise Exception('Bad path in tags tree (start>):<%s>'%idpath)
317
        raise Exception('Bad path in tags tree (start): <%s>' % idpath)
261
    return entries
318
    return entries
262
319
320
321
322
323
324
325
326
327
328
############ Misc test/trial code, not used by uprcl ########################
263
329
264
def misctries():
330
def misctries():
265
    c = sqconn.cursor()
331
    c = sqconn.cursor()
266
    c.execute('''SELECT COUNT(*) FROM tracks''')
332
    c.execute('''SELECT COUNT(*) FROM tracks''')
267
    uplog("Count(*) %d" % (c.fetchone()[0],))
333
    uplog("Count(*) %d" % (c.fetchone()[0],))