Switch to unified view

a/src/mediaserver/cdplugins/uprcl/uprcltags.py b/src/mediaserver/cdplugins/uprcl/uprcltags.py
...
...
16
16
17
import sys
17
import sys
18
import sqlite3
18
import sqlite3
19
from timeit import default_timer as timer
19
from timeit import default_timer as timer
20
20
21
from uprclutils import *
21
from uprclutils import g_myprefix, audiomtypes, docfolder, uplog, \
22
     rcldirentry, rcldoctoentry, cmpentries
22
23
23
# After initialization, this holds the list of all records out of recoll
24
# After initialization, this holds the list of all records out of
25
# recoll (it's a reference to the original in uprclfolders)
24
g_alldocs = []
26
g_alldocs = []
25
26
g_myprefix = '0$uprcl$'
27
27
28
sqconn = sqlite3.connect(':memory:')
28
sqconn = sqlite3.connect(':memory:')
29
#sqconn = sqlite3.connect('/tmp/tracks.sqlite')
29
#sqconn = sqlite3.connect('/tmp/tracks.sqlite')
30
30
31
# Tags for which we create auxiliary tables for facet descent.
31
# Tags for which we create auxiliary tables for facet descent.
...
...
53
# record. None at the moment
53
# record. None at the moment
54
coltorclfield = {
54
coltorclfield = {
55
    }
55
    }
56
56
57
57
58
def colid(col):
58
def _clid(col):
59
    return col + '_id'
59
    return col + '_id'
60
60
61
# Create the db. Each tag table has 2 columns: <tagname>_id and
61
# Create the db. Each tag table has 2 columns: <tagname>_id and
62
# value. The join column in the main tracks table is also named
62
# value. The join column in the main tracks table is also named
63
# <tagname>_id
63
# <tagname>_id
64
def createsqdb(conn):
64
def _createsqdb(conn):
65
    c = conn.cursor()
65
    c = conn.cursor()
66
    try:
66
    try:
67
        c.execute('''DROP TABLE albums''')
67
        c.execute('''DROP TABLE albums''')
68
        c.execute('''DROP TABLE tracks''')
68
        c.execute('''DROP TABLE tracks''')
69
    except:
69
    except:
...
...
79
        try:
79
        try:
80
            c.execute('DROP TABLE ' + tb)
80
            c.execute('DROP TABLE ' + tb)
81
        except:
81
        except:
82
            pass
82
            pass
83
        stmt = 'CREATE TABLE ' + tb + \
83
        stmt = 'CREATE TABLE ' + tb + \
84
               ' (' + colid(tb) + ' INTEGER PRIMARY KEY, value TEXT)'
84
               ' (' + _clid(tb) + ' INTEGER PRIMARY KEY, value TEXT)'
85
        c.execute(stmt)
85
        c.execute(stmt)
86
        tracksstmt += ',' + colid(tb) + ' INT'
86
        tracksstmt += ',' + _clid(tb) + ' INT'
87
87
88
    tracksstmt += ')'
88
    tracksstmt += ')'
89
    c.execute(tracksstmt)
89
    c.execute(tracksstmt)
90
    
90
    
91
91
92
# Insert new value if not existing, return rowid of new or existing row
92
# Insert new value if not existing, return rowid of new or existing row
93
def auxtableinsert(sqconn, tb, value):
93
def _auxtableinsert(sqconn, tb, value):
94
    c = sqconn.cursor()
94
    c = sqconn.cursor()
95
    stmt = 'SELECT ' + colid(tb) + ' FROM ' + tb + ' WHERE value = ?'
95
    stmt = 'SELECT ' + _clid(tb) + ' FROM ' + tb + ' WHERE value = ?'
96
    c.execute(stmt, (value,))
96
    c.execute(stmt, (value,))
97
    r = c.fetchone()
97
    r = c.fetchone()
98
    if r:
98
    if r:
99
        rowid = r[0]
99
        rowid = r[0]
100
    else:
100
    else:
...
...
108
# the recoll records list
108
# the recoll records list
109
def recolltosql(docs):
109
def recolltosql(docs):
110
    global g_alldocs
110
    global g_alldocs
111
    g_alldocs = docs
111
    g_alldocs = docs
112
    
112
    
113
    createsqdb(sqconn)
113
    _createsqdb(sqconn)
114
114
115
    # Compute a list of table names and corresponding recoll
115
    # Compute a list of table names and corresponding recoll
116
    # fields. most often they are identical
116
    # fields. most often they are identical
117
    tabfields = []
117
    tabfields = []
118
    for tb in tagtables.itervalues():
118
    for tb in tagtables.itervalues():
...
...
174
        placehold = '?,?,?,?'
174
        placehold = '?,?,?,?'
175
        for tb,rclfld in tabfields:
175
        for tb,rclfld in tabfields:
176
            value = getattr(doc, rclfld, None)
176
            value = getattr(doc, rclfld, None)
177
            if not value:
177
            if not value:
178
                continue
178
                continue
179
            rowid = auxtableinsert(sqconn, tb, value)
179
            rowid = _auxtableinsert(sqconn, tb, value)
180
            columns += ',' + colid(tb)
180
            columns += ',' + _clid(tb)
181
            values.append(rowid)
181
            values.append(rowid)
182
            placehold += ',?'
182
            placehold += ',?'
183
183
184
        stmt = 'INSERT INTO tracks(' + columns + ') VALUES(' + placehold + ')'
184
        stmt = 'INSERT INTO tracks(' + columns + ') VALUES(' + placehold + ')'
185
        c.execute(stmt, values)
185
        c.execute(stmt, values)
186
        #uplog(doc.title)
186
        #uplog(doc.title)
187
187
188
    sqconn.commit()
188
    sqconn.commit()
189
    uplog("recolltosql: processed %d docs" % totcnt)
189
    uplog("recolltosql: processed %d docs" % totcnt)
190
190
191
191
# Create our top-level directories, with fixed entries, and stuff from
192
# Create our top-level directories, with fixed entries, and stuff from
192
# the tags tables
193
# the tags tables
193
def rootentries(pid):
194
def rootentries(pid):
194
    c = sqconn.cursor()
195
    c = sqconn.cursor()
...
...
202
        entries.append(rcldirentry(pid + '=' + tt , pid, tt))
203
        entries.append(rcldirentry(pid + '=' + tt , pid, tt))
203
    return entries
204
    return entries
204
205
205
# Check what tags still have multiple values inside the selected set,
206
# Check what tags still have multiple values inside the selected set,
206
# and return their list.
207
# and return their list.
207
def subtreetags(docidsl):
208
def _subtreetags(docidsl):
208
    docids = ','.join([str(i) for i in docidsl])
209
    docids = ','.join([str(i) for i in docidsl])
209
    uplog("subtreetags, docids %s" % docids)
210
    uplog("subtreetags, docids %s" % docids)
210
    c = sqconn.cursor()
211
    c = sqconn.cursor()
211
    tags = []
212
    tags = []
212
    for tt,tb in tagtables.iteritems():
213
    for tt,tb in tagtables.iteritems():
213
        stmt = 'SELECT COUNT(DISTINCT ' + colid(tb) + \
214
        stmt = 'SELECT COUNT(DISTINCT ' + _clid(tb) + \
214
               ') FROM tracks WHERE docidx IN (' + docids + ')'
215
               ') FROM tracks WHERE docidx IN (' + docids + ')'
215
        uplog("subtreetags: executing: <%s>" % stmt)
216
        uplog("subtreetags: executing: <%s>" % stmt)
216
        c.execute(stmt)
217
        c.execute(stmt)
217
        for r in c:
218
        for r in c:
218
            cnt = r[0]
219
            cnt = r[0]
219
            uplog("Found %d distinct values for %s" % (cnt, tb))
220
            uplog("Found %d distinct values for %s" % (cnt, tb))
220
            if cnt > 1:
221
            if cnt > 1:
221
                tags.append(tt)
222
                tags.append(tt)
222
    return tags
223
    return tags
223
224
224
def trackentriesforstmt(stmt, values, pid, httphp, pathprefix):
225
def _trackentriesforstmt(stmt, values, pid, httphp, pathprefix):
225
    c = sqconn.cursor()
226
    c = sqconn.cursor()
226
    c.execute(stmt, values)
227
    c.execute(stmt, values)
227
    entries = []
228
    entries = []
228
    for r in c:
229
    for r in c:
229
        docidx = r[0]
230
        docidx = r[0]
...
...
234
    
235
    
235
236
236
# Return a list of trackids as selected by the current
237
# Return a list of trackids as selected by the current
237
# path <selwhere> is like: WHERE col1_id = ? AND col2_id = ? [...], and
238
# path <selwhere> is like: WHERE col1_id = ? AND col2_id = ? [...], and
238
# <values> holds the corresponding values
239
# <values> holds the corresponding values
239
def docidsforsel(selwhere, values):
240
def _docidsforsel(selwhere, values):
240
    c = sqconn.cursor()
241
    c = sqconn.cursor()
241
    stmt = 'SELECT docidx FROM tracks ' + selwhere + ' ORDER BY trackno'
242
    stmt = 'SELECT docidx FROM tracks ' + selwhere + ' ORDER BY trackno'
242
    uplog("docidsforsel: executing <%s> values %s" % (stmt, values))
243
    uplog("docidsforsel: executing <%s> values %s" % (stmt, values))
243
    c.execute(stmt, values)
244
    c.execute(stmt, values)
244
    return [r[0] for r in c.fetchall()]
245
    return [r[0] for r in c.fetchall()]
245
246
246
def trackentriesforalbum(albid, pid, httphp, pathprefix):
247
def _trackentriesforalbum(albid, pid, httphp, pathprefix):
247
    stmt = 'SELECT docidx FROM tracks WHERE album_id = ? ORDER BY trackno'
248
    stmt = 'SELECT docidx FROM tracks WHERE album_id = ? ORDER BY trackno'
248
    return trackentriesforstmt(stmt, (albid,), pid, httphp, pathprefix)
249
    return _trackentriesforstmt(stmt, (albid,), pid, httphp, pathprefix)
249
    
250
    
250
# This is called when an 'albums' element is encountered in the
251
# This is called when an 'albums' element is encountered in the
251
# selection path.
252
# selection path.
252
def tagsbrowsealbums(pid, qpath, i, selwhere, values, httphp, pathprefix):
253
def _tagsbrowsealbums(pid, qpath, i, selwhere, values, httphp, pathprefix):
253
    c = sqconn.cursor()
254
    c = sqconn.cursor()
254
    docidsl = docidsforsel(selwhere, values)
255
    docidsl = _docidsforsel(selwhere, values)
255
    entries = []
256
    entries = []
256
    if i == len(qpath)-1:
257
    if i == len(qpath)-1:
257
        albidsl = subtreealbums(docidsl)
258
        albidsl = _subtreealbums(docidsl)
258
        albids = ','.join([str(a) for a in albidsl])
259
        albids = ','.join([str(a) for a in albidsl])
259
        c.execute('SELECT album_id, albtitle FROM albums WHERE album_id in (' +
260
        c.execute('SELECT album_id, albtitle FROM albums WHERE album_id in (' +
260
                  albids + ') ORDER BY albtitle')
261
                  albids + ') ORDER BY albtitle')
261
        for r in c:
262
        for r in c:
262
            id = pid + '$' + str(r[0])
263
            id = pid + '$' + str(r[0])
...
...
268
        c.execute(stmt, (albid,))
269
        c.execute(stmt, (albid,))
269
        r = c.fetchone()
270
        r = c.fetchone()
270
        ntracks = int(r[0])
271
        ntracks = int(r[0])
271
        stmt = 'SELECT docidx FROM tracks WHERE album_id = ? AND docidx IN (' +\
272
        stmt = 'SELECT docidx FROM tracks WHERE album_id = ? AND docidx IN (' +\
272
               docids + ')'
273
               docids + ')'
273
        entries = trackentriesforstmt(stmt, (albid,), pid, httphp, pathprefix)
274
        entries = _trackentriesforstmt(stmt, (albid,), pid, httphp, pathprefix)
274
        if ntracks != len(entries):
275
        if ntracks != len(entries):
275
            id = pid + '$' + 'showca'
276
            id = pid + '$' + 'showca'
276
            entries = [rcldirentry(id, pid, '>> Complete Album')] + entries
277
            entries = [rcldirentry(id, pid, '>> Complete Album')] + entries
277
    elif i == len(qpath)-3:
278
    elif i == len(qpath)-3:
278
        # Note that minim has an additional level here, probably to
279
        # Note that minim has an additional level here, probably to
...
...
281
        #    0$=Composer$17738$albums$2$showca.0$hcalbum$*i13458
282
        #    0$=Composer$17738$albums$2$showca.0$hcalbum$*i13458
282
        # I don't know what the .0 is for.
283
        # I don't know what the .0 is for.
283
        # The 'hcalbum' level usually has 2 entries '>> Hide Content' 
284
        # The 'hcalbum' level usually has 2 entries '>> Hide Content' 
284
        # and the album title. TBD
285
        # and the album title. TBD
285
        albid = int(qpath[-2])
286
        albid = int(qpath[-2])
286
        entries = trackentriesforalbum(albid, pid, httphp, pathprefix)
287
        entries = _trackentriesforalbum(albid, pid, httphp, pathprefix)
287
        
288
        
288
    return entries
289
    return entries
289
290
290
# This is called when an 'items' element is encountered in the
291
# This is called when an 'items' element is encountered in the
291
# selection path. We just list the selected tracks
292
# selection path. We just list the selected tracks
292
def tagsbrowseitems(pid, qpath, i, selwhere, values, httphp, pathprefix):
293
def _tagsbrowseitems(pid, qpath, i, selwhere, values, httphp, pathprefix):
293
    stmt = 'SELECT docidx FROM tracks ' + selwhere
294
    stmt = 'SELECT docidx FROM tracks ' + selwhere
294
    return trackentriesforstmt(stmt, values, pid, httphp, pathprefix)
295
    return _trackentriesforstmt(stmt, values, pid, httphp, pathprefix)
295
296
296
297
297
# Return all albums ids to which any of the currently selected tracks
298
# Return all albums ids to which any of the currently selected tracks
298
# (designated by a docid set) belong
299
# (designated by a docid set) belong
299
def subtreealbums(docidsl):
300
def _subtreealbums(docidsl):
300
    docids = ','.join([str(r) for r in docidsl])
301
    docids = ','.join([str(r) for r in docidsl])
301
    albids = []
302
    albids = []
302
    stmt = 'SELECT album_id from tracks where docidx IN (' + docids + ') ' + \
303
    stmt = 'SELECT album_id from tracks where docidx IN (' + docids + ') ' + \
303
           'GROUP BY album_id'
304
           'GROUP BY album_id'
304
    c = sqconn.cursor()
305
    c = sqconn.cursor()
...
...
310
    return albids
311
    return albids
311
    
312
    
312
# Main browsing routine. Given an objid, translate it into a select
313
# Main browsing routine. Given an objid, translate it into a select
313
# statement, plus further processing, and return the corresponding
314
# statement, plus further processing, and return the corresponding
314
# records
315
# records
315
def tagsbrowse(pid, qpath, flag, httphp, pathprefix):
316
def _tagsbrowse(pid, qpath, flag, httphp, pathprefix):
316
    uplog("tagsbrowse. pid %s qpath %s" % (pid, qpath))
317
    uplog("tagsbrowse. pid %s qpath %s" % (pid, qpath))
317
    qlen = len(qpath)
318
    qlen = len(qpath)
318
    selwhat = ''
319
    selwhat = ''
319
    selwhere = ''
320
    selwhere = ''
320
    values = []
321
    values = []
...
...
331
        
332
        
332
        # detect the special values albums items etc. here. Their
333
        # detect the special values albums items etc. here. Their
333
        # presence changes how we process the rest (showing tracks and
334
        # presence changes how we process the rest (showing tracks and
334
        # albums and not dealing with other tags any more)
335
        # albums and not dealing with other tags any more)
335
        if elt == 'albums':
336
        if elt == 'albums':
336
            return tagsbrowsealbums(pid, qpath, i, selwhere, values, httphp,
337
            return _tagsbrowsealbums(pid, qpath, i, selwhere, values, httphp,
337
                                    pathprefix)
338
                                    pathprefix)
338
        elif elt == 'items':
339
        elif elt == 'items':
339
            return tagsbrowseitems(pid, qpath, i, selwhere, values, httphp,
340
            return _tagsbrowseitems(pid, qpath, i, selwhere, values, httphp,
340
                                  pathprefix)
341
                                    pathprefix)
341
            
342
            
342
        selwhere = selwhere + ' AND ' if selwhere else ' WHERE '
343
        selwhere = selwhere + ' AND ' if selwhere else ' WHERE '
343
        if i == qlen - 1:
344
        if i == qlen - 1:
344
            # We want to display all unique values for the column
345
            # We want to display all unique values for the column
345
            # artist.artist_id, artist.value
346
            # artist.artist_id, artist.value
346
            selwhat = col + '.' + colid(col) + ', ' + col + '.value'
347
            selwhat = col + '.' + _clid(col) + ', ' + col + '.value'
347
            # tracks.artist_id = artist.artist_id
348
            # tracks.artist_id = artist.artist_id
348
            selwhere += 'tracks.' + colid(col) + ' = ' + col + '.' + colid(col)
349
            selwhere += 'tracks.' + _clid(col) + ' = ' + col + '.' + _clid(col)
349
        else:
350
        else:
350
            # Look at the value specified for the =xx column. The
351
            # Look at the value specified for the =xx column. The
351
            # selwhat value is only used as a flag
352
            # selwhat value is only used as a flag
352
            selwhat = 'tracks.docidx'
353
            selwhat = 'tracks.docidx'
353
            selwhere += 'tracks.' + colid(col) + ' =  ?'
354
            selwhere += 'tracks.' + _clid(col) + ' =  ?'
354
            i += 1
355
            i += 1
355
            values.append(int(qpath[i]))
356
            values.append(int(qpath[i]))
356
        i += 1
357
        i += 1
357
            
358
            
358
359
359
    # TBD: Need a ">> Complete Album" entry if there is a single
360
    # TBD: Need a ">> Complete Album" entry if there is a single
360
    # album, no subqs and not all the tracks are listed
361
    # album, no subqs and not all the tracks are listed
361
    entries = []
362
    entries = []
362
    if selwhat == 'tracks.docidx':
363
    if selwhat == 'tracks.docidx':
363
        docids = docidsforsel(selwhere, values)
364
        docids = _docidsforsel(selwhere, values)
364
        albids = subtreealbums(docids)
365
        albids = _subtreealbums(docids)
365
        subqs = subtreetags(docids)
366
        subqs = _subtreetags(docids)
366
        if len(albids) > 1:
367
        if len(albids) > 1:
367
            id = pid + '$albums'
368
            id = pid + '$albums'
368
            entries.append(rcldirentry(id, pid, str(len(albids)) + ' albums'))
369
            entries.append(rcldirentry(id, pid, str(len(albids)) + ' albums'))
369
            if subqs:
370
            if subqs:
370
                id = pid + '$items'
371
                id = pid + '$items'
...
...
388
        # WHERE tracks.col_id = col.col_id
389
        # WHERE tracks.col_id = col.col_id
389
        # GROUP BY tracks.col_id
390
        # GROUP BY tracks.col_id
390
        # ORDER BY col.value
391
        # ORDER BY col.value
391
        stmt = "SELECT " + selwhat + " FROM tracks, " + col + \
392
        stmt = "SELECT " + selwhat + " FROM tracks, " + col + \
392
               selwhere + \
393
               selwhere + \
393
               " GROUP BY tracks." + colid(col) + \
394
               " GROUP BY tracks." + _clid(col) + \
394
               " ORDER BY value"
395
               " ORDER BY value"
395
        uplog("tagsbrowse: executing <%s> values %s" % (stmt, values))
396
        uplog("tagsbrowse: executing <%s> values %s" % (stmt, values))
396
        c = sqconn.cursor()
397
        c = sqconn.cursor()
397
        c.execute(stmt, values)
398
        c.execute(stmt, values)
398
        for r in c:
399
        for r in c:
...
...
402
403
403
404
404
# Browse the top-level tree named like 'xxx albums'. There are just 2
405
# Browse the top-level tree named like 'xxx albums'. There are just 2
405
# levels: the whole albums list, then for each entry the specified
406
# levels: the whole albums list, then for each entry the specified
406
# albums track list
407
# albums track list
407
def albumsbrowse(pid, qpath, flag, httphp, pathprefix):
408
def _albumsbrowse(pid, qpath, flag, httphp, pathprefix):
408
    c = sqconn.cursor()
409
    c = sqconn.cursor()
409
    entries = []
410
    entries = []
410
    if len(qpath) == 1:
411
    if len(qpath) == 1:
411
        c.execute('SELECT album_id, albtitle FROM albums ORDER BY albtitle')
412
        c.execute('SELECT album_id, albtitle FROM albums ORDER BY albtitle')
412
        for r in c:
413
        for r in c:
...
...
415
    elif len(qpath) == 2:
416
    elif len(qpath) == 2:
416
        e1 = qpath[1]
417
        e1 = qpath[1]
417
        if not e1.startswith("*"):
418
        if not e1.startswith("*"):
418
            raise Exception("Bad album id in albums tree. Pth: %s" %idpath)
419
            raise Exception("Bad album id in albums tree. Pth: %s" %idpath)
419
        album_id = int(e1[1:])
420
        album_id = int(e1[1:])
420
        entries = trackentriesforalbum(album_id, pid, httphp, pathprefix)
421
        entries = _trackentriesforalbum(album_id, pid, httphp, pathprefix)
421
    else:
422
    else:
422
        raise Exception("Bad path in album tree (too deep): <%s>"%idpath)
423
        raise Exception("Bad path in album tree (too deep): <%s>"%idpath)
423
424
424
    return entries
425
    return entries
425
426
...
...
431
    uplog('tags:browse: idpath <%s>' % idpath)
432
    uplog('tags:browse: idpath <%s>' % idpath)
432
    entries = []
433
    entries = []
433
    qpath = idpath.split('$')
434
    qpath = idpath.split('$')
434
    if idpath.startswith('items'):
435
    if idpath.startswith('items'):
435
        stmt = 'SELECT docidx FROM tracks'
436
        stmt = 'SELECT docidx FROM tracks'
436
        entries = trackentriesforstmt(stmt, (), pid, httphp, pathprefix)
437
        entries = _trackentriesforstmt(stmt, (), pid, httphp, pathprefix)
437
    elif idpath.startswith('albums'):
438
    elif idpath.startswith('albums'):
438
        entries = albumsbrowse(pid, qpath, flag, httphp, pathprefix)
439
        entries = _albumsbrowse(pid, qpath, flag, httphp, pathprefix)
439
    elif idpath.startswith('='):
440
    elif idpath.startswith('='):
440
        entries = tagsbrowse(pid, qpath, flag, httphp, pathprefix)
441
        entries = _tagsbrowse(pid, qpath, flag, httphp, pathprefix)
441
    else:
442
    else:
442
        raise Exception('Bad path in tags tree (start): <%s>' % idpath)
443
        raise Exception('Bad path in tags tree (start): <%s>' % idpath)
443
    return entries
444
    return entries
444
445
445
446