Switch to unified view

a b/src/mediaserver/cdplugins/uprcl/uprcltags.py
1
from __future__ import print_function
2
3
import sys
4
import sqlite3
5
from timeit import default_timer as timer
6
7
from uprclutils import *
8
9
sqconn = sqlite3.connect(':memory:')
10
#sqconn = sqlite3.connect('/tmp/tracks.sqlite')
11
12
# TBD All Artists, Orchestra, Group
13
#
14
# Maybe we'd actually need a 3rd value for the recoll field name, but
15
# it can be the same for the currently relevant fields.
16
tables = {'Artist' : 'artist',
17
          'Date' : 'date',
18
          'Genre' : 'genre',
19
          'All Artists' : 'allartists',
20
          'Composer' : 'composer',
21
          'Conductor' : 'conductor',
22
          'Orchestra' : 'orchestra',
23
          'Group' : 'grouptb',
24
          'Comment' : 'comment'
25
          }
26
          
27
def createsqdb(conn):
28
    c = conn.cursor()
29
    try:
30
        c.execute('''DROP TABLE albums''')
31
        c.execute('''DROP TABLE tracks''')
32
    except:
33
        pass
34
    c.execute(
35
        '''CREATE TABLE albums
36
           (albid INTEGER PRIMARY KEY, albtitle TEXT, albfolder TEXT)''')
37
38
    tracksstmt = '''CREATE TABLE tracks
39
    (docidx INT, albid INT, trackno INT, title TEXT'''
40
41
    for tt,tb in tables.iteritems():
42
        try:
43
            c.execute('DROP TABLE ' + tb)
44
        except:
45
            pass
46
        stmt = 'CREATE TABLE ' + tb + \
47
               ' (' + tb + '_id' + ' INTEGER PRIMARY KEY, value TEXT)'
48
        c.execute(stmt)
49
        tracksstmt += ',' + tb + '_id INT'
50
51
    tracksstmt += ')'
52
    c.execute(tracksstmt)
53
    
54
55
def auxtableinsert(sqconn, tb, value):
56
    c = sqconn.cursor()
57
    col = tb + '_id'
58
    stmt = 'SELECT ' + col + ' FROM ' + tb + ' WHERE value = ?'
59
    c.execute(stmt, (value,))
60
    r = c.fetchone()
61
    if r:
62
        rowid = r[0]
63
    else:
64
        stmt = 'INSERT INTO ' + tb + '(value) VALUES(?)'
65
        c.execute(stmt, (value,))
66
        rowid = c.lastrowid
67
68
    return rowid
69
            
70
g_alldocs = []
71
def recolltosql(docs):
72
    global g_alldocs
73
    g_alldocs = docs
74
    
75
    createsqdb(sqconn)
76
77
    c = sqconn.cursor()
78
    maxcnt = 0
79
    totcnt = 0
80
    for docidx in range(len(docs)):
81
        doc = docs[docidx]
82
        totcnt += 1
83
        album = getattr(doc, 'album', None)
84
        if not album:
85
            if doc.mtype != 'inode/directory' and \
86
                   doc.mtype != 'image/jpeg':
87
                pass
88
                #uplog("No album: mtype %s title %s" % (doc.mtype, doc.url))
89
            continue
90
        folder = docfolder(doc).decode('utf-8', errors = 'replace')
91
        try:
92
            l= doc.tracknumber.split('/')
93
            trackno = int(l[0])
94
        except:
95
            trackno = 0
96
            
97
        # Create album record if needed. There is probably a
98
        # single-statement syntax for this
99
        c.execute('''SELECT albid FROM albums
100
        WHERE albtitle = ? AND albfolder = ?''', (album, folder))
101
        r = c.fetchone()
102
        if r:
103
            albid = r[0]
104
        else:
105
            c.execute('''INSERT INTO albums(albtitle, albfolder)
106
            VALUES (?,?)''', (album, folder))
107
            albid = c.lastrowid
108
109
110
        columns = 'docidx,albid,trackno,title'
111
        values = [docidx, albid, trackno, doc.title]
112
        placehold = '?,?,?,?'
113
        for tt,tb in tables.iteritems():
114
            value = getattr(doc, tb, None)
115
            if not value:
116
                continue
117
            rowid = auxtableinsert(sqconn, tb, value)
118
            columns += ',' + tb + '_id'
119
            values.append(rowid)
120
            placehold += ',?'
121
122
        stmt = 'INSERT INTO tracks(' + columns + ') VALUES(' + placehold + ')'
123
        c.execute(stmt, values)
124
        #uplog(doc.title)
125
126
127
    sqconn.commit()
128
    uplog("recolltosql: processed %d docs" % totcnt)
129
130
def rootentries(pid):
131
    entries = [rcldirentry(pid + 'albums', pid, 'albums'),
132
               rcldirentry(pid + 'items', pid, 'items')]
133
    for tt,tb in tables.iteritems():
134
        entries.append(rcldirentry(pid + '=' + tt , pid, tt))
135
    return entries
136
137
g_myprefix = '0$uprcl$'
138
139
def colid(col):
140
    return col + '_id'
141
142
def analyzesubtree(sqconn, recs):
143
    docids = ','.join([str(r[0]) for r in recs])
144
    uplog("analyzesubtree, docids %s" % docids)
145
    c1 = sqconn.cursor()
146
    tags = []
147
    for tt,tb in tables.iteritems():
148
        stmt = 'SELECT COUNT(DISTINCT ' + colid(tb) + \
149
               ') FROM tracks WHERE docidx IN (' + docids + ')'
150
        uplog("analyzesubtree: executing: <%s>" % stmt)
151
        c1.execute(stmt)
152
        for r in c1:
153
            cnt = r[0]
154
            uplog("Found %d distinct values for %s" % (cnt, tb))
155
            if cnt > 1:
156
                tags.append(tt)
157
    return tags
158
159
def seltagsbrowse(pid, qpath, flag, httphp, pathprefix):
160
    uplog("seltagsbrowse. qpath %s" % qpath)
161
    qlen = len(qpath)
162
    selwhat = ''
163
    selwhere = ''
164
    values = []
165
    i = 0
166
    while i < qlen:
167
        elt = qpath[i]
168
        if elt.startswith('='):
169
            col = tables[elt[1:]] 
170
        selwhere = selwhere + ' AND ' if selwhere else ' WHERE '
171
        if i == qlen - 1:
172
            # We want to display all unique values for the column
173
            # artist.artist_id, artist.value
174
            selwhat = col + '.' + col + '_id, ' + col + '.value'
175
            # tracks.artist_id = artist.artist_id
176
            selwhere += 'tracks.'+ col + '_id = ' + col + '.' + col + '_id'
177
        else:
178
            # Look at the value specified for the =xx column
179
            selwhat = 'tracks.docidx'
180
            selwhere += 'tracks.' + col + '_id =  ?'
181
            i += 1
182
            values.append(int(qpath[i]))
183
        i += 1
184
            
185
    c = sqconn.cursor()
186
    #for r in c:
187
    #    uplog("selres: %s" % r)
188
    entries = []
189
    if selwhat == 'tracks.docidx':
190
        # SELECT docidx FROM tracks
191
        # WHERE col1_id = ? AND col2_id = ?
192
        stmt = "SELECT docidx FROM tracks %s" % selwhere
193
        uplog("seltagsbrowse: executing <%s> values %s" % (stmt, values))
194
        c.execute(stmt, values)
195
        recs = c.fetchall()
196
        subqs = analyzesubtree(sqconn, recs)
197
        if not subqs:
198
            for r in recs:
199
                docidx = r[0]
200
                id = pid + '$*i' + str(docidx)
201
                entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
202
                                             g_alldocs[docidx]))
203
        else:
204
            for tt in subqs:
205
                id = pid + '$=' + tt
206
                entries.append(rcldirentry(id, pid, tt))
207
    else:
208
        # SELECT col.value FROM tracks, col
209
        # WHERE tracks.col_id = col.col_id
210
        # GROUP BY tracks.col_id
211
        # ORDER BY col.value
212
        stmt = "SELECT " + selwhat + " FROM tracks, " + col + \
213
               selwhere + \
214
               " GROUP BY tracks." + col + '_id' + \
215
               " ORDER BY value"
216
        uplog("seltagsbrowse: executing <%s> values %s" % (stmt, values))
217
        c.execute(stmt, values)
218
        for r in c:
219
            id = pid + '$' + str(r[0])
220
            entries.append(rcldirentry(id, pid, r[1]))
221
    return entries
222
223
def browse(pid, flag, httphp, pathprefix):
224
    idpath = pid.replace(g_myprefix, '', 1)
225
    entries = []
226
    uplog('tags:browse: idpath <%s>' % idpath)
227
    qpath = idpath.split('$')
228
    c = sqconn.cursor()
229
    if idpath.startswith('items'):
230
        c.execute('''SELECT docidx FROM tracks ORDER BY title''')
231
        for r in c:
232
            docidx = r[0]
233
            id = pid + '$*i' + str(docidx)
234
            entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
235
                                         g_alldocs[docidx]))
236
    elif idpath.startswith('albums'):
237
        if len(qpath) == 1:
238
            c.execute('''SELECT albid, albtitle FROM albums
239
            ORDER BY albtitle''')
240
            for r in c:
241
                id = pid + '$*' + str(r[0])
242
                entries.append(rcldirentry(id, pid, r[1]))
243
        elif len(qpath) == 2:
244
            e1 = qpath[1]
245
            if not e1.startswith("*"):
246
                raise Exception("Bad album id in albums tree. Pth: %s" %idpath)
247
            albid = int(e1[1:])
248
            c.execute('''SELECT docidx FROM tracks WHERE albid = ? ORDER BY
249
            trackno''', (albid,))
250
            for r in c:
251
                docidx = r[0]
252
                id = pid + '$*i' + str(docidx)
253
                entries.append(rcldoctoentry(id, pid, httphp, pathprefix,
254
                                             g_alldocs[docidx]))
255
        else:
256
            raise Exception("Bad path in album tree (too deep): <%s>"%idpath)
257
    elif idpath.startswith('='):
258
        entries = seltagsbrowse(pid, qpath, flag, httphp, pathprefix)
259
    else:
260
        raise Exception('Bad path in tags tree (start>):<%s>'%idpath)
261
    return entries
262
263
264
def misctries():
265
    c = sqconn.cursor()
266
    c.execute('''SELECT COUNT(*) FROM tracks''')
267
    uplog("Count(*) %d" % (c.fetchone()[0],))
268
    
269
    #for row in c.execute('''SELECT album
270
    #                        FROM tracks where artist LIKE "%Gould%"
271
    #                        GROUP BY album'''):
272
    #    uplog("%s" % (row,))
273
274
    # For some strange reason it appears that GROUP BY is faster than SELECT
275
    # DISTINCT
276
    stmt = '''SELECT album FROM tracks GROUP BY album ORDER BY album'''
277
    start = timer()
278
    for row in c.execute(stmt):
279
        #uplog("%s" % (row[0].encode('UTF-8')))
280
        pass
281
    end = timer()
282
    uplog("Select took %.2f Seconds" % (end - start))
283
    for row in c.execute('''SELECT COUNT(DISTINCT album) from tracks'''):
284
        uplog("Album count %d" % row[0])
285
286
287
if __name__ == '__main__':
288
    confdir = "/home/dockes/.recoll-mp3"
289
    from recoll import recoll
290
291
    def fetchalldocs(confdir):
292
        allthedocs = []
293
        rcldb = recoll.connect(confdir=confdir)
294
        rclq = rcldb.query()
295
        rclq.execute("mime:*", stemming=0)
296
        uplog("Estimated alldocs query results: %d" % (rclq.rowcount))
297
        maxcnt = 1000
298
        totcnt = 0
299
        while True:
300
            docs = rclq.fetchmany()
301
            for doc in docs:
302
                allthedocs.append(doc)
303
                totcnt += 1
304
            if (maxcnt > 0 and totcnt >= maxcnt) or \
305
                   len(docs) != rclq.arraysize:
306
                break
307
        uplog("Retrieved %d docs" % (totcnt,))
308
        return allthedocs
309
    
310
    start = timer()
311
    docs = fetchalldocs(confdir)
312
    end = timer()
313
    uplog("Recoll extract took %.2f Seconds" % (end - start))
314
    start = timer()
315
    recolltosql(docs)
316
    end = timer()
317
    uplog("SQL db create took %.2f Seconds" % (end - start))
318