Switch to unified view

a/src/mediaserver/cdplugins/uprcl/uprclsearch.py b/src/mediaserver/cdplugins/uprcl/uprclsearch.py
...
...
19
        if s[j].isspace():
19
        if s[j].isspace():
20
            return j,w
20
            return j,w
21
        w += s[j]
21
        w += s[j]
22
    return j,w
22
    return j,w
23
23
24
# Called with '"' already read:
24
# Called with '"' already read.
25
26
# UPnP search term strings are double-quoted, but we should not take
# them as recoll phrases. We separate the parts which are internally
# quoted, which become phrases, from the lists of words, which we
# interpret as an AND search (comma-separated). Internal quotes come
# backslash-escaped.
25
def _readstring(s, i):
30
def _parsestring(s, i=0):
    # Parse the body of a double-quoted UPnP search string.
    #
    # s is the full search string and i the index at which scanning
    # starts (the caller has already read the opening double quote).
    #
    # First change '''"hello \"one phrase\"''' world" into
    #  '''hello "one phrase" world'''
    # Note that we can't handle quoted dquotes inside an internal string.
    # Scanning stops at the first unescaped double quote.
    #
    # Returns (j, tokens): tokens is the result of stringToStrings() on
    # the rewritten text. j is the index of the terminating double quote
    # plus 2 or, when no terminating quote is found, the index of the
    # last character scanned (i itself when there was nothing to scan).
    uplog("parseString: input: <%s>" % s[i:])
    pieces = []
    escape = False
    instring = False
    # Pre-bind j so that an empty scan range (i >= len(s)) returns i
    # instead of failing on an unbound loop variable.
    j = i
    for j in range(i, len(s)):
        c = s[j]
        if instring:
            # Inside an internal (backslash-quoted) phrase.
            if escape:
                if c == '"':
                    # Escaped double quote: end of the internal phrase.
                    pieces.append('"')
                    instring = False
                else:
                    # Any other escaped character keeps its backslash.
                    pieces.append('\\' + c)
                escape = False
            elif c == '\\':
                escape = True
            else:
                pieces.append(c)
        elif escape:
            # Outside a phrase the escaped character is copied without
            # its backslash; an escaped double quote opens a phrase.
            pieces.append(c)
            escape = False
            if c == '"':
                instring = True
        elif c == '\\':
            escape = True
        elif c == '"':
            # Unescaped double quote: end of the whole search string.
            # NOTE(review): this steps over the closing quote AND the
            # character following it -- confirm the caller expects that.
            j += 2
            break
        else:
            pieces.append(c)

    tokens = stringToStrings(''.join(pieces))
    return j, tokens
70
71
def _appendterms(out, v, field, oper):
    """Append the query tokens for the parsed terms in v to the list out.

    Entries of v consisting of exactly one word are gathered into a
    single comma-separated token, which is appended first (it is
    appended even when it is empty). Every other entry is then appended
    as three tokens: field, oper, and the entry wrapped in double quotes.
    """
    uplog("_appendterms: v %s field <%s> oper <%s>" % (v,field,oper))
    singles = []
    multis = []
    for term in v:
        if len(term.split()) == 1:
            singles.append(term)
        else:
            multis.append(term)
    out.append(",".join(singles))
    for phrase in multis:
        out.extend((field, oper, '"' + phrase + '"'))
42
        
87
            
43
        escape = False        
44
45
    return len(s), str
46
47
def upnpsearchtorecoll(s):
88
def upnpsearchtorecoll(s):
48
    uplog("upnpsearchtorecoll:in: <%s>" % s)
89
    uplog("upnpsearchtorecoll:in: <%s>" % s)
49
90
50
    s = re.sub('[\t\n\r\f ]+', ' ', s)
91
    s = re.sub('[\t\n\r\f ]+', ' ', s)
51
92
52
    out = []
93
    out = []
53
    hadDerived = False
94
    hadDerived = False
54
    i = 0
95
    i = 0
96
    field = ""
97
    oper = ""
55
    while True:
98
    while True:
56
        i,c = _getchar(s, i)
99
        i,c = _getchar(s, i)
57
        if not c:
100
        if not c:
58
            break
101
            break
59
102
...
...
65
                   (len(s[i:]) and not s[i:].isspace()):
108
                   (len(s[i:]) and not s[i:].isspace()):
66
                raise Exception("If * is used it must be the only input")
109
                raise Exception("If * is used it must be the only input")
67
            out = ["mime:*"]
110
            out = ["mime:*"]
68
            break
111
            break
69
112
70
        if c == '(' or c == ')' or c == '>' or c == '<' or c == '=':
113
        if c == '(' or c == ')': 
71
            out.append(c)
114
            out.append(c)
115
        elif c == '>' or c == '<' or c == '=':
116
            oper += c
72
        else:
117
        else:
73
            if c == '"':
118
            if c == '"':
74
                i,w = _readstring(s, i)
119
                i,v = _parsestring(s, i)
75
                if not w.endswith('"'):
120
                uplog("_parsestring ret: %s" % v)
76
                    raise Exception("Unterminated string in [%s]" % out)
121
                _appendterms(out, v, field, oper)
122
                oper = ""
123
                field = ""
124
                continue
77
            else:
125
            else:
78
                i -= 1
126
                i -= 1
79
                i,w = _readword(s, i)
127
                i,w = _readword(s, i)
80
128
81
            #print("Got word [%s]" % w)
129
            #print("Got word [%s]" % w)
82
            if w == 'contains':
130
            if w == 'contains':
83
                out.append(':')
131
                out.append(':')
132
                oper = ':'
84
            elif w == 'doesNotContain':
133
            elif w == 'doesNotContain':
85
                if len(out) < 1:
134
                if len(out) < 1:
86
                    raise Exception("doesNotContain can't be the first word")
135
                    raise Exception("doesNotContain can't be the first word")
87
                out.insert(-1, "-")
136
                out.insert(-1, "-")
88
                out.append(':')
137
                out.append(':')
138
                oper = ':'
89
            elif w == 'derivedFrom':
139
            elif w == 'derivedFrom':
90
                hadDerived = True
140
                hadDerived = True
91
                out.append(':')
141
                out.append(':')
142
                oper = ':'
92
            elif w == 'true':
143
            elif w == 'true':
93
                out.append('*')
144
                out.append('*')
145
                oper = ""
94
            elif w == 'false':
146
            elif w == 'false':
95
                out.append('xxxjanzocsduochterrrrm')
147
                out.append('xxxjanzocsduochterrrrm')
96
            elif w == 'exists':
148
            elif w == 'exists':
97
                out.append(':')
149
                out.append(':')
150
                oper = ':'
98
            elif w == 'and':
151
            elif w == 'and':
99
                # Recoll has implied AND, but see next
152
                # Recoll has implied AND, but see next
100
                pass
153
                pass
101
            elif w == 'or':
154
            elif w == 'or':
102
                # Does not work because OR/AND priorities are reversed
155
                # Does not work because OR/AND priorities are reversed
103
                # between recoll and upnp. This would be very
156
                # between recoll and upnp. This would be very
104
                # difficult to correct, let's hope that the callers
157
                # difficult to correct, let's hope that the callers
105
                # use parentheses
158
                # use parentheses
106
                out.append('OR')
159
                out.append('OR')
107
            else:
160
            else:
108
                if hadDerived:
161
                field = upnp2rclfields[w]
109
                    hadDerived = False
110
                    if len(w) >= 1 and w[-1] == '"':
111
                        w = w[:-1] + '*' + '"'
112
                    else:
113
                        w += '*'
114
                out.append(w)
162
                out.append(field)
163
                oper = ""
115
164
116
    ostr = ""
165
    ostr = ""
117
    for tok in out:
166
    for tok in out:
118
        ostr += tok + " "
167
        ostr += tok + " "
119
    uplog("upnpsearchtorecoll:out: <%s>" % ostr)
168
    uplog("upnpsearchtorecoll:out: <%s>" % ostr)
...
...
122
171
123
def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
172
def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
124
    rcls = upnpsearchtorecoll(upnps)
173
    rcls = upnpsearchtorecoll(upnps)
125
174
126
    filterdir = uprclfolders.dirpath(objid)
175
    filterdir = uprclfolders.dirpath(objid)
176
    if filterdir and filterdir != "/":
127
    rcls += " dir:\"" + filterdir + "\""
177
        rcls += " dir:\"" + filterdir + "\""
128
    
178
    
129
    uplog("Search: recoll search: %s" % rcls)
179
    uplog("Search: recoll search: <%s>" % rcls)
130
180
131
    rcldb = recoll.connect(confdir=rclconfdir)
181
    rcldb = recoll.connect(confdir=rclconfdir)
132
    try:
182
    try:
133
        rclq = rcldb.query()
183
        rclq = rcldb.query()
134
        rclq.execute(rcls)
184
        rclq.execute(rcls)
...
...
140
    if rclq.rowcount == 0:
190
    if rclq.rowcount == 0:
141
        return []
191
        return []
142
    
192
    
143
    entries = []
193
    entries = []
144
    maxcnt = 0
194
    maxcnt = 0
145
    totcnt = 0
146
    while True:
195
    while True:
147
        docs = rclq.fetchmany()
196
        docs = rclq.fetchmany()
148
        for doc in docs:
197
        for doc in docs:
149
            id = idprefix + '$' + 'seeyoulater'
198
            id = idprefix + '$' + 'seeyoulater'
150
            e = rcldoctoentry(id, objid, httphp, pathprefix, doc)
199
            e = rcldoctoentry(id, objid, httphp, pathprefix, doc)
200
            if e:
151
            entries.append(e)
201
                entries.append(e)
152
            totcnt += 1
202
        if (maxcnt > 0 and len(entries) >= maxcnt) or \
153
        if (maxcnt > 0 and totcnt >= maxcnt) or len(docs) != rclq.arraysize:
203
               len(docs) != rclq.arraysize:
154
            break
204
            break
155
    uplog("Search retrieved %d docs" % (totcnt,))
205
    uplog("Search retrieved %d docs" % (len(entries),))
156
206
157
    return entries
207
    return sorted(entries, cmp=cmpentries)
158
208
159
209
160
210
161
if __name__ == '__main__':
211
if __name__ == '__main__':
162
    s = '(upnp:artist derivedFrom  "abc\\"def\\g") or (dc:title:xxx) '
212
    s = '(upnp:artist derivedFrom  "abc\\"def\\g") or (dc:title:xxx) '