upmpdcli / Code / Diff of /src/mediaserver/cdplugins/uprcl/uprclsearch.py

Diff of /src/mediaserver/cdplugins/uprcl/uprclsearch.py [c2a515] .. [b1ab1b]

Switch to unified view


...
        if s[j].isspace():
            return j,w
        w += s[j]
    return j,w

# Called with '"' already read.

# UPnP search term strings are double-quoted, but we should not take
# them as recoll phrases. We separate the parts which are internally
# quoted, which become phrases, from the lists of words, which we
# interpret as an AND search (comma-separated). Internal quotes come
# backslash-escaped.
def _parsestring(s, i=0):
    """Parse the body of a double-quoted UPnP search value.

    Must be called with the opening double quote already consumed, so
    that scanning starts at s[i]. The text up to the next unescaped
    double quote is rewritten (backslash-escaped double quotes become
    plain ones, delimiting inner phrases) and then handed to
    stringToStrings().

    Parameters:
      s: the complete search string.
      i: index of the first character after the opening double quote.

    Returns a (j, tokens) pair:
      j: index of the terminating unescaped double quote plus 2 when one
         is found; otherwise the index of the last character scanned, or
         i itself when there was nothing left to scan.
      tokens: whatever stringToStrings() returns for the rewritten text
         (presumably a list of words and phrases -- confirm against its
         definition).
    """
    uplog("parseString: input: <%s>" % s[i:])
    # First change '''"hello \"one phrase\" world"''' into
    #  '''hello "one phrase" world'''
    # Note that we can't handle quoted dquotes inside string
    pieces = []
    escape = False
    instring = False
    # Bind j before the loop: with an empty range (i >= len(s)) the loop
    # variable would otherwise be undefined at the return statement.
    j = i
    for j in range(i, len(s)):
        c = s[j]
        if instring:
            # We are inside an inner \"...\" phrase.
            if escape:
                if c == '"':
                    # \" closes the inner phrase.
                    pieces.append('"')
                    instring = False
                else:
                    # Any other escaped character keeps its backslash.
                    pieces.append('\\' + c)
                escape = False
            elif c == '\\':
                escape = True
            else:
                pieces.append(c)
        elif escape:
            # Outside an inner phrase the backslash is dropped and the
            # character copied; \" opens an inner phrase.
            pieces.append(c)
            escape = False
            if c == '"':
                instring = True
        elif c == '\\':
            escape = True
        elif c == '"':
            # Unescaped double quote: end of the UPnP string.
            # NOTE(review): the +2 offset is kept from the original;
            # presumably it matches how the caller resumes scanning --
            # confirm against _getchar().
            j += 2
            break
        else:
            pieces.append(c)

    tokens = stringToStrings(''.join(pieces))
    return j, tokens

def _appendterms(out, v, field, oper):
    """Append the terms parsed from one quoted value to the output list.

    Parameters:
      out: list of output tokens, extended in place.
      v: iterable of term strings.
      field: field name emitted before each multi-word phrase.
      oper: operator emitted between the field name and each phrase.

    Terms for which split() yields exactly one element are merged into
    a single comma-separated token, which is always appended first (it
    is the empty string when there is no such term). Every other term
    is then appended as three tokens: field, oper, and the term wrapped
    in double quotes.
    """
    uplog("_appendterms: v %s field <%s> oper <%s>" % (v,field,oper))
    words, phrases = [], []
    for term in v:
        bucket = words if len(term.split()) == 1 else phrases
        bucket.append(term)

    out.append(",".join(words))
    for phrase in phrases:
        out.extend([field, oper, '"' + phrase + '"'])
            




def upnpsearchtorecoll(s):
    uplog("upnpsearchtorecoll:in: <%s>" % s)

    s = re.sub('[\t\n\r\f ]+', ' ', s)

    out = []
    hadDerived = False
    i = 0
    field = ""
    oper = ""
    while True:
        i,c = _getchar(s, i)
        if not c:
            break

...
                   (len(s[i:]) and not s[i:].isspace()):
                raise Exception("If * is used it must be the only input")
            out = ["mime:*"]
            break

        if c == '(' or c == ')': 
            out.append(c)
        elif c == '>' or c == '<' or c == '=':
            oper += c
        else:
            if c == '"':
                i,v = _parsestring(s, i)
                uplog("_parsestring ret: %s" % v)
                _appendterms(out, v, field, oper)
                oper = ""
                field = ""
                continue
            else:
                i -= 1
                i,w = _readword(s, i)

            #print("Got word [%s]" % w)
            if w == 'contains':
                out.append(':')
                oper = ':'
            elif w == 'doesNotContain':
                if len(out) < 1:
                    raise Exception("doesNotContain can't be the first word")
                out.insert(-1, "-")
                out.append(':')
                oper = ':'
            elif w == 'derivedFrom':
                hadDerived = True
                out.append(':')
                oper = ':'
            elif w == 'true':
                out.append('*')
                oper = ""
            elif w == 'false':
                out.append('xxxjanzocsduochterrrrm')
            elif w == 'exists':
                out.append(':')
                oper = ':'
            elif w == 'and':
                # Recoll has implied AND, but see next
                pass
            elif w == 'or':
                # Does not work because OR/AND priorities are reversed
                # between recoll and upnp. This would be very
                # difficult to correct, let's hope that the callers
                # use parentheses
                out.append('OR')
            else:
                field = upnp2rclfields[w]





                out.append(field)
                oper = ""

    ostr = ""
    for tok in out:
        ostr += tok + " "
    uplog("upnpsearchtorecoll:out: <%s>" % ostr)
...

def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
    rcls = upnpsearchtorecoll(upnps)

    filterdir = uprclfolders.dirpath(objid)
    if filterdir and filterdir != "/":
        rcls += " dir:\"" + filterdir + "\""
    
    uplog("Search: recoll search: <%s>" % rcls)

    rcldb = recoll.connect(confdir=rclconfdir)
    try:
        rclq = rcldb.query()
        rclq.execute(rcls)
...
    if rclq.rowcount == 0:
        return []
    
    entries = []
    maxcnt = 0

    while True:
        docs = rclq.fetchmany()
        for doc in docs:
            id = idprefix + '$' + 'seeyoulater'
            e = rcldoctoentry(id, objid, httphp, pathprefix, doc)
            if e:
                entries.append(e)
        if (maxcnt > 0 and len(entries) >= maxcnt) or \
               len(docs) != rclq.arraysize:
            break
    uplog("Search retrieved %d docs" % (len(entries),))

    return sorted(entries, cmp=cmpentries)



if __name__ == '__main__':
    s = '(upnp:artist derivedFrom  "abc\\"def\\g") or (dc:title:xxx) '

	a/src/mediaserver/cdplugins/uprcl/uprclsearch.py		b/src/mediaserver/cdplugins/uprcl/uprclsearch.py
	...		...
19	if s[j].isspace():	19	if s[j].isspace():
20	return j,w	20	return j,w
21	w += s[j]	21	w += s[j]
22	return j,w	22	return j,w
23		23
24	# Called with '"' already read:	24	# Called with '"' already read.
		25
		26	# Upnp search term strings are double quoted, but we should not take
		27	# them as recoll phrases. We separate parts which are internally
		28	# quoted, and become phrases, and lists of words which we interpret as
		29	# an and search (comma-separated). Internal quotes come backslash-escaped
25	def _readstring(s, i):	30	def _parsestring(s, i=0):
		31	uplog("parseString: input: <%s>" % s[i:])
		32	# First change '''"hello \"one phrase\"''' world" into
		33	# '''hello "one phrase" world'''
		34	# Note that we can't handle quoted dquotes inside string
26	str = '"'	35	str = ''
27	escape = False	36	escape = False
		37	instring = False
28	for j in range(i, len(s)):	38	for j in range(i, len(s)):
29	#print("s[j] [%s] out now [%s]" % (s[j],out))	39	if instring:
30	if s[j] == '\\':
31	if not escape:	40	if escape:
		41	if s[j] == '"':
		42	str += '"'
		43	instring = False
		44	else:
		45	str += '\\' + s[j]
		46	escape = False
		47	else:
		48	if s[j] == '\\':
32	escape = True	49	escape = True
		50	else:
33	str += '\\'	51	str += s[j]
34	continue
35		52
36	if s[j] == '"':
37	str += '"'
38	if not escape:
39	return j+1, str
40	else:	53	else:
		54	if escape:
41	str += s[j]	55	str += s[j]
		56	escape = False
		57	if s[j] == '"':
		58	instring = True
		59	else:
		60	if s[j] == '\\':
		61	escape = True
		62	elif s[j] == '"':
		63	j += 2
		64	break
		65	else:
		66	str += s[j]
		67
		68	tokens = stringToStrings(str)
		69	return j, tokens
		70
		71	def _appendterms(out, v, field, oper):
		72	uplog("_appendterms: v %s field <%s> oper <%s>" % (v,field,oper))
		73	swords = ""
		74	phrases = []
		75	for w in v:
		76	if len(w.split()) == 1:
		77	if swords:
		78	swords += ","
		79	swords += w
		80	else:
		81	phrases.append(w)
		82	out.append(swords)
		83	for ph in phrases:
		84	out.append(field)
		85	out.append(oper)
		86	out.append('"' + ph + '"')
42		87
43	escape = False
44
45	return len(s), str
46
47	def upnpsearchtorecoll(s):	88	def upnpsearchtorecoll(s):
48	uplog("upnpsearchtorecoll:in: <%s>" % s)	89	uplog("upnpsearchtorecoll:in: <%s>" % s)
49		90
50	s = re.sub('[\t\n\r\f ]+', ' ', s)	91	s = re.sub('[\t\n\r\f ]+', ' ', s)
51		92
52	out = []	93	out = []
53	hadDerived = False	94	hadDerived = False
54	i = 0	95	i = 0
		96	field = ""
		97	oper = ""
55	while True:	98	while True:
56	i,c = _getchar(s, i)	99	i,c = _getchar(s, i)
57	if not c:	100	if not c:
58	break	101	break
59		102
	...		...
65	(len(s[i:]) and not s[i:].isspace()):	108	(len(s[i:]) and not s[i:].isspace()):
66	raise Exception("If * is used it must be the only input")	109	raise Exception("If * is used it must be the only input")
67	out = ["mime:*"]	110	out = ["mime:*"]
68	break	111	break
69		112
70	if c == '(' or c == ')' or c == '>' or c == '<' or c == '=':	113	if c == '(' or c == ')':
71	out.append(c)	114	out.append(c)
		115	elif c == '>' or c == '<' or c == '=':
		116	oper += c
72	else:	117	else:
73	if c == '"':	118	if c == '"':
74	i,w = _readstring(s, i)	119	i,v = _parsestring(s, i)
75	if not w.endswith('"'):	120	uplog("_parsestring ret: %s" % v)
76	raise Exception("Unterminated string in [%s]" % out)	121	_appendterms(out, v, field, oper)
		122	oper = ""
		123	field = ""
		124	continue
77	else:	125	else:
78	i -= 1	126	i -= 1
79	i,w = _readword(s, i)	127	i,w = _readword(s, i)
80		128
81	#print("Got word [%s]" % w)	129	#print("Got word [%s]" % w)
82	if w == 'contains':	130	if w == 'contains':
83	out.append(':')	131	out.append(':')
		132	oper = ':'
84	elif w == 'doesNotContain':	133	elif w == 'doesNotContain':
85	if len(out) < 1:	134	if len(out) < 1:
86	raise Exception("doesNotContain can't be the first word")	135	raise Exception("doesNotContain can't be the first word")
87	out.insert(-1, "-")	136	out.insert(-1, "-")
88	out.append(':')	137	out.append(':')
		138	oper = ':'
89	elif w == 'derivedFrom':	139	elif w == 'derivedFrom':
90	hadDerived = True	140	hadDerived = True
91	out.append(':')	141	out.append(':')
		142	oper = ':'
92	elif w == 'true':	143	elif w == 'true':
93	out.append('*')	144	out.append('*')
		145	oper = ""
94	elif w == 'false':	146	elif w == 'false':
95	out.append('xxxjanzocsduochterrrrm')	147	out.append('xxxjanzocsduochterrrrm')
96	elif w == 'exists':	148	elif w == 'exists':
97	out.append(':')	149	out.append(':')
		150	oper = ':'
98	elif w == 'and':	151	elif w == 'and':
99	# Recoll has implied AND, but see next	152	# Recoll has implied AND, but see next
100	pass	153	pass
101	elif w == 'or':	154	elif w == 'or':
102	# Does not work because OR/AND priorities are reversed	155	# Does not work because OR/AND priorities are reversed
103	# between recoll and upnp. This would be very	156	# between recoll and upnp. This would be very
104	# difficult to correct, let's hope that the callers	157	# difficult to correct, let's hope that the callers
105	# use parentheses	158	# use parentheses
106	out.append('OR')	159	out.append('OR')
107	else:	160	else:
108	if hadDerived:	161	field = upnp2rclfields[w]
109	hadDerived = False
110	if len(w) >= 1 and w[-1] == '"':
111	w = w[:-1] + '*' + '"'
112	else:
113	w += '*'
114	out.append(w)	162	out.append(field)
		163	oper = ""
115		164
116	ostr = ""	165	ostr = ""
117	for tok in out:	166	for tok in out:
118	ostr += tok + " "	167	ostr += tok + " "
119	uplog("upnpsearchtorecoll:out: <%s>" % ostr)	168	uplog("upnpsearchtorecoll:out: <%s>" % ostr)
	...		...
122		171
123	def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):	172	def search(rclconfdir, objid, upnps, idprefix, httphp, pathprefix):
124	rcls = upnpsearchtorecoll(upnps)	173	rcls = upnpsearchtorecoll(upnps)
125		174
126	filterdir = uprclfolders.dirpath(objid)	175	filterdir = uprclfolders.dirpath(objid)
		176	if filterdir and filterdir != "/":
127	rcls += " dir:\"" + filterdir + "\""	177	rcls += " dir:\"" + filterdir + "\""
128		178
129	uplog("Search: recoll search: %s" % rcls)	179	uplog("Search: recoll search: <%s>" % rcls)
130		180
131	rcldb = recoll.connect(confdir=rclconfdir)	181	rcldb = recoll.connect(confdir=rclconfdir)
132	try:	182	try:
133	rclq = rcldb.query()	183	rclq = rcldb.query()
134	rclq.execute(rcls)	184	rclq.execute(rcls)
	...		...
140	if rclq.rowcount == 0:	190	if rclq.rowcount == 0:
141	return []	191	return []
142		192
143	entries = []	193	entries = []
144	maxcnt = 0	194	maxcnt = 0
145	totcnt = 0
146	while True:	195	while True:
147	docs = rclq.fetchmany()	196	docs = rclq.fetchmany()
148	for doc in docs:	197	for doc in docs:
149	id = idprefix + '$' + 'seeyoulater'	198	id = idprefix + '$' + 'seeyoulater'
150	e = rcldoctoentry(id, objid, httphp, pathprefix, doc)	199	e = rcldoctoentry(id, objid, httphp, pathprefix, doc)
		200	if e:
151	entries.append(e)	201	entries.append(e)
152	totcnt += 1	202	if (maxcnt > 0 and len(entries) >= maxcnt) or \
153	if (maxcnt > 0 and totcnt >= maxcnt) or len(docs) != rclq.arraysize:	203	len(docs) != rclq.arraysize:
154	break	204	break
155	uplog("Search retrieved %d docs" % (totcnt,))	205	uplog("Search retrieved %d docs" % (len(entries),))
156		206
157	return entries	207	return sorted(entries, cmp=cmpentries)
158		208
159		209
160		210
161	if __name__ == '__main__':	211	if __name__ == '__main__':
162	s = '(upnp:artist derivedFrom "abc\\"def\\g") or (dc:title:xxx) '	212	s = '(upnp:artist derivedFrom "abc\\"def\\g") or (dc:title:xxx) '