|
a/src/python/xesam/xesam-recoll-service |
|
b/src/python/xesam/xesam-recoll-service |
|
... |
|
... |
71 |
|
71 |
|
72 |
SLICE = 10
|
72 |
SLICE = 10
|
73 |
|
73 |
|
74 |
def __init__ (self, searcher, session, search_handle, \
|
74 |
def __init__ (self, searcher, session, search_handle, \
|
75 |
query=None, xml=None) :
|
75 |
query=None, xml=None) :
|
|
|
76 |
# Our parent class does possible xml-to-query parsing. We end up
|
|
|
77 |
# with a query
|
76 |
xesam.server.Search.__init__ (self, searcher, session, search_handle, \
|
78 |
xesam.server.Search.__init__ (self, searcher, session, search_handle, \
|
77 |
query=query, xml=xml)
|
79 |
query=query, xml=xml)
|
78 |
|
80 |
|
79 |
self._hit_fields = session.get_property (xesam.SESSION_HIT_FIELDS)
|
81 |
self._hit_fields = session.get_property (xesam.SESSION_HIT_FIELDS)
|
80 |
if self._hit_fields is None:
|
82 |
if self._hit_fields is None:
|
|
... |
|
... |
83 |
self._hit_fields = ["xesam:url"]
|
85 |
self._hit_fields = ["xesam:url"]
|
84 |
xesam.debug("RecollSearch: fields:" % self._hit_fields)
|
86 |
xesam.debug("RecollSearch: fields:" % self._hit_fields)
|
85 |
xesam.debug ("Created %s with handle %s and query:\n%s" %
|
87 |
xesam.debug ("Created %s with handle %s and query:\n%s" %
|
86 |
(self.__class__, self.get_handle(), self.get_query()))
|
88 |
(self.__class__, self.get_handle(), self.get_query()))
|
87 |
|
89 |
|
88 |
# Only user queries for now...
|
90 |
# Instantiate a recoll query
|
89 |
if not isinstance(self.get_query(), xesam.query.UserQuery):
|
|
|
90 |
raise Exception ("Only UserQuery supported ATM, sorry.")
|
|
|
91 |
self.rclquery = self._searcher.rcldb.query()
|
91 |
self.rclquery = self._searcher.rcldb.query()
|
92 |
|
92 |
|
93 |
# In the latest version (>0.95), primary/secondary is replaced by
|
93 |
# In the latest version (>0.95), primary/secondary is replaced by
|
94 |
# a field list.
|
94 |
# a field list.
|
95 |
sortfield = session.get_property(xesam.SESSION_SORT_PRIMARY)
|
95 |
sortfield = session.get_property(xesam.SESSION_SORT_PRIMARY)
|
96 |
order = session.get_property(xesam.SESSION_SORT_ORDER)
|
96 |
order = session.get_property(xesam.SESSION_SORT_ORDER)
|
97 |
|
97 |
|
|
... |
|
... |
103 |
# END TEST
|
103 |
# END TEST
|
104 |
|
104 |
|
105 |
if sortfield:
|
105 |
if sortfield:
|
106 |
self.rclquery.sortby(sortfield, order == "ascending" and 1 or 0)
|
106 |
self.rclquery.sortby(sortfield, order == "ascending" and 1 or 0)
|
107 |
|
107 |
|
|
|
108 |
def doc_to_hit(self, doc):
    """Return the list of hit values for one Recoll document.

    One value is produced for each entry of self._hit_fields, in the same
    order. Each field name is lowercased and has "xesam:" removed before
    being looked up:

    - "snippet" is computed by rcldb.makeDocAbstract() for this query,
    - "contentmodified" is doc.mtime converted by timestampToIso8601(),
    - any other name is read as an attribute of doc, defaulting to "".
    """
    row = []
    for hit_field in self._hit_fields:
        # TODO: ContentCategory and SourceCategory still need handling
        name = hit_field.lower().replace("xesam:", "")
        xesam.debug("Adding data for fld %s" % (name))
        if name == "snippet":
            value = self._searcher.rcldb.makeDocAbstract(doc, self.rclquery)
        elif name == "contentmodified":
            value = timestampToIso8601(doc.mtime)
        else:
            # Unknown attributes yield an empty string rather than failing
            value = getattr(doc, name, "")
        row.append(value)
    return row
|
|
|
122 |
|
108 |
def start (self):
|
123 |
def start (self):
|
109 |
xesam.debug ("RecollSearch '%s' got [%s]" %
|
124 |
xesam.debug ("RecollSearch")
|
110 |
(self.get_handle(), self.get_query().get_string()))
|
125 |
|
|
|
126 |
if isinstance(self.get_query(), xesam.query.UserQuery):
|
111 |
self.nres = self.rclquery.execute(self.get_query().get_string())
|
127 |
self.nres = self.rclquery.execute(self.get_query().get_string())
|
|
|
128 |
elif isinstance(self.get_query(), xesam.query.CompositeQuery):
|
|
|
129 |
self.build_search(None)
|
|
|
130 |
self.nres = self.rclquery.executesd(self.searchdata)
|
|
|
131 |
else:
|
|
|
132 |
raise Exception ("Neither UserQuery nor composite query ??")
|
|
|
133 |
|
112 |
hits = 0
|
134 |
hits = 0
|
113 |
done = 0
|
135 |
done = 0
|
114 |
while self.rclquery.next >= 0 and self.rclquery.next < self.nres:
|
136 |
while self.rclquery.next >= 0 and self.rclquery.next < self.nres:
|
115 |
doc = self.rclquery.fetchone()
|
137 |
doc = self.rclquery.fetchone()
|
116 |
data = []
|
138 |
data = self.doc_to_hit(doc)
|
117 |
for fld in self._hit_fields:
|
|
|
118 |
# Need to handle ContentCategory and SourceCategory
|
|
|
119 |
fld = fld.lower().replace("xesam:", "")
|
|
|
120 |
xesam.debug("Adding data for fld %s" % (fld))
|
|
|
121 |
if fld == "snippet":
|
|
|
122 |
data.append(self._searcher.rcldb.makeDocAbstract(doc,
|
|
|
123 |
self.rclquery))
|
|
|
124 |
elif fld == "contentmodified":
|
|
|
125 |
data.append(timestampToIso8601(getattr(doc, "mtime")))
|
|
|
126 |
else:
|
|
|
127 |
data.append(getattr(doc, fld, ""))
|
|
|
128 |
self.add_new_hit (self._hit_fields, data)
|
139 |
self.add_new_hit (self._hit_fields, data)
|
129 |
hits += 1
|
140 |
hits += 1
|
130 |
if hits >= self.SLICE:
|
141 |
if hits >= self.SLICE:
|
131 |
break
|
142 |
break
|
132 |
else:
|
143 |
else:
|
|
... |
|
... |
147 |
|
158 |
|
148 |
if self._stopped:
|
159 |
if self._stopped:
|
149 |
return xesam.server.Search.get_hits(self, num_hits)
|
160 |
return xesam.server.Search.get_hits(self, num_hits)
|
150 |
|
161 |
|
151 |
hits = 0
|
162 |
hits = 0
|
152 |
done = 0;
|
163 |
done = 0
|
153 |
while self.rclquery.next >= 0 and self.rclquery.next < self.nres:
|
164 |
while self.rclquery.next >= 0 and self.rclquery.next < self.nres:
|
154 |
doc = self.rclquery.fetchone()
|
165 |
doc = self.rclquery.fetchone()
|
155 |
data = []
|
166 |
data = self.doc_to_hit(doc)
|
156 |
for fld in self._hit_fields:
|
|
|
157 |
if self.FLDTRANS.has_key (fld):
|
|
|
158 |
data.append(self.FLDTRANS[fld](doc))
|
|
|
159 |
else:
|
|
|
160 |
data.append("")
|
|
|
161 |
self.add_new_hit (self._hit_fields, data)
|
167 |
self.add_new_hit (self._hit_fields, data)
|
162 |
hits += 1
|
168 |
hits += 1
|
163 |
if hits >= self.SLICE or hits >= num_hits:
|
169 |
if hits >= self.SLICE or hits >= num_hits:
|
164 |
break
|
170 |
break
|
165 |
else:
|
171 |
else:
|
|
... |
|
... |
174 |
xesam.debug ("Search '%s' emitted 'done'" % self.get_handle())
|
180 |
xesam.debug ("Search '%s' emitted 'done'" % self.get_handle())
|
175 |
self.stop()
|
181 |
self.stop()
|
176 |
|
182 |
|
177 |
return xesam.server.Search.get_hits(self, num_hits)
|
183 |
return xesam.server.Search.get_hits(self, num_hits)
|
178 |
|
184 |
|
|
|
185 |
# Build Recoll searchData tree out of Xesam CompositeQuery
|
|
|
186 |
# This is recursive, parent is the current SearchData into which we are
|
|
|
187 |
# adding clauses/subsearches.
|
|
|
188 |
def build_search(self, parent = None):
    """Build a Recoll SearchData tree from the Xesam query clause.

    Unfinished: work stopped after the hackfest because big changes were
    apparently coming in this area. Many things are still missing, such
    as handling the selector "names" (see xesam/query.py: SELECTORS), the
    different value types, and their attributes (phrase, slack, ordered,
    etc.; cf. xesam/query.py: StringValue).

    parent -- the SearchData currently being filled, or None at top level.
              When None and the clause is a selector, a new SearchData is
              created and stored in self.searchdata.

    Raises Exception if the clause is neither a SelectorClause nor a
    CollectorClause.
    """
    xq = self.get_query()
    cl = xq.get_clause()
    if isinstance(cl, xesam.query.SelectorClause):
        xesam.debug ("Selector clause")
        if parent is None:
            # Top clause is a selector. Needs to have a parent
            self.searchdata = recoll.SearchData()
            parent = self.searchdata

        # Fields. If there are several we must turn this into an OR
        # combination of searches on the different fields
        fields = cl.get_fields()
        nfields = len(fields)
        # NOTE(review): simple_selec_addclause is called as a plain function
        # with self passed explicitly; if it is actually defined as a method
        # of this class these calls must become self.simple_selec_addclause(...)
        # -- confirm against the real file layout.
        if nfields > 1:
            orsearch = recoll.SearchData(type = "or")
            for fld in fields:
                simple_selec_addclause(self, orsearch, cl, fld)
            parent.addclause(type = "sub", subSearch = orsearch)
        elif nfields == 1:
            simple_selec_addclause(self, parent, cl, fields[0])
        else:
            # No field given: the helper falls back to its default field
            simple_selec_addclause(self, parent, cl)
    elif isinstance(cl, xesam.query.CollectorClause):
        # Collector clauses are recognized but not translated yet
        xesam.debug ("Collector clause")
    else:
        raise Exception("Non Collector or Selector Clause found")
|
|
|
223 |
|
|
|
224 |
# Add Recoll clause for xesam selector clause, dealing with at most
|
|
|
225 |
# one field.
|
|
|
226 |
def simple_selec_addclause(self, parent, cl, fld = ""):
    """Add one "and" clause to parent for the selector clause cl.

    parent -- object receiving the clause through its addclause() method
    cl     -- selector clause; its query string is cl.get_value().get()
    fld    -- field name for the clause ("" when none is given)
    """
    query_text = cl.get_value().get()
    parent.addclause(type = "and", field = fld, qstring = query_text)
|
179 |
|
229 |
|
180 |
# Script entry point: create the server object and start it.
if __name__ == "__main__":
    server = RecollServer()
    server.start()
|