|
a/src/rcldb/searchdata.h |
|
b/src/rcldb/searchdata.h |
|
... |
|
... |
46 |
};
|
46 |
};
|
47 |
|
47 |
|
48 |
class SearchDataClause;
|
48 |
class SearchDataClause;
|
49 |
|
49 |
|
50 |
/**
|
50 |
/**
|
51 |
Data structure representing a Recoll user query, for translation
|
51 |
A SearchData object represents a Recoll user query, for translation
|
52 |
into a Xapian query tree. This could probably be better called a 'question'.
|
52 |
into a Xapian query tree. This could probably be better called a 'question'.
|
53 |
|
53 |
|
54 |
This is a list of search clauses combined through either OR or AND.
|
54 |
This is a list of SearchDataClause objects combined through either
|
|
|
55 |
OR or AND.
|
55 |
|
56 |
|
56 |
Clauses either reflect user entry in a query field: some text, a
|
57 |
Clauses either reflect user entry in a query field: some text, a
|
57 |
clause type (AND/OR/NEAR etc.), possibly a distance, or point to
|
58 |
clause type (AND/OR/NEAR etc.), possibly a distance, or are the
|
|
|
59 |
result of parsing query language input. A clause can also point to
|
58 |
another SearchData representing a subquery.
|
60 |
another SearchData representing a subquery.
|
59 |
|
61 |
|
60 |
The content of each clause when added may not be fully parsed yet
|
62 |
The content of each clause when added may not be fully parsed yet
|
61 |
(may come directly from a gui field). It will be parsed and may be
|
63 |
(may come directly from a gui field). It will be parsed and may be
|
62 |
translated to several queries in the Xapian sense, for example
|
64 |
translated to several queries in the Xapian sense, for example
|
63 |
several terms and phrases as would result from
|
65 |
several terms and phrases as would result from
|
64 |
["this is a phrase" term1 term2] .
|
66 |
["this is a phrase" term1 term2] .
|
65 |
|
67 |
|
66 |
This is why the clauses also have an AND/OR/... type.
|
68 |
This is why the clauses also have an AND/OR/... type. They are an
|
|
|
69 |
intermediate form between the primary user input and
|
|
|
70 |
the final Xapian::Query tree.
|
67 |
|
71 |
|
68 |
A phrase clause could be added either explicitly or using double quotes:
|
72 |
For example, a phrase clause could be added either explicitly or
|
69 |
{SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
|
73 |
using double quotes: {SCLT_PHRASE, [this is a phrase]} or as
|
|
|
74 |
{SCLT_XXX, ["this is a phrase"]}
|
70 |
|
75 |
|
71 |
*/
|
76 |
*/
|
72 |
class SearchData {
|
77 |
class SearchData {
|
73 |
public:
|
78 |
public:
|
74 |
SearchData(SClType tp, const string& stemlang)
|
79 |
SearchData(SClType tp, const string& stemlang)
|
75 |
: m_tp(tp), m_haveDates(false), m_maxSize(size_t(-1)),
|
80 |
: m_tp(tp), m_stemlang(stemlang)
|
76 |
m_minSize(size_t(-1)), m_haveWildCards(false), m_stemlang(stemlang)
|
|
|
77 |
{
|
81 |
{
|
78 |
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
82 |
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
79 |
m_tp = SCLT_OR;
|
83 |
m_tp = SCLT_OR;
|
|
|
84 |
commoninit();
|
80 |
}
|
85 |
}
|
81 |
SearchData()
|
86 |
SearchData()
|
82 |
: m_tp(SCLT_AND), m_haveDates(false), m_maxSize(size_t(-1)),
|
87 |
: m_tp(SCLT_AND), m_stemlang("english")
|
83 |
m_minSize(size_t(-1)), m_haveWildCards(false), m_stemlang("english")
|
|
|
84 |
{
|
88 |
{
|
|
|
89 |
commoninit();
|
85 |
}
|
90 |
}
|
86 |
|
91 |
|
87 |
~SearchData() {erase();}
|
92 |
~SearchData()
|
|
|
93 |
{
|
|
|
94 |
erase();
|
|
|
95 |
}
|
88 |
|
96 |
|
89 |
/** Make pristine */
|
97 |
/** Make pristine */
|
90 |
void erase();
|
98 |
void erase();
|
91 |
|
99 |
|
92 |
/** Is there anything but a file name search in here ? */
|
100 |
/** Is there anything but a file name search in here ? */
|
|
... |
|
... |
94 |
|
102 |
|
95 |
/** Do we have wildcards anywhere apart from filename searches ? */
|
103 |
/** Do we have wildcards anywhere apart from filename searches ? */
|
96 |
bool haveWildCards() {return m_haveWildCards;}
|
104 |
bool haveWildCards() {return m_haveWildCards;}
|
97 |
|
105 |
|
98 |
/** Translate to Xapian query. rcldb knows about the void* */
|
106 |
/** Translate to Xapian query. rcldb knows about the void* */
|
99 |
bool toNativeQuery(Rcl::Db &db, void *, int maxexp, int maxcl);
|
107 |
bool toNativeQuery(Rcl::Db &db, void *);
|
100 |
|
108 |
|
101 |
/** We become the owner of cl and will delete it */
|
109 |
/** We become the owner of cl and will delete it */
|
102 |
bool addClause(SearchDataClause *cl);
|
110 |
bool addClause(SearchDataClause *cl);
|
103 |
|
111 |
|
104 |
/** If this is a simple query (one field only, no distance clauses),
|
112 |
/** If this is a simple query (one field only, no distance clauses),
|
|
... |
|
... |
141 |
* initializing the query. It is stored here for usage in the GUI.
|
149 |
* initializing the query. It is stored here for usage in the GUI.
|
142 |
*/
|
150 |
*/
|
143 |
std::string getDescription() {return m_description;}
|
151 |
std::string getDescription() {return m_description;}
|
144 |
void setDescription(const std::string& d) {m_description = d;}
|
152 |
void setDescription(const std::string& d) {m_description = d;}
|
145 |
|
153 |
|
|
|
154 |
/** Return an XML version of the contents, for storage in search history
|
|
|
155 |
by the GUI */
|
146 |
string asXML();
|
156 |
string asXML();
|
|
|
157 |
|
147 |
void setTp(SClType tp)
|
158 |
void setTp(SClType tp)
|
148 |
{
|
159 |
{
|
149 |
m_tp = tp;
|
160 |
m_tp = tp;
|
150 |
}
|
161 |
}
|
|
|
162 |
|
|
|
163 |
void setMaxExpand(int max)
|
|
|
164 |
{
|
|
|
165 |
m_softmaxexpand = max;
|
|
|
166 |
}
|
|
|
167 |
bool getAutoDiac() {return m_autodiacsens;}
|
|
|
168 |
bool getAutoCase() {return m_autocasesens;}
|
|
|
169 |
int getMaxExp() {return m_maxexp;}
|
|
|
170 |
int getMaxCl() {return m_maxcl;}
|
|
|
171 |
|
151 |
friend class ::AdvSearch;
|
172 |
friend class ::AdvSearch;
|
|
|
173 |
|
152 |
private:
|
174 |
private:
|
153 |
// Combine type. Only SCLT_AND or SCLT_OR here
|
175 |
// Combine type. Only SCLT_AND or SCLT_OR here
|
154 |
SClType m_tp;
|
176 |
SClType m_tp;
|
155 |
// Complex query descriptor
|
177 |
// Complex query descriptor
|
156 |
std::vector<SearchDataClause*> m_query;
|
178 |
std::vector<SearchDataClause*> m_query;
|
|
... |
|
... |
182 |
std::string m_description;
|
204 |
std::string m_description;
|
183 |
std::string m_reason;
|
205 |
std::string m_reason;
|
184 |
bool m_haveWildCards;
|
206 |
bool m_haveWildCards;
|
185 |
std::string m_stemlang;
|
207 |
std::string m_stemlang;
|
186 |
|
208 |
|
|
|
209 |
// Parameters set at the start of toNativeQuery() because they need
|
|
|
210 |
// an rclconfig. Actually this does not make sense and it would be
|
|
|
211 |
// simpler to just pass an rclconfig to the constructor;
|
|
|
212 |
bool m_autodiacsens;
|
|
|
213 |
bool m_autocasesens;
|
|
|
214 |
int m_maxexp;
|
|
|
215 |
int m_maxcl;
|
|
|
216 |
|
|
|
217 |
// Parameters which are not part of the main query data but may influence
|
|
|
218 |
// translation in special cases.
|
|
|
219 |
// Maximum TermMatch (e.g. wildcard) expansion. This is normally set
|
|
|
220 |
// from the configuration with a high default, but may be set to a lower
|
|
|
221 |
// value during "find-as-you-type" operations from the GUI
|
|
|
222 |
int m_softmaxexpand;
|
|
|
223 |
|
187 |
bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
|
224 |
bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
|
188 |
bool clausesToQuery(Rcl::Db &db, SClType tp,
|
225 |
bool clausesToQuery(Rcl::Db &db, SClType tp,
|
189 |
std::vector<SearchDataClause*>& query,
|
226 |
std::vector<SearchDataClause*>& query,
|
190 |
string& reason, void *d, int, int);
|
227 |
string& reason, void *d);
|
|
|
228 |
void commoninit();
|
191 |
|
229 |
|
192 |
/* Copy constructor and assignment are private and forbidden */
|
230 |
/* Copy constructor and assignment are private and forbidden */
|
193 |
SearchData(const SearchData &) {}
|
231 |
SearchData(const SearchData &) {}
|
194 |
SearchData& operator=(const SearchData&) {return *this;};
|
232 |
SearchData& operator=(const SearchData&) {return *this;};
|
195 |
};
|
233 |
};
|
|
... |
|
... |
202 |
SearchDataClause(SClType tp)
|
240 |
SearchDataClause(SClType tp)
|
203 |
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
241 |
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
204 |
m_modifiers(SDCM_NONE), m_weight(1.0)
|
242 |
m_modifiers(SDCM_NONE), m_weight(1.0)
|
205 |
{}
|
243 |
{}
|
206 |
virtual ~SearchDataClause() {}
|
244 |
virtual ~SearchDataClause() {}
|
207 |
virtual bool toNativeQuery(Rcl::Db &db, void *, int maxexp, int maxcl) = 0;
|
245 |
virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
|
208 |
/** True when this clause's type is SCLT_FILENAME. */
bool isFileName() const
{
    return m_tp == SCLT_FILENAME;
}
|
246 |
/** True when this clause's type is SCLT_FILENAME. */
bool isFileName() const
{
    return m_tp == SCLT_FILENAME;
}
|
209 |
virtual std::string getReason() const {return m_reason;}
|
247 |
virtual std::string getReason() const {return m_reason;}
|
210 |
virtual void getTerms(HighlightData & hldata) const = 0;
|
248 |
virtual void getTerms(HighlightData & hldata) const = 0;
|
211 |
|
249 |
|
212 |
SClType getTp() const
|
250 |
SClType getTp() const
|
|
... |
|
... |
219 |
}
|
257 |
}
|
220 |
string getStemLang()
|
258 |
string getStemLang()
|
221 |
{
|
259 |
{
|
222 |
return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ?
|
260 |
return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ?
|
223 |
cstr_null : m_parentSearch->getStemLang();
|
261 |
cstr_null : m_parentSearch->getStemLang();
|
|
|
262 |
}
|
|
|
263 |
bool getAutoDiac()
|
|
|
264 |
{
|
|
|
265 |
return m_parentSearch ? m_parentSearch->getAutoDiac() : false;
|
|
|
266 |
}
|
|
|
267 |
bool getAutoCase()
|
|
|
268 |
{
|
|
|
269 |
return m_parentSearch ? m_parentSearch->getAutoCase() : true;
|
|
|
270 |
}
|
|
|
271 |
int getMaxExp()
|
|
|
272 |
{
|
|
|
273 |
return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
|
|
|
274 |
}
|
|
|
275 |
int getMaxCl()
|
|
|
276 |
{
|
|
|
277 |
return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
|
224 |
}
|
278 |
}
|
225 |
virtual void setModifiers(Modifier mod)
|
279 |
virtual void setModifiers(Modifier mod)
|
226 |
{
|
280 |
{
|
227 |
m_modifiers = mod;
|
281 |
m_modifiers = mod;
|
228 |
}
|
282 |
}
|
|
... |
|
... |
261 |
|
315 |
|
262 |
/**
|
316 |
/**
|
263 |
* "Simple" data clause with user-entered query text. This can include
|
317 |
* "Simple" data clause with user-entered query text. This can include
|
264 |
* multiple phrases and words, but no specified distance.
|
318 |
* multiple phrases and words, but no specified distance.
|
265 |
*/
|
319 |
*/
|
|
|
320 |
class TextSplitQ;
|
266 |
class SearchDataClauseSimple : public SearchDataClause {
|
321 |
class SearchDataClauseSimple : public SearchDataClause {
|
267 |
public:
|
322 |
public:
|
268 |
SearchDataClauseSimple(SClType tp, const std::string& txt,
|
323 |
SearchDataClauseSimple(SClType tp, const std::string& txt,
|
269 |
const std::string& fld = std::string())
|
324 |
const std::string& fld = std::string())
|
270 |
: SearchDataClause(tp), m_text(txt), m_field(fld)
|
325 |
: SearchDataClause(tp), m_text(txt), m_field(fld)
|
|
... |
|
... |
276 |
virtual ~SearchDataClauseSimple()
|
331 |
virtual ~SearchDataClauseSimple()
|
277 |
{
|
332 |
{
|
278 |
}
|
333 |
}
|
279 |
|
334 |
|
280 |
/** Translate to Xapian query */
|
335 |
/** Translate to Xapian query */
|
281 |
virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
|
336 |
virtual bool toNativeQuery(Rcl::Db &, void *);
|
282 |
|
337 |
|
283 |
virtual void getTerms(HighlightData& hldata) const
|
338 |
virtual void getTerms(HighlightData& hldata) const
|
284 |
{
|
339 |
{
|
285 |
hldata.append(m_hldata);
|
340 |
hldata.append(m_hldata);
|
286 |
}
|
341 |
}
|
|
... |
|
... |
294 |
}
|
349 |
}
|
295 |
protected:
|
350 |
protected:
|
296 |
std::string m_text; // Raw user entry text.
|
351 |
std::string m_text; // Raw user entry text.
|
297 |
std::string m_field; // Field specification if any
|
352 |
std::string m_field; // Field specification if any
|
298 |
HighlightData m_hldata;
|
353 |
HighlightData m_hldata;
|
|
|
354 |
int m_curcl;
|
|
|
355 |
|
|
|
356 |
bool processUserString(Rcl::Db &db, const string &iq, int mods,
|
|
|
357 |
std::string &ermsg,
|
|
|
358 |
void* pq, int slack = 0, bool useNear = false);
|
|
|
359 |
bool expandTerm(Rcl::Db &db, std::string& ermsg, int mods,
|
|
|
360 |
const std::string& term,
|
|
|
361 |
std::vector<std::string>& exp,
|
|
|
362 |
std::string& sterm, const std::string& prefix);
|
|
|
363 |
// After splitting entry on whitespace: process non-phrase element
|
|
|
364 |
void processSimpleSpan(Rcl::Db &db, string& ermsg, const string& span,
|
|
|
365 |
int mods, void *pq);
|
|
|
366 |
// Process phrase/near element
|
|
|
367 |
void processPhraseOrNear(Rcl::Db &db, string& ermsg, TextSplitQ *splitData,
|
|
|
368 |
int mods, void *pq, bool useNear, int slack);
|
299 |
};
|
369 |
};
|
300 |
|
370 |
|
301 |
/**
|
371 |
/**
|
302 |
* Filename search clause. This is special because term expansion is only
|
372 |
* Filename search clause. This is special because term expansion is only
|
303 |
* performed against the unsplit file name terms.
|
373 |
* performed against the unsplit file name terms.
|
304 |
*
|
374 |
*
|
305 |
* There is a big advantage in expanding only against the
|
375 |
* There is a big advantage in expanding only against the
|
306 |
* field, especially for file names, because this makes searches for
|
376 |
* field, especially for file names, because this makes searches for
|
307 |
* "*xx" much faster (no need to scan the whole main index).
|
377 |
* "*xx" much faster (no need to scan the whole main index).
|
308 |
*/
|
378 |
*/
|
309 |
class SearchDataClauseFilename : public SearchDataClauseSimple {
|
379 |
class SearchDataClauseFilename : public SearchDataClause {
|
310 |
public:
|
380 |
public:
|
311 |
SearchDataClauseFilename(const std::string& txt)
|
381 |
SearchDataClauseFilename(const std::string& txt)
|
312 |
: SearchDataClauseSimple(SCLT_FILENAME, txt)
|
382 |
: SearchDataClause(SCLT_FILENAME), m_text(txt)
|
313 |
{
|
383 |
{
|
314 |
// File name searches don't count when looking for wild cards.
|
384 |
// File name searches don't count when looking for wild cards.
|
315 |
m_haveWildCards = false;
|
385 |
m_haveWildCards = false;
|
316 |
}
|
386 |
}
|
317 |
|
387 |
|
318 |
virtual ~SearchDataClauseFilename()
|
388 |
virtual ~SearchDataClauseFilename()
|
319 |
{
|
389 |
{
|
320 |
}
|
390 |
}
|
321 |
|
391 |
|
|
|
392 |
virtual void getTerms(HighlightData&) const
|
|
|
393 |
{
|
|
|
394 |
}
|
|
|
395 |
|
322 |
virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
|
396 |
virtual bool toNativeQuery(Rcl::Db &, void *);
|
|
|
397 |
|
|
|
398 |
protected:
|
|
|
399 |
std::string m_text;
|
323 |
};
|
400 |
};
|
324 |
|
401 |
|
325 |
/**
|
402 |
/**
|
326 |
* A clause coming from a NEAR or PHRASE entry field. There is only one
|
403 |
* A clause coming from a NEAR or PHRASE entry field. There is only one
|
327 |
* std::string group, and a specified distance, which applies to it.
|
404 |
* std::string group, and a specified distance, which applies to it.
|
|
... |
|
... |
336 |
|
413 |
|
337 |
virtual ~SearchDataClauseDist()
|
414 |
virtual ~SearchDataClauseDist()
|
338 |
{
|
415 |
{
|
339 |
}
|
416 |
}
|
340 |
|
417 |
|
341 |
virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
|
418 |
virtual bool toNativeQuery(Rcl::Db &, void *);
|
342 |
virtual int getslack() const
|
419 |
virtual int getslack() const
|
343 |
{
|
420 |
{
|
344 |
return m_slack;
|
421 |
return m_slack;
|
345 |
}
|
422 |
}
|
346 |
private:
|
423 |
private:
|
|
... |
|
... |
352 |
public:
|
429 |
public:
|
353 |
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
|
430 |
SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub)
|
354 |
: SearchDataClause(tp), m_sub(sub)
|
431 |
: SearchDataClause(tp), m_sub(sub)
|
355 |
{
|
432 |
{
|
356 |
}
|
433 |
}
|
357 |
virtual bool toNativeQuery(Rcl::Db &db, void *p, int maxexp, int maxcl)
|
434 |
virtual bool toNativeQuery(Rcl::Db &db, void *p)
|
358 |
{
|
435 |
{
|
359 |
bool ret = m_sub->toNativeQuery(db, p, maxexp, maxcl);
|
436 |
bool ret = m_sub->toNativeQuery(db, p);
|
360 |
if (!ret)
|
437 |
if (!ret)
|
361 |
m_reason = m_sub->getReason();
|
438 |
m_reason = m_sub->getReason();
|
362 |
return ret;
|
439 |
return ret;
|
363 |
}
|
440 |
}
|
364 |
|
441 |
|