|
a/src/rcldb/searchdata.h |
|
b/src/rcldb/searchdata.h |
|
... |
|
... |
39 |
namespace Rcl {
|
39 |
namespace Rcl {
|
40 |
|
40 |
|
41 |
/** Search clause types */
|
41 |
/** Search clause types */
|
42 |
enum SClType {
|
42 |
enum SClType {
|
43 |
SCLT_AND,
|
43 |
SCLT_AND,
|
44 |
SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR,
|
44 |
SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR, SCLT_PATH,
|
45 |
SCLT_SUB
|
45 |
SCLT_SUB
|
46 |
};
|
46 |
};
|
47 |
|
47 |
|
48 |
class SearchDataClause;
|
48 |
class SearchDataClause;
|
49 |
|
49 |
|
|
... |
|
... |
82 |
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
82 |
if (m_tp != SCLT_OR && m_tp != SCLT_AND)
|
83 |
m_tp = SCLT_OR;
|
83 |
m_tp = SCLT_OR;
|
84 |
commoninit();
|
84 |
commoninit();
|
85 |
}
|
85 |
}
|
86 |
SearchData()
|
86 |
SearchData()
|
87 |
: m_tp(SCLT_AND), m_stemlang("english")
|
87 |
: m_tp(SCLT_AND)
|
88 |
{
|
88 |
{
|
89 |
commoninit();
|
89 |
commoninit();
|
90 |
}
|
90 |
}
|
91 |
|
91 |
|
92 |
~SearchData()
|
92 |
~SearchData()
|
|
... |
|
... |
115 |
* before toNativeQuery().
|
115 |
* before toNativeQuery().
|
116 |
* @param threshold: don't use terms more frequent than the value
|
116 |
* @param threshold: don't use terms more frequent than the value
|
117 |
* (proportion of docs where they occur)
|
117 |
* (proportion of docs where they occur)
|
118 |
*/
|
118 |
*/
|
119 |
bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
|
119 |
bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
|
120 |
|
|
|
121 |
/** Set/get top subdirectory for filtering results */
|
|
|
122 |
void addDirSpec(const std::string& t, bool excl = false, float w = 1.0)
|
|
|
123 |
{
|
|
|
124 |
m_dirspecs.push_back(DirSpec(t, excl, w));
|
|
|
125 |
}
|
|
|
126 |
|
120 |
|
127 |
const std::string& getStemLang() {return m_stemlang;}
|
121 |
const std::string& getStemLang() {return m_stemlang;}
|
128 |
|
122 |
|
129 |
void setMinSize(size_t size) {m_minSize = size;}
|
123 |
void setMinSize(size_t size) {m_minSize = size;}
|
130 |
void setMaxSize(size_t size) {m_maxSize = size;}
|
124 |
void setMaxSize(size_t size) {m_maxSize = size;}
|
|
... |
|
... |
179 |
std::vector<SearchDataClause*> m_query;
|
173 |
std::vector<SearchDataClause*> m_query;
|
180 |
// Restricted set of filetypes if not empty.
|
174 |
// Restricted set of filetypes if not empty.
|
181 |
std::vector<std::string> m_filetypes;
|
175 |
std::vector<std::string> m_filetypes;
|
182 |
// Excluded set of file types if not empty
|
176 |
// Excluded set of file types if not empty
|
183 |
std::vector<std::string> m_nfiletypes;
|
177 |
std::vector<std::string> m_nfiletypes;
|
184 |
|
|
|
185 |
// Restrict to subtree or exclude one
|
|
|
186 |
class DirSpec {
|
|
|
187 |
public:
|
|
|
188 |
std::string dir;
|
|
|
189 |
bool exclude;
|
|
|
190 |
// For positive spec: affect weight instead of filter
|
|
|
191 |
float weight;
|
|
|
192 |
DirSpec(const std::string&d, bool x, float w)
|
|
|
193 |
: dir(d), exclude(x), weight(w)
|
|
|
194 |
{
|
|
|
195 |
}
|
|
|
196 |
};
|
|
|
197 |
std::vector<DirSpec> m_dirspecs;
|
|
|
198 |
|
178 |
|
199 |
bool m_haveDates;
|
179 |
bool m_haveDates;
|
200 |
DateInterval m_dates; // Restrict to date interval
|
180 |
DateInterval m_dates; // Restrict to date interval
|
201 |
size_t m_maxSize;
|
181 |
size_t m_maxSize;
|
202 |
size_t m_minSize;
|
182 |
size_t m_minSize;
|
|
... |
|
... |
238 |
enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
|
218 |
enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
|
239 |
SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16};
|
219 |
SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16};
|
240 |
|
220 |
|
241 |
SearchDataClause(SClType tp)
|
221 |
SearchDataClause(SClType tp)
|
242 |
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
222 |
: m_tp(tp), m_parentSearch(0), m_haveWildCards(0),
|
243 |
m_modifiers(SDCM_NONE), m_weight(1.0)
|
223 |
m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false)
|
244 |
{}
|
224 |
{}
|
245 |
virtual ~SearchDataClause() {}
|
225 |
virtual ~SearchDataClause() {}
|
246 |
virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
|
226 |
virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
|
247 |
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
|
227 |
bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
|
248 |
virtual std::string getReason() const {return m_reason;}
|
228 |
virtual std::string getReason() const {return m_reason;}
|
|
... |
|
... |
297 |
}
|
277 |
}
|
298 |
virtual void setWeight(float w)
|
278 |
virtual void setWeight(float w)
|
299 |
{
|
279 |
{
|
300 |
m_weight = w;
|
280 |
m_weight = w;
|
301 |
}
|
281 |
}
|
|
|
282 |
virtual bool getexclude() const
|
|
|
283 |
{
|
|
|
284 |
return m_exclude;
|
|
|
285 |
}
|
|
|
286 |
|
302 |
friend class SearchData;
|
287 |
friend class SearchData;
|
303 |
|
|
|
304 |
protected:
|
288 |
protected:
|
305 |
std::string m_reason;
|
289 |
std::string m_reason;
|
306 |
SClType m_tp;
|
290 |
SClType m_tp;
|
307 |
SearchData *m_parentSearch;
|
291 |
SearchData *m_parentSearch;
|
308 |
bool m_haveWildCards;
|
292 |
bool m_haveWildCards;
|
309 |
Modifier m_modifiers;
|
293 |
Modifier m_modifiers;
|
310 |
float m_weight;
|
294 |
float m_weight;
|
|
|
295 |
bool m_exclude;
|
311 |
private:
|
296 |
private:
|
312 |
SearchDataClause(const SearchDataClause&)
|
297 |
SearchDataClause(const SearchDataClause&)
|
313 |
{
|
298 |
{
|
314 |
}
|
299 |
}
|
315 |
SearchDataClause& operator=(const SearchDataClause&)
|
300 |
SearchDataClause& operator=(const SearchDataClause&)
|
|
... |
|
... |
402 |
|
387 |
|
403 |
protected:
|
388 |
protected:
|
404 |
std::string m_text;
|
389 |
std::string m_text;
|
405 |
};
|
390 |
};
|
406 |
|
391 |
|
|
|
392 |
|
|
|
393 |
/**
|
|
|
394 |
* Pathname filtering clause. This is special because of history:
|
|
|
395 |
* - Pathname filtering used to be performed as a post-processing step
|
|
|
396 |
* done with the url fields of doc data records.
|
|
|
397 |
* - Then it was done as special phrase searches on path elements prefixed
|
|
|
398 |
* with XP.
|
|
|
399 |
* Up to this point dir filtering data was stored as part of the searchdata
|
|
|
400 |
* object, not in the SearchDataClause tree. Only one, then a list,
|
|
|
401 |
* of clauses were stored, and they were always ANDed together.
|
|
|
402 |
*
|
|
|
403 |
* In order to allow for OR searching, dir clauses are now stored in a
|
|
|
404 |
* specific SearchDataClause, but this is still special because the field has
|
|
|
405 |
* non-standard phrase-like processing, reflected in index storage by
|
|
|
406 |
* an empty element representing / (as "XP").
|
|
|
407 |
*
|
|
|
408 |
* A future version should use a standard phrase with an anchor to the
|
|
|
409 |
* start if the path starts with /. As this implies an index format
|
|
|
410 |
* change but is not important enough to warrant it, this has to wait for
|
|
|
411 |
* the next format change.
|
|
|
412 |
*/
|
|
|
413 |
class SearchDataClausePath : public SearchDataClause {
|
|
|
414 |
public:
|
|
|
415 |
SearchDataClausePath(const std::string& txt, bool excl = false)
|
|
|
416 |
: SearchDataClause(SCLT_PATH), m_text(txt)
|
|
|
417 |
{
|
|
|
418 |
m_exclude = excl;
|
|
|
419 |
m_haveWildCards = false;
|
|
|
420 |
}
|
|
|
421 |
|
|
|
422 |
virtual ~SearchDataClausePath()
|
|
|
423 |
{
|
|
|
424 |
}
|
|
|
425 |
|
|
|
426 |
virtual void getTerms(HighlightData&) const
|
|
|
427 |
{
|
|
|
428 |
}
|
|
|
429 |
|
|
|
430 |
virtual bool toNativeQuery(Rcl::Db &, void *);
|
|
|
431 |
virtual const std::string& gettext() const
|
|
|
432 |
{
|
|
|
433 |
return m_text;
|
|
|
434 |
}
|
|
|
435 |
|
|
|
436 |
protected:
|
|
|
437 |
std::string m_text;
|
|
|
438 |
};
|
|
|
439 |
|
407 |
/**
|
440 |
/**
|
408 |
* A clause coming from a NEAR or PHRASE entry field. There is only one
|
441 |
* A clause coming from a NEAR or PHRASE entry field. There is only one
|
409 |
* std::string group, and a specified distance, which applies to it.
|
442 |
* std::string group, and a specified distance, which applies to it.
|
410 |
*/
|
443 |
*/
|
411 |
class SearchDataClauseDist : public SearchDataClauseSimple {
|
444 |
class SearchDataClauseDist : public SearchDataClauseSimple {
|