a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h
...
...
29
29
30
#include "rcldb.h"
30
#include "rcldb.h"
31
#include "refcntr.h"
31
#include "refcntr.h"
32
#include "smallut.h"
32
#include "smallut.h"
33
#include "cstr.h"
33
#include "cstr.h"
34
#include "hldata.h"
34
35
35
class RclConfig;
36
class RclConfig;
36
37
37
#ifndef NO_NAMESPACES
38
using std::vector;
39
using std::string;
40
namespace Rcl {
38
namespace Rcl {
41
#endif // NO_NAMESPACES
42
39
43
/** Search clause types */
40
/** Search clause types */
44
enum SClType {
41
enum SClType {
45
    SCLT_AND, 
42
    SCLT_AND, 
46
    SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR,
43
    SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR,
...
...
48
};
45
};
49
46
50
class SearchDataClause;
47
class SearchDataClause;
51
48
52
/** 
49
/** 
53
  Data structure representing a Recoll user query, for translation
50
    Data structure representing a Recoll user query, for translation
54
  into a Xapian query tree. This could probably be better called a 'question'.
51
    into a Xapian query tree. This could probably be better called a 'question'.
55
52
56
  This is a list of search clauses combined through either OR or AND.
53
    This is a list of search clauses combined through either OR or AND.
57
54
58
  Clauses either reflect user entry in a query field: some text, a
55
    Clauses either reflect user entry in a query field: some text, a
59
  clause type (AND/OR/NEAR etc.), possibly a distance, or points to
56
    clause type (AND/OR/NEAR etc.), possibly a distance, or points to
60
  another SearchData representing a subquery.
57
    another SearchData representing a subquery.
61
58
62
  The content of each clause when added may not be fully parsed yet
59
    The content of each clause when added may not be fully parsed yet
63
  (may come directly from a gui field). It will be parsed and may be
60
    (may come directly from a gui field). It will be parsed and may be
64
  translated to several queries in the Xapian sense, for example
61
    translated to several queries in the Xapian sense, for example
65
  several terms and phrases as would result from 
62
    several terms and phrases as would result from 
66
  ["this is a phrase"  term1 term2] . 
63
    ["this is a phrase"  term1 term2] . 
67
64
68
  This is why the clauses also have an AND/OR/... type. 
65
    This is why the clauses also have an AND/OR/... type. 
69
66
70
  A phrase clause could be added either explicitly or using double quotes:
67
    A phrase clause could be added either explicitly or using double quotes:
71
  {SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
68
    {SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
72
69
73
 */
70
*/
74
class SearchData {
71
class SearchData {
75
public:
72
public:
76
    SearchData(SClType tp) 
73
    SearchData(SClType tp) 
77
        : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), 
74
    : m_tp(tp), m_topdirexcl(false), m_topdirweight(1.0), 
78
    m_haveDates(false), m_maxSize(size_t(-1)),
75
      m_haveDates(false), m_maxSize(size_t(-1)),
79
    m_minSize(size_t(-1)), m_haveWildCards(false) 
76
      m_minSize(size_t(-1)), m_haveWildCards(false) 
80
    {
77
    {
81
    if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
78
    if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
82
        m_tp = SCLT_OR;
79
        m_tp = SCLT_OR;
83
    }
80
    }
84
    ~SearchData() {erase();}
81
    ~SearchData() {erase();}
...
...
106
     *     (proportion of docs where they occur)    
103
     *     (proportion of docs where they occur)    
107
     */
104
     */
108
    bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
105
    bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
109
106
110
    /** Set/get top subdirectory for filtering results */
107
    /** Set/get top subdirectory for filtering results */
111
    void setTopdir(const string& t, bool excl = false, float w = 1.0) 
108
    void setTopdir(const std::string& t, bool excl = false, float w = 1.0) 
112
    {
109
    {
113
    m_topdir = t;
110
    m_topdir = t;
114
    m_topdirexcl = excl;
111
    m_topdirexcl = excl;
115
    m_topdirweight = w;
112
    m_topdirweight = w;
116
    }
113
    }
...
...
120
117
121
    /** Set date span for filtering results */
118
    /** Set date span for filtering results */
122
    void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
119
    void setDateSpan(DateInterval *dip) {m_dates = *dip; m_haveDates = true;}
123
120
124
    /** Add file type for filtering results */
121
    /** Add file type for filtering results */
125
    void addFiletype(const string& ft) {m_filetypes.push_back(ft);}
122
    void addFiletype(const std::string& ft) {m_filetypes.push_back(ft);}
126
    /** Add file type to not wanted list */
123
    /** Add file type to not wanted list */
127
    void remFiletype(const string& ft) {m_nfiletypes.push_back(ft);}
124
    void remFiletype(const std::string& ft) {m_nfiletypes.push_back(ft);}
128
125
129
    void setStemlang(const string& lang = "english") {m_stemlang = lang;}
126
    void setStemlang(const std::string& lang = "english") {m_stemlang = lang;}
130
127
131
    /** Retrieve error description */
128
    /** Retrieve error description */
132
    string getReason() {return m_reason;}
129
    std::string getReason() {return m_reason;}
133
130
134
    /** Get terms and phrase/near groups. Used in the GUI for highlighting 
131
    /** Return term expansion data. Mostly used by caller for highlighting
135
     * The groups and gslks vectors are parallel and hold the phrases/near
136
     * string groups and their associated slacks (distance in excess of group
137
     * size)
138
     */
132
     */
139
    bool getTerms(vector<string>& terms, 
133
    void getTerms(HighlightData& hldata) const;
140
        vector<vector<string> >& groups, vector<int>& gslks) const;
141
    /** Get user-input terms (before expansion etc.) */
142
    void getUTerms(vector<string>& terms) const;
143
134
144
    /** 
135
    /** 
145
     * Get/set the description field which is retrieved from xapian after
136
     * Get/set the description field which is retrieved from xapian after
146
     * initializing the query. It is stored here for usage in the GUI.
137
     * initializing the query. It is stored here for usage in the GUI.
147
     */
138
     */
148
    string getDescription() {return m_description;}
139
    std::string getDescription() {return m_description;}
149
    void setDescription(const string& d) {m_description = d;}
140
    void setDescription(const std::string& d) {m_description = d;}
150
141
151
private:
142
private:
152
    SClType                   m_tp; // Only SCLT_AND or SCLT_OR here
143
    // Combine type. Only SCLT_AND or SCLT_OR here
144
    SClType                   m_tp; 
145
    // Complex query descriptor
153
    vector<SearchDataClause*> m_query;
146
    std::vector<SearchDataClause*> m_query;
154
    vector<string>            m_filetypes; // Restrict to filetypes if set.
147
    // Restricted set of filetypes if not empty.
155
    vector<string>            m_nfiletypes; // Unwanted file types
148
    std::vector<std::string>            m_filetypes; 
156
    string                    m_topdir; // Restrict to subtree.
149
    // Excluded set of file types if not empty
150
    std::vector<std::string>            m_nfiletypes;
151
    // Restrict to subtree.
152
    std::string                    m_topdir; 
157
    bool                      m_topdirexcl; // Invert meaning
153
    bool                      m_topdirexcl; // Invert meaning
158
    float                     m_topdirweight; // affect weight instead of filter
154
    float                     m_topdirweight; // affect weight instead of filter
159
    bool                      m_haveDates;
155
    bool                      m_haveDates;
160
    DateInterval              m_dates; // Restrict to date interval
156
    DateInterval              m_dates; // Restrict to date interval
161
    size_t                    m_maxSize;
157
    size_t                    m_maxSize;
162
    size_t                    m_minSize;
158
    size_t                    m_minSize;
163
    // Printable expanded version of the complete query, retrieved/set
159
    // Printable expanded version of the complete query, retrieved/set
164
    // from rcldb after the Xapian::setQuery() call
160
    // from rcldb after the Xapian::setQuery() call
165
    string m_description; 
161
    std::string m_description; 
166
    string m_reason;
162
    std::string m_reason;
167
    bool   m_haveWildCards;
163
    bool   m_haveWildCards;
168
    string m_stemlang;
164
    std::string m_stemlang;
169
    bool expandFileTypes(RclConfig *cfg, vector<string>& exptps);
165
    bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
170
    /* Copy constructor and assignment are private and forbidden */
166
    /* Copy constructor and assignment are private and forbidden */
171
    SearchData(const SearchData &) {}
167
    SearchData(const SearchData &) {}
172
    SearchData& operator=(const SearchData&) {return *this;};
168
    SearchData& operator=(const SearchData&) {return *this;};
173
};
169
};
174
170
...
...
176
public:
172
public:
177
    enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
173
    enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
178
           SDCM_ANCHOREND=4};
174
           SDCM_ANCHOREND=4};
179
175
180
    SearchDataClause(SClType tp) 
176
    SearchDataClause(SClType tp) 
181
  : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), 
177
    : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), 
182
    m_modifiers(SDCM_NONE), m_weight(1.0)
178
      m_modifiers(SDCM_NONE), m_weight(1.0)
183
    {}
179
    {}
184
    virtual ~SearchDataClause() {}
180
    virtual ~SearchDataClause() {}
185
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string&) = 0;
181
    virtual bool toNativeQuery(Rcl::Db &db, void *, const std::string&) = 0;
186
    bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
182
    bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
187
    virtual string getReason() const {return m_reason;}
183
    virtual std::string getReason() const {return m_reason;}
188
    virtual bool getTerms(vector<string>&, vector<vector<string> >&,
184
    virtual void getTerms(HighlightData & hldata) const = 0;
189
            vector<int>&) const = 0;
190
    virtual void getUTerms(vector<string>&) const = 0;
191
185
192
    SClType getTp() {return m_tp;}
186
    SClType getTp() 
187
    {
188
  return m_tp;
189
    }
193
    void setParent(SearchData *p) {m_parentSearch = p;}
190
    void setParent(SearchData *p) 
191
    {
192
  m_parentSearch = p;
193
    }
194
    virtual void setModifiers(Modifier mod) {m_modifiers = mod;}
194
    virtual void setModifiers(Modifier mod) 
195
    {
196
  m_modifiers = mod;
197
    }
195
    virtual int getModifiers() {return m_modifiers;}
198
    virtual int getModifiers() 
199
    {
200
  return m_modifiers;
201
    }
196
    virtual void addModifier(Modifier mod) {
202
    virtual void addModifier(Modifier mod) 
203
    {
197
    int imod = getModifiers();
204
    int imod = getModifiers();
198
    imod |= mod;
205
    imod |= mod;
199
    setModifiers(Modifier(imod));
206
    setModifiers(Modifier(imod));
200
    }
207
    }
201
    virtual void setWeight(float w) {m_weight = w;}
208
    virtual void setWeight(float w) 
209
    {
210
  m_weight = w;
211
    }
202
    friend class SearchData;
212
    friend class SearchData;
203
213
204
protected:
214
protected:
205
    string      m_reason;
215
    std::string      m_reason;
206
    SClType     m_tp;
216
    SClType     m_tp;
207
    SearchData *m_parentSearch;
217
    SearchData *m_parentSearch;
208
    bool        m_haveWildCards;
218
    bool        m_haveWildCards;
209
    Modifier    m_modifiers;
219
    Modifier    m_modifiers;
210
    float       m_weight;
220
    float       m_weight;
211
private:
221
private:
212
    SearchDataClause(const SearchDataClause&) {}
222
    SearchDataClause(const SearchDataClause&) 
223
    {
224
    }
213
    SearchDataClause& operator=(const SearchDataClause&) {
225
    SearchDataClause& operator=(const SearchDataClause&) 
226
    {
214
    return *this;
227
    return *this;
215
    }
228
    }
216
};
229
};
217
    
230
    
218
/**
231
/**
219
 * "Simple" data clause with user-entered query text. This can include 
232
 * "Simple" data clause with user-entered query text. This can include 
220
 * multiple phrases and words, but no specified distance.
233
 * multiple phrases and words, but no specified distance.
221
 */
234
 */
222
class SearchDataClauseSimple : public SearchDataClause {
235
class SearchDataClauseSimple : public SearchDataClause {
223
public:
236
public:
224
    SearchDataClauseSimple(SClType tp, const string& txt, 
237
    SearchDataClauseSimple(SClType tp, const std::string& txt, 
225
               const string& fld = string())
238
               const std::string& fld = std::string())
226
    : SearchDataClause(tp), m_text(txt), m_field(fld), m_slack(0) {
239
    : SearchDataClause(tp), m_text(txt), m_field(fld)
227
  m_haveWildCards = (txt.find_first_of(cstr_minwilds) != string::npos);
228
    }
240
    {
241
  m_haveWildCards = 
242
      (txt.find_first_of(cstr_minwilds) != std::string::npos);
243
    }
229
244
230
    virtual ~SearchDataClauseSimple() {}
245
    virtual ~SearchDataClauseSimple() 
246
    {
247
    }
231
248
232
    /** Translate to Xapian query */
249
    /** Translate to Xapian query */
233
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
250
    virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
234
251
235
    /** Retrieve query terms and term groups. This is used for highlighting */
252
    virtual void getTerms(HighlightData& hldata) const
236
    virtual bool getTerms(vector<string>& terms, /* Single terms */
237
            vector<vector<string> >& groups, /* Prox grps */
238
            vector<int>& gslks) const        /* Prox slacks */
239
    {
240
  terms.insert(terms.end(), m_terms.begin(), m_terms.end());
241
  groups.insert(groups.end(), m_groups.begin(), m_groups.end());
242
  gslks.insert(gslks.end(), m_groups.size(), m_slack);
243
  return true;
244
    }
253
    {
245
    virtual void getUTerms(vector<string>& terms) const
254
  hldata.append(m_hldata);
246
    {
255
    }
247
  terms.insert(terms.end(), m_uterms.begin(), m_uterms.end());
256
    virtual const std::string& gettext() 
248
    }
257
    {
249
    virtual const string& gettext() {return m_text;}
258
  return m_text;
259
    }
250
    virtual const string& getfield() {return m_field;}
260
    virtual const std::string& getfield() 
261
    {
262
  return m_field;
263
    }
251
protected:
264
protected:
252
    string  m_text;  // Raw user entry text.
265
    std::string  m_text;  // Raw user entry text.
253
    string  m_field; // Field specification if any
266
    std::string  m_field; // Field specification if any
254
    // Single terms and phrases resulting from breaking up m_text;
267
    HighlightData m_hldata;
255
    // valid after toNativeQuery() call
256
    vector<string>          m_terms;
257
    vector<vector<string> > m_groups;
258
    // User terms before expansion
259
    vector<string>          m_uterms;
260
    // Declare m_slack here. Always 0, but allows getTerms to work for
261
    // SearchDataClauseDist
262
    int m_slack;
263
};
268
};
264
269
265
/** 
270
/** 
266
 * Filename search clause. This is special because term expansion is only
271
 * Filename search clause. This is special because term expansion is only
267
 * performed against the unsplit file name terms. 
272
 * performed against the unsplit file name terms. 
...
...
270
 * field, especially for file names, because this makes searches for
275
 * field, especially for file names, because this makes searches for
271
 * "*xx" much faster (no need to scan the whole main index).
276
 * "*xx" much faster (no need to scan the whole main index).
272
 */
277
 */
273
class SearchDataClauseFilename : public SearchDataClauseSimple {
278
class SearchDataClauseFilename : public SearchDataClauseSimple {
274
public:
279
public:
275
    SearchDataClauseFilename(const string& txt)
280
    SearchDataClauseFilename(const std::string& txt)
276
    : SearchDataClauseSimple(SCLT_FILENAME, txt) {
281
    : SearchDataClauseSimple(SCLT_FILENAME, txt) 
282
    {
277
    // File name searches don't count when looking for wild cards.
283
    // File name searches don't count when looking for wild cards.
278
    m_haveWildCards = false;
284
    m_haveWildCards = false;
279
    }
285
    }
286
280
    virtual ~SearchDataClauseFilename() {}
287
    virtual ~SearchDataClauseFilename() 
288
    {
289
    }
290
281
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
291
    virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
282
};
292
};
283
293
284
/** 
294
/** 
285
 * A clause coming from a NEAR or PHRASE entry field. There is only one 
295
 * A clause coming from a NEAR or PHRASE entry field. There is only one 
286
 * string group, and a specified distance, which applies to it.
296
 * string group, and a specified distance, which applies to it.
287
 */
297
 */
288
class SearchDataClauseDist : public SearchDataClauseSimple {
298
class SearchDataClauseDist : public SearchDataClauseSimple {
289
public:
299
public:
290
    SearchDataClauseDist(SClType tp, const string& txt, int slack, 
300
    SearchDataClauseDist(SClType tp, const std::string& txt, int slack, 
291
             const string& fld = string())
301
             const std::string& fld = std::string())
292
    : SearchDataClauseSimple(tp, txt, fld) {m_slack = slack;}
302
    : SearchDataClauseSimple(tp, txt, fld), m_slack(slack)
303
    {
304
    }
305
293
    virtual ~SearchDataClauseDist() {}
306
    virtual ~SearchDataClauseDist() 
307
    {
308
    }
294
309
295
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
310
    virtual bool toNativeQuery(Rcl::Db &, void *, const std::string& stemlang);
296
311
private:
297
    // m_slack is declared in SearchDataClauseSimple
312
    int m_slack;
298
};
313
};
299
314
300
/** Subquery */
315
/** Subquery */
301
class SearchDataClauseSub : public SearchDataClause {
316
class SearchDataClauseSub : public SearchDataClause {
302
public:
317
public:
303
    // We take charge of the SearchData * and will delete it.
318
    // We take charge of the SearchData * and will delete it.
304
    SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub) 
319
    SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub) 
305
    : SearchDataClause(tp), m_sub(sub) {}
320
    : SearchDataClause(tp), m_sub(sub) 
321
    {
322
    }
323
306
    virtual ~SearchDataClauseSub() {}
324
    virtual ~SearchDataClauseSub() 
325
    {
326
    }
327
307
    virtual bool toNativeQuery(Rcl::Db &db, void *, const string& stemlang);
328
    virtual bool toNativeQuery(Rcl::Db &db, void *p, const std::string&)
308
    virtual bool getTerms(vector<string>&, vector<vector<string> >&,
329
    {
309
            vector<int>&) const;
330
  return m_sub->toNativeQuery(db, p);
310
    virtual void getUTerms(vector<string>&) const;
331
    }
332
333
    virtual void getTerms(HighlightData& hldata) const
334
    {
335
  m_sub.getconstptr()->getTerms(hldata);
336
    }
337
311
protected:
338
protected:
312
    RefCntr<SearchData> m_sub;
339
    RefCntr<SearchData> m_sub;
313
};
340
};
314
341
315
} // Namespace Rcl
342
} // Namespace Rcl
343
316
#endif /* _SEARCHDATA_H_INCLUDED_ */
344
#endif /* _SEARCHDATA_H_INCLUDED_ */