Switch to unified view

a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h
...
...
46
};
46
};
47
47
48
class SearchDataClause;
48
class SearchDataClause;
49
49
50
/** 
50
/** 
51
    Data structure representing a Recoll user query, for translation
51
    A SearchData object represents a Recoll user query, for translation
52
    into a Xapian query tree. This could probably be better called a 'question'.
52
    into a Xapian query tree. This could probably be better called a 'question'.
53
53
54
    This is a list of search clauses combined through either OR or AND.
54
    This is a list of SearchDataClause objects combined through either
55
    OR or AND.
55
56
56
    Clauses either reflect user entry in a query field: some text, a
57
    Clauses either reflect user entry in a query field: some text, a
57
    clause type (AND/OR/NEAR etc.), possibly a distance, or point to
58
    clause type (AND/OR/NEAR etc.), possibly a distance, or are the
59
    result of parsing query language input. A clause can also point to
58
    another SearchData representing a subquery.
60
    another SearchData representing a subquery.
59
61
60
    The content of each clause when added may not be fully parsed yet
62
    The content of each clause when added may not be fully parsed yet
61
    (may come directly from a gui field). It will be parsed and may be
63
    (may come directly from a gui field). It will be parsed and may be
62
    translated to several queries in the Xapian sense, for example
64
    translated to several queries in the Xapian sense, for example
63
    several terms and phrases as would result from 
65
    several terms and phrases as would result from 
64
    ["this is a phrase"  term1 term2] . 
66
    ["this is a phrase"  term1 term2] . 
65
67
66
    This is why the clauses also have an AND/OR/... type. 
68
    This is why the clauses also have an AND/OR/... type. They are an 
69
    intermediate form between the primary user input and 
70
    the final Xapian::Query tree.
67
71
68
    A phrase clause could be added either explicitly or using double quotes:
72
    For example, a phrase clause could be added either explicitly or
69
    {SCLT_PHRASE, [this is a phrase]} or as {SCLT_XXX, ["this is a phrase"]}
73
    using double quotes: {SCLT_PHRASE, [this is a phrase]} or as
74
    {SCLT_XXX, ["this is a phrase"]}
70
75
71
*/
76
*/
72
class SearchData {
77
class SearchData {
73
public:
78
public:
74
    SearchData(SClType tp, const string& stemlang) 
79
    SearchData(SClType tp, const string& stemlang) 
75
    : m_tp(tp), m_haveDates(false), m_maxSize(size_t(-1)),
80
  : m_tp(tp), m_stemlang(stemlang)
76
      m_minSize(size_t(-1)), m_haveWildCards(false), m_stemlang(stemlang)
77
    {
81
    {
78
    if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
82
    if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
79
        m_tp = SCLT_OR;
83
        m_tp = SCLT_OR;
84
  commoninit();
80
    }
85
    }
81
    SearchData() 
86
    SearchData() 
82
  : m_tp(SCLT_AND), m_haveDates(false), m_maxSize(size_t(-1)),
87
  : m_tp(SCLT_AND), m_stemlang("english")
83
    m_minSize(size_t(-1)), m_haveWildCards(false), m_stemlang("english")
84
    {
88
    {
89
  commoninit();
85
    }
90
    }
86
    
91
    
87
    ~SearchData() {erase();}
92
    ~SearchData() 
93
    {
94
  erase();
95
    }
88
96
89
    /** Make pristine */
97
    /** Make pristine */
90
    void erase();
98
    void erase();
91
99
92
    /** Is there anything but a file name search in here ? */
100
    /** Is there anything but a file name search in here ? */
...
...
94
102
95
    /** Do we have wildcards anywhere apart from filename searches ? */
103
    /** Do we have wildcards anywhere apart from filename searches ? */
96
    bool haveWildCards() {return m_haveWildCards;}
104
    bool haveWildCards() {return m_haveWildCards;}
97
105
98
    /** Translate to Xapian query. rcldb knows about the void*  */
106
    /** Translate to Xapian query. rcldb knows about the void*  */
99
    bool toNativeQuery(Rcl::Db &db, void *, int maxexp, int maxcl);
107
    bool toNativeQuery(Rcl::Db &db, void *);
100
108
101
    /** We become the owner of cl and will delete it */
109
    /** We become the owner of cl and will delete it */
102
    bool addClause(SearchDataClause *cl);
110
    bool addClause(SearchDataClause *cl);
103
111
104
    /** If this is a simple query (one field only, no distance clauses),
112
    /** If this is a simple query (one field only, no distance clauses),
...
...
141
     * initializing the query. It is stored here for usage in the GUI.
149
     * initializing the query. It is stored here for usage in the GUI.
142
     */
150
     */
143
    std::string getDescription() {return m_description;}
151
    std::string getDescription() {return m_description;}
144
    void setDescription(const std::string& d) {m_description = d;}
152
    void setDescription(const std::string& d) {m_description = d;}
145
153
154
    /** Return an XML version of the contents, for storage in search history
155
  by the GUI */
146
    string asXML();
156
    string asXML();
157
147
    void setTp(SClType tp) 
158
    void setTp(SClType tp) 
148
    {
159
    {
149
    m_tp = tp;
160
    m_tp = tp;
150
    }
161
    }
162
163
    void setMaxExpand(int max)
164
    {
165
  m_softmaxexpand = max;
166
    }
167
    bool getAutoDiac() {return m_autodiacsens;}
168
    bool getAutoCase() {return m_autocasesens;}
169
    int getMaxExp() {return m_maxexp;}
170
    int getMaxCl() {return m_maxcl;}
171
151
    friend class ::AdvSearch;
172
    friend class ::AdvSearch;
173
152
private:
174
private:
153
    // Combine type. Only SCLT_AND or SCLT_OR here
175
    // Combine type. Only SCLT_AND or SCLT_OR here
154
    SClType                   m_tp; 
176
    SClType                   m_tp; 
155
    // Complex query descriptor
177
    // Complex query descriptor
156
    std::vector<SearchDataClause*> m_query;
178
    std::vector<SearchDataClause*> m_query;
...
...
182
    std::string m_description; 
204
    std::string m_description; 
183
    std::string m_reason;
205
    std::string m_reason;
184
    bool   m_haveWildCards;
206
    bool   m_haveWildCards;
185
    std::string m_stemlang;
207
    std::string m_stemlang;
186
208
209
    // Parameters set at the start of toNativeQuery because they need
210
    // an rclconfig. Actually this does not make sense and it would be
211
    // simpler to just pass an rclconfig to the constructor.
212
    bool m_autodiacsens;
213
    bool m_autocasesens;
214
    int m_maxexp;
215
    int m_maxcl;
216
217
    // Parameters which are not part of the main query data but may influence
218
    // translation in special cases.
219
    // Maximum TermMatch (e.g. wildcard) expansion. This is normally set
220
    // from the configuration with a high default, but may be set to a lower
221
    // value during "find-as-you-type" operations from the GUI
222
    int m_softmaxexpand;
223
187
    bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
224
    bool expandFileTypes(RclConfig *cfg, std::vector<std::string>& exptps);
188
    bool clausesToQuery(Rcl::Db &db, SClType tp,     
225
    bool clausesToQuery(Rcl::Db &db, SClType tp,     
189
            std::vector<SearchDataClause*>& query,
226
            std::vector<SearchDataClause*>& query,
190
            string& reason, void *d, int, int);
227
            string& reason, void *d);
228
    void commoninit();
191
229
192
    /* Copy constructor and assignment are private and forbidden */
230
    /* Copy constructor and assignment are private and forbidden */
193
    SearchData(const SearchData &) {}
231
    SearchData(const SearchData &) {}
194
    SearchData& operator=(const SearchData&) {return *this;};
232
    SearchData& operator=(const SearchData&) {return *this;};
195
};
233
};
...
...
202
    SearchDataClause(SClType tp) 
240
    SearchDataClause(SClType tp) 
203
    : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), 
241
    : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), 
204
      m_modifiers(SDCM_NONE), m_weight(1.0)
242
      m_modifiers(SDCM_NONE), m_weight(1.0)
205
    {}
243
    {}
206
    virtual ~SearchDataClause() {}
244
    virtual ~SearchDataClause() {}
207
    virtual bool toNativeQuery(Rcl::Db &db, void *, int maxexp, int maxcl) = 0;
245
    virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
208
    bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
246
    bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
209
    virtual std::string getReason() const {return m_reason;}
247
    virtual std::string getReason() const {return m_reason;}
210
    virtual void getTerms(HighlightData & hldata) const = 0;
248
    virtual void getTerms(HighlightData & hldata) const = 0;
211
249
212
    SClType getTp() const
250
    SClType getTp() const
...
...
219
    }
257
    }
220
    string getStemLang() 
258
    string getStemLang() 
221
    {
259
    {
222
    return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ? 
260
    return (m_modifiers & SDCM_NOSTEMMING) || m_parentSearch == 0 ? 
223
        cstr_null : m_parentSearch->getStemLang();
261
        cstr_null : m_parentSearch->getStemLang();
262
    }
263
    bool getAutoDiac()
264
    {
265
  return m_parentSearch ? m_parentSearch->getAutoDiac() : false;
266
    }
267
    bool getAutoCase()
268
    {
269
  return m_parentSearch ? m_parentSearch->getAutoCase() : true;
270
    }
271
    int getMaxExp() 
272
    {
273
  return m_parentSearch ? m_parentSearch->getMaxExp() : 10000;
274
    }
275
    int getMaxCl() 
276
    {
277
  return m_parentSearch ? m_parentSearch->getMaxCl() : 100000;
224
    }
278
    }
225
    virtual void setModifiers(Modifier mod) 
279
    virtual void setModifiers(Modifier mod) 
226
    {
280
    {
227
    m_modifiers = mod;
281
    m_modifiers = mod;
228
    }
282
    }
...
...
261
    
315
    
262
/**
316
/**
263
 * "Simple" data clause with user-entered query text. This can include 
317
 * "Simple" data clause with user-entered query text. This can include 
264
 * multiple phrases and words, but no specified distance.
318
 * multiple phrases and words, but no specified distance.
265
 */
319
 */
320
class TextSplitQ;
266
class SearchDataClauseSimple : public SearchDataClause {
321
class SearchDataClauseSimple : public SearchDataClause {
267
public:
322
public:
268
    SearchDataClauseSimple(SClType tp, const std::string& txt, 
323
    SearchDataClauseSimple(SClType tp, const std::string& txt, 
269
               const std::string& fld = std::string())
324
               const std::string& fld = std::string())
270
    : SearchDataClause(tp), m_text(txt), m_field(fld)
325
    : SearchDataClause(tp), m_text(txt), m_field(fld)
...
...
276
    virtual ~SearchDataClauseSimple() 
331
    virtual ~SearchDataClauseSimple() 
277
    {
332
    {
278
    }
333
    }
279
334
280
    /** Translate to Xapian query */
335
    /** Translate to Xapian query */
281
    virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
336
    virtual bool toNativeQuery(Rcl::Db &, void *);
282
337
283
    virtual void getTerms(HighlightData& hldata) const
338
    virtual void getTerms(HighlightData& hldata) const
284
    {
339
    {
285
    hldata.append(m_hldata);
340
    hldata.append(m_hldata);
286
    }
341
    }
...
...
294
    }
349
    }
295
protected:
350
protected:
296
    std::string  m_text;  // Raw user entry text.
351
    std::string  m_text;  // Raw user entry text.
297
    std::string  m_field; // Field specification if any
352
    std::string  m_field; // Field specification if any
298
    HighlightData m_hldata;
353
    HighlightData m_hldata;
354
    int  m_curcl;
355
356
    bool processUserString(Rcl::Db &db, const string &iq, int mods,  
357
             std::string &ermsg,
358
             void* pq, int slack = 0, bool useNear = false);
359
    bool expandTerm(Rcl::Db &db, std::string& ermsg, int mods, 
360
          const std::string& term, 
361
          std::vector<std::string>& exp, 
362
                    std::string& sterm, const std::string& prefix);
363
    // After splitting entry on whitespace: process non-phrase element
364
    void processSimpleSpan(Rcl::Db &db, string& ermsg, const string& span, 
365
             int mods, void *pq);
366
    // Process phrase/near element
367
    void processPhraseOrNear(Rcl::Db &db, string& ermsg, TextSplitQ *splitData, 
368
               int mods, void *pq, bool useNear, int slack);
299
};
369
};
300
370
301
/** 
371
/** 
302
 * Filename search clause. This is special because term expansion is only
372
 * Filename search clause. This is special because term expansion is only
303
 * performed against the unsplit file name terms. 
373
 * performed against the unsplit file name terms. 
304
 *
374
 *
305
 * There is a big advantage in expanding only against the
375
 * There is a big advantage in expanding only against the
306
 * field, especially for file names, because this makes searches for
376
 * field, especially for file names, because this makes searches for
307
 * "*xx" much faster (no need to scan the whole main index).
377
 * "*xx" much faster (no need to scan the whole main index).
308
 */
378
 */
309
class SearchDataClauseFilename : public SearchDataClauseSimple {
379
class SearchDataClauseFilename : public SearchDataClause {
310
public:
380
public:
311
    SearchDataClauseFilename(const std::string& txt)
381
    SearchDataClauseFilename(const std::string& txt)
312
    : SearchDataClauseSimple(SCLT_FILENAME, txt) 
382
    : SearchDataClause(SCLT_FILENAME), m_text(txt) 
313
    {
383
    {
314
    // File name searches don't count when looking for wild cards.
384
    // File name searches don't count when looking for wild cards.
315
    m_haveWildCards = false;
385
    m_haveWildCards = false;
316
    }
386
    }
317
387
318
    virtual ~SearchDataClauseFilename() 
388
    virtual ~SearchDataClauseFilename() 
319
    {
389
    {
320
    }
390
    }
321
391
392
    virtual void getTerms(HighlightData&) const
393
    {
394
    }
395
322
    virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
396
    virtual bool toNativeQuery(Rcl::Db &, void *);
397
398
protected:
399
    std::string m_text;
323
};
400
};
324
401
325
/** 
402
/** 
326
 * A clause coming from a NEAR or PHRASE entry field. There is only one 
403
 * A clause coming from a NEAR or PHRASE entry field. There is only one 
327
 * std::string group, and a specified distance, which applies to it.
404
 * std::string group, and a specified distance, which applies to it.
...
...
336
413
337
    virtual ~SearchDataClauseDist() 
414
    virtual ~SearchDataClauseDist() 
338
    {
415
    {
339
    }
416
    }
340
417
341
    virtual bool toNativeQuery(Rcl::Db &, void *, int maxexp, int maxcl);
418
    virtual bool toNativeQuery(Rcl::Db &, void *);
342
    virtual int getslack() const
419
    virtual int getslack() const
343
    {
420
    {
344
    return m_slack;
421
    return m_slack;
345
    }
422
    }
346
private:
423
private:
...
...
352
public:
429
public:
353
    SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub) 
430
    SearchDataClauseSub(SClType tp, RefCntr<SearchData> sub) 
354
    : SearchDataClause(tp), m_sub(sub) 
431
    : SearchDataClause(tp), m_sub(sub) 
355
    {
432
    {
356
    }
433
    }
357
    virtual bool toNativeQuery(Rcl::Db &db, void *p, int maxexp, int maxcl)
434
    virtual bool toNativeQuery(Rcl::Db &db, void *p)
358
    {
435
    {
359
    bool ret = m_sub->toNativeQuery(db, p, maxexp, maxcl);
436
    bool ret = m_sub->toNativeQuery(db, p);
360
    if (!ret) 
437
    if (!ret) 
361
        m_reason = m_sub->getReason();
438
        m_reason = m_sub->getReason();
362
    return ret;
439
    return ret;
363
    }
440
    }
364
441