Switch to unified view

a/src/rcldb/searchdata.h b/src/rcldb/searchdata.h
...
...
39
namespace Rcl {
39
namespace Rcl {
40
40
41
/** Search clause types */
41
/** Search clause types */
42
enum SClType {
42
enum SClType {
43
    SCLT_AND, 
43
    SCLT_AND, 
44
    SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR,
44
    SCLT_OR, SCLT_EXCL, SCLT_FILENAME, SCLT_PHRASE, SCLT_NEAR, SCLT_PATH,
45
    SCLT_SUB
45
    SCLT_SUB
46
};
46
};
47
47
48
class SearchDataClause;
48
class SearchDataClause;
49
49
...
...
82
    if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
82
    if (m_tp != SCLT_OR && m_tp != SCLT_AND) 
83
        m_tp = SCLT_OR;
83
        m_tp = SCLT_OR;
84
    commoninit();
84
    commoninit();
85
    }
85
    }
86
    SearchData() 
86
    SearchData() 
87
  : m_tp(SCLT_AND), m_stemlang("english")
87
  : m_tp(SCLT_AND)
88
    {
88
    {
89
    commoninit();
89
    commoninit();
90
    }
90
    }
91
    
91
    
92
    ~SearchData() 
92
    ~SearchData() 
...
...
115
     * before toNativeQuery().
115
     * before toNativeQuery().
116
     * @param threshold: don't use terms more frequent than the value 
116
     * @param threshold: don't use terms more frequent than the value 
117
     *     (proportion of docs where they occur)    
117
     *     (proportion of docs where they occur)    
118
     */
118
     */
119
    bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
119
    bool maybeAddAutoPhrase(Rcl::Db &db, double threshold);
120
121
    /** Add a subdirectory specification used to filter (or exclude) results */
122
    void addDirSpec(const std::string& t, bool excl = false, float w = 1.0) 
123
    {
124
  m_dirspecs.push_back(DirSpec(t, excl, w));
125
    }
126
120
127
    const std::string& getStemLang() {return m_stemlang;}
121
    const std::string& getStemLang() {return m_stemlang;}
128
122
129
    void setMinSize(size_t size) {m_minSize = size;}
123
    void setMinSize(size_t size) {m_minSize = size;}
130
    void setMaxSize(size_t size) {m_maxSize = size;}
124
    void setMaxSize(size_t size) {m_maxSize = size;}
...
...
179
    std::vector<SearchDataClause*> m_query;
173
    std::vector<SearchDataClause*> m_query;
180
    // Restricted set of filetypes if not empty.
174
    // Restricted set of filetypes if not empty.
181
    std::vector<std::string>            m_filetypes; 
175
    std::vector<std::string>            m_filetypes; 
182
    // Excluded set of file types if not empty
176
    // Excluded set of file types if not empty
183
    std::vector<std::string>            m_nfiletypes;
177
    std::vector<std::string>            m_nfiletypes;
184
185
    // Restrict to a subtree, or exclude one
186
    class DirSpec {
187
    public:
188
  std::string dir; 
189
  bool        exclude; 
190
  // For positive spec: affect weight instead of filter
191
  float       weight;
192
  DirSpec(const std::string&d, bool x, float w)
193
      : dir(d), exclude(x), weight(w)
194
  {
195
  }
196
    };
197
    std::vector<DirSpec> m_dirspecs;
198
178
199
    bool                      m_haveDates;
179
    bool                      m_haveDates;
200
    DateInterval              m_dates; // Restrict to date interval
180
    DateInterval              m_dates; // Restrict to date interval
201
    size_t                    m_maxSize;
181
    size_t                    m_maxSize;
202
    size_t                    m_minSize;
182
    size_t                    m_minSize;
...
...
238
    enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
218
    enum Modifier {SDCM_NONE=0, SDCM_NOSTEMMING=1, SDCM_ANCHORSTART=2,
239
           SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16};
219
           SDCM_ANCHOREND=4, SDCM_CASESENS=8, SDCM_DIACSENS=16};
240
220
241
    SearchDataClause(SClType tp) 
221
    SearchDataClause(SClType tp) 
242
    : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), 
222
    : m_tp(tp), m_parentSearch(0), m_haveWildCards(0), 
243
      m_modifiers(SDCM_NONE), m_weight(1.0)
223
      m_modifiers(SDCM_NONE), m_weight(1.0), m_exclude(false)
244
    {}
224
    {}
245
    virtual ~SearchDataClause() {}
225
    virtual ~SearchDataClause() {}
246
    virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
226
    virtual bool toNativeQuery(Rcl::Db &db, void *) = 0;
247
    bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
227
    bool isFileName() const {return m_tp == SCLT_FILENAME ? true: false;}
248
    virtual std::string getReason() const {return m_reason;}
228
    virtual std::string getReason() const {return m_reason;}
...
...
297
    }
277
    }
298
    virtual void setWeight(float w) 
278
    virtual void setWeight(float w) 
299
    {
279
    {
300
    m_weight = w;
280
    m_weight = w;
301
    }
281
    }
282
    virtual bool getexclude() const
283
    {
284
  return m_exclude;
285
    }
286
302
    friend class SearchData;
287
    friend class SearchData;
303
304
protected:
288
protected:
305
    std::string      m_reason;
289
    std::string      m_reason;
306
    SClType     m_tp;
290
    SClType     m_tp;
307
    SearchData *m_parentSearch;
291
    SearchData *m_parentSearch;
308
    bool        m_haveWildCards;
292
    bool        m_haveWildCards;
309
    Modifier    m_modifiers;
293
    Modifier    m_modifiers;
310
    float       m_weight;
294
    float       m_weight;
295
    bool        m_exclude;
311
private:
296
private:
312
    SearchDataClause(const SearchDataClause&) 
297
    SearchDataClause(const SearchDataClause&) 
313
    {
298
    {
314
    }
299
    }
315
    SearchDataClause& operator=(const SearchDataClause&) 
300
    SearchDataClause& operator=(const SearchDataClause&) 
...
...
402
387
403
protected:
388
protected:
404
    std::string m_text;
389
    std::string m_text;
405
};
390
};
406
391
392
393
/** 
394
 * Pathname filtering clause. This is special because of history:
395
 *  - Pathname filtering used to be performed as a post-processing step 
396
 *    done with the url fields of doc data records.
397
 *  - Then it was done as special phrase searches on path elements prefixed
398
 *    with XP.
399
 *  Up to this point dir filtering data was stored as part of the searchdata
400
 *  object, not in the SearchDataClause tree. Only one, then a list,
401
 *  of clauses were stored, and they were always ANDed together.
402
 *
403
 *  In order to allow for OR searching, dir clauses are now stored in a
404
 *  specific SearchDataClause, but this is still special because the field has
405
 *  non-standard phrase-like processing, reflected in index storage by
406
 *  an empty element representing / (as "XP").
407
 * 
408
 * A future version should use a standard phrase with an anchor to the
409
 * start if the path starts with /. As this implies an index format
410
 * change but is not important enough to warrant it, this has to wait for
411
 * the next format change.
412
 */
413
class SearchDataClausePath : public SearchDataClause {
414
public:
415
    SearchDataClausePath(const std::string& txt, bool excl = false)
416
  : SearchDataClause(SCLT_PATH), m_text(txt)
417
    {
418
  m_exclude = excl;
419
  m_haveWildCards = false;
420
    }
421
422
    virtual ~SearchDataClausePath() 
423
    {
424
    }
425
426
    virtual void getTerms(HighlightData&) const
427
    {
428
    }
429
430
    virtual bool toNativeQuery(Rcl::Db &, void *);
431
    virtual const std::string& gettext() const
432
    {
433
  return m_text;
434
    }
435
436
protected:
437
    std::string m_text;
438
};
439
407
/** 
440
/** 
408
 * A clause coming from a NEAR or PHRASE entry field. There is only one 
441
 * A clause coming from a NEAR or PHRASE entry field. There is only one 
409
 * std::string group, and a specified distance, which applies to it.
442
 * std::string group, and a specified distance, which applies to it.
410
 */
443
 */
411
class SearchDataClauseDist : public SearchDataClauseSimple {
444
class SearchDataClauseDist : public SearchDataClauseSimple {