Switch to unified view

a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp
...
...
44
#include "termproc.h"
44
#include "termproc.h"
45
#include "synfamily.h"
45
#include "synfamily.h"
46
#include "stemdb.h"
46
#include "stemdb.h"
47
#include "expansiondbs.h"
47
#include "expansiondbs.h"
48
#include "base64.h"
48
#include "base64.h"
49
#include "daterange.h"
49
50
50
namespace Rcl {
51
namespace Rcl {
51
52
52
typedef  vector<SearchDataClause *>::iterator qlist_it_t;
53
typedef  vector<SearchDataClause *>::iterator qlist_it_t;
53
typedef  vector<SearchDataClause *>::const_iterator qlist_cit_t;
54
typedef  vector<SearchDataClause *>::const_iterator qlist_cit_t;
54
55
55
static const int original_term_wqf_booster = 10;
56
static const int original_term_wqf_booster = 10;
56
57
57
/* The dates-to-query routine is lifted quasi-verbatim but
58
void SearchData::commoninit()
58
 *  modified from xapian-omega:date.cc. Copyright info:
59
 *
60
 * Copyright 1999,2000,2001 BrightStation PLC
61
 * Copyright 2001 James Aylett
62
 * Copyright 2001,2002 Ananova Ltd
63
 * Copyright 2002 Intercede 1749 Ltd
64
 * Copyright 2002,2003,2006 Olly Betts
65
 *
66
 * This program is free software; you can redistribute it and/or
67
 * modify it under the terms of the GNU General Public License as
68
 * published by the Free Software Foundation; either version 2 of the
69
 * License, or (at your option) any later version.
70
 *
71
 * This program is distributed in the hope that it will be useful,
72
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
73
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
74
 * GNU General Public License for more details.
75
 *
76
 * You should have received a copy of the GNU General Public License
77
 * along with this program; if not, write to the Free Software
78
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
79
 * USA
80
 */
81
82
#ifdef RCL_INDEX_STRIPCHARS
83
#define bufprefix(BUF, L) {(BUF)[0] = L;}
84
#define bpoffs() 1
85
#else
86
static inline void bufprefix(char *buf, char c)
87
{
59
{
88
    if (o_index_stripchars) {
60
    m_haveDates = false;
89
  buf[0] = c;
61
    m_maxSize = size_t(-1);
90
    } else {
62
    m_minSize = size_t(-1);
91
  buf[0] = ':'; 
63
    m_haveWildCards = false;
92
  buf[1] = c; 
64
    m_softmaxexpand = -1;
93
  buf[2] = ':';
65
    m_autodiacsens = false;
94
    }
66
    m_autocasesens = true;
95
}
67
    m_maxexp = 10000;
96
static inline int bpoffs() 
68
    m_maxcl = 100000;
97
{
98
    return o_index_stripchars ? 1 : 3;
99
}
100
#endif
101
102
// Build a Xapian query selecting the dates from y1/m1/d1 to y2/m2/d2
// (both ends included).
//
// The result is an OR of date terms built in a scratch buffer:
//   - 'D' terms (YYYYMMDD) for each day of a partially covered month,
//   - 'M' terms (YYYYMM) for each fully covered month,
//   - 'Y' terms (YYYY) for each year strictly between y1 and y2.
// The prefix letter is stored by bufprefix() and the digits start at
// offset bpoffs(); relative to that offset the year is at 0, the month
// at 4 and the day at 6.
//
// NOTE(review): nothing here checks that the start date is not after the
// end date; presumably the caller guarantees it -- confirm.
static Xapian::Query
date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
{
    // Xapian uses a smallbuf and snprintf. Can't be bothered, we're
    // only doing %d's !
    char buf[200];
    // Offset of the first date digit (just after the term prefix).
    const int offs = bpoffs();
    bufprefix(buf, 'D');
    sprintf(buf + offs, "%04d%02d", y1, m1);
    vector<Xapian::Query> v;

    int d_last = monthdays(m1, y1);
    int d_end = d_last;
    if (y1 == y2 && m1 == m2 && d2 < d_last) {
        d_end = d2;
    }
    // Deal with any initial partial month
    if (d1 > 1 || d_end < d_last) {
        for ( ; d1 <= d_end ; d1++) {
            sprintf(buf + 6 + offs, "%02d", d1);
            v.push_back(Xapian::Query(buf));
        }
    } else {
        // The whole first month is covered: a single month term is enough
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    // Start and end in the same month: we are done
    if (y1 == y2 && m1 == m2) {
        return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
    }

    // Full months following the first one, up to the end of the first
    // year, or up to the month before the last one if there is only
    // one year.
    int m_last = (y1 < y2) ? 12 : m2 - 1;
    while (++m1 <= m_last) {
        sprintf(buf + 4 + offs, "%02d", m1);
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    if (y1 < y2) {
        // Full years strictly between the first and last ones
        while (++y1 < y2) {
            sprintf(buf + offs, "%04d", y1);
            bufprefix(buf, 'Y');
            v.push_back(Xapian::Query(buf));
        }
        // Full months of the last year which precede the last month
        sprintf(buf + offs, "%04d", y2);
        bufprefix(buf, 'M');
        for (m1 = 1; m1 < m2; m1++) {
            sprintf(buf + 4 + offs, "%02d", m1);
            v.push_back(Xapian::Query(buf));
        }
    }

    // Store the last month after the 4 year digits. The year digits in
    // the buffer are those of y2 at this point (either written just
    // above, or left from the start because y1 == y2). Writing at any
    // smaller offset would clobber the year and truncate the term.
    sprintf(buf + 4 + offs, "%02d", m2);

    // Deal with any final partial month
    if (d2 < monthdays(m2, y2)) {
        bufprefix(buf, 'D');
        for (d1 = 1 ; d1 <= d2; d1++) {
            sprintf(buf + 6 + offs, "%02d", d1);
            v.push_back(Xapian::Query(buf));
        }
    } else {
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
}
69
}
169
70
170
// Expand categories and mime type wild card exps
71
// Expand categories and mime type wild card exps
171
// Actually, using getAllMimeTypes() here is a bit problematic because
72
// Actually, using getAllMimeTypes() here is a bit problematic because
172
// there may be other types in the index, not indexed by content, but
73
// there may be other types in the index, not indexed by content, but
...
...
186
    if (cfg->isMimeCategory(*it)) {
87
    if (cfg->isMimeCategory(*it)) {
187
        vector<string>tps;
88
        vector<string>tps;
188
        cfg->getMimeCatTypes(*it, tps);
89
        cfg->getMimeCatTypes(*it, tps);
189
        exptps.insert(exptps.end(), tps.begin(), tps.end());
90
        exptps.insert(exptps.end(), tps.begin(), tps.end());
190
    } else {
91
    } else {
92
      bool matched = false;
191
        for (vector<string>::const_iterator ait = alltypes.begin();
93
        for (vector<string>::const_iterator ait = alltypes.begin();
192
         ait != alltypes.end(); ait++) {
94
         ait != alltypes.end(); ait++) {
193
        if (fnmatch(it->c_str(), ait->c_str(), FNM_CASEFOLD) 
95
        if (fnmatch(it->c_str(), ait->c_str(), FNM_CASEFOLD) 
194
            != FNM_NOMATCH) {
96
            != FNM_NOMATCH) {
195
            exptps.push_back(*ait);
97
            exptps.push_back(*ait);
98
          matched = true;
196
        }
99
        }
197
        }
100
        }
101
      if (!matched)
102
      exptps.push_back(it->c_str());
198
    }
103
    }
199
    }
104
    }
200
    tps = exptps;
105
    tps = exptps;
201
    return true;
106
    return true;
202
}
107
}
203
108
204
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, 
109
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp, 
205
                vector<SearchDataClause*>& query, 
110
                vector<SearchDataClause*>& query, 
206
                string& reason, void *d, 
111
                string& reason, void *d)
207
              int maxexp, int maxcl)
208
{
112
{
209
    Xapian::Query xq;
113
    Xapian::Query xq;
210
    for (qlist_it_t it = query.begin(); it != query.end(); it++) {
114
    for (qlist_it_t it = query.begin(); it != query.end(); it++) {
211
    Xapian::Query nq;
115
    Xapian::Query nq;
212
    if (!(*it)->toNativeQuery(db, &nq, maxexp, maxcl)) {
116
    if (!(*it)->toNativeQuery(db, &nq)) {
213
        LOGERR(("SearchData::clausesToQuery: toNativeQuery failed: %s\n",
117
        LOGERR(("SearchData::clausesToQuery: toNativeQuery failed: %s\n",
214
            (*it)->getReason().c_str()));
118
            (*it)->getReason().c_str()));
215
        reason += (*it)->getReason() + " ";
119
        reason += (*it)->getReason() + " ";
216
        return false;
120
        return false;
217
    }       
121
    }       
...
...
238
            else 
142
            else 
239
                xq = nq;
143
                xq = nq;
240
        } else {
144
        } else {
241
            xq = Xapian::Query(op, xq, nq);
145
            xq = Xapian::Query(op, xq, nq);
242
        }
146
        }
243
    if (int(xq.get_length()) >= maxcl) {
147
    if (int(xq.get_length()) >= getMaxCl()) {
244
        LOGERR(("Maximum Xapian query size exceeded."
148
        LOGERR(("Maximum Xapian query size exceeded."
245
            " Maybe increase maxXapianClauses."));
149
            " Maybe increase maxXapianClauses."));
246
        m_reason += "Maximum Xapian query size exceeded."
150
        m_reason += "Maximum Xapian query size exceeded."
247
        " Maybe increase maxXapianClauses.";
151
        " Maybe increase maxXapianClauses.";
248
        return false;
152
        return false;
...
...
253
157
254
   *((Xapian::Query *)d) = xq;
158
   *((Xapian::Query *)d) = xq;
255
    return true;
159
    return true;
256
}
160
}
257
161
258
// Translate a clause type into the short tag which SearchData::asXML()
// writes inside the <CLT> and <CT> elements. Any type not listed below
// yields "UN".
static string tpToString(SClType tp)
{
    const char *tag = "UN";
    switch (tp) {
    case SCLT_AND:      tag = "AND"; break;
    case SCLT_OR:       tag = "OR";  break;
    case SCLT_EXCL:     tag = "EX";  break;
    case SCLT_FILENAME: tag = "FN";  break;
    case SCLT_PHRASE:   tag = "PH";  break;
    case SCLT_NEAR:     tag = "NE";  break;
    case SCLT_SUB:      tag = "SU";  break; // Unsupported actually
    default:            break;
    }
    return tag;
}
271
272
// Serialize this search as an XML text.
//
// Layout of the output (one element per line, except for the closing
// </SD> which is not followed by a newline):
//   <SD>
//     <CL>                clause list
//       <CLT>             list conjunction type, only if not AND
//       <C>               one per clause:
//         <CT>            clause type, only if not AND
//         <F>             base64 field name (not for file name clauses)
//         <T>             base64 clause text
//         <S>             slack, for NEAR and PHRASE clauses
//     <DMI> / <DMA>       min / max dates as <D><M><Y>, if dates are set
//     <MIS> / <MAS>       min / max sizes, when different from size_t(-1)
//     <ST> / <IT>         space-separated m_filetypes / m_nfiletypes lists
//     <YD> / <ND>         base64 directory, included / excluded
//   </SD>
//
// Subclauses (SCLT_SUB) can't be represented: they are logged and skipped.
// The same is done for a clause which is not a SearchDataClauseSimple,
// instead of dereferencing the null result of the dynamic_cast.
//
// @return the XML text.
string SearchData::asXML()
{
    LOGDEB(("SearchData::asXML\n"));
    ostringstream os;

    // Searchdata
    os << "<SD>" << '\n';

    // Clause list
    os << "<CL>" << '\n';
    if (m_tp != SCLT_AND)
        os << "<CLT>" << tpToString(m_tp) << "</CLT>" << '\n';
    for (unsigned int i = 0; i <  m_query.size(); i++) {
        SearchDataClause *c = m_query[i];
        if (c->getTp() == SCLT_SUB) {
            LOGERR(("SearchData::asXML: can't do subclauses !\n"));
            continue;
        }
        SearchDataClauseSimple *cl =
            dynamic_cast<SearchDataClauseSimple*>(c);
        if (cl == 0) {
            // Not a simple clause: nothing that we know how to write
            LOGERR(("SearchData::asXML: clause is not a simple clause\n"));
            continue;
        }
        os << "<C>" << '\n';
        if (cl->getTp() != SCLT_AND) {
            os << "<CT>" << tpToString(cl->getTp()) << "</CT>" << '\n';
        }
        if (cl->getTp() != SCLT_FILENAME && !cl->getfield().empty()) {
            os << "<F>" << base64_encode(cl->getfield()) << "</F>" << '\n';
        }
        os << "<T>" << base64_encode(cl->gettext()) << "</T>" << '\n';
        if (cl->getTp() == SCLT_NEAR || cl->getTp() == SCLT_PHRASE) {
            SearchDataClauseDist *cld =
                dynamic_cast<SearchDataClauseDist*>(cl);
            if (cld != 0) {
                os << "<S>" << cld->getslack() << "</S>" << '\n';
            } else {
                // Type says NEAR/PHRASE but the object has no slack to
                // report: leave the element out.
                LOGERR(("SearchData::asXML: near/phrase clause has no "
                        "distance data\n"));
            }
        }
        os << "</C>" << '\n';
    }
    os << "</CL>" << '\n';

    if (m_haveDates) {
        // Each bound is only written if its year is positive
        if (m_dates.y1 > 0) {
            os << "<DMI>" <<
                "<D>" << m_dates.d1 << "</D>" <<
                "<M>" << m_dates.m1 << "</M>" <<
                "<Y>" << m_dates.y1 << "</Y>"
               << "</DMI>" << '\n';
        }
        if (m_dates.y2 > 0) {
            os << "<DMA>" <<
                "<D>" << m_dates.d2 << "</D>" <<
                "<M>" << m_dates.m2 << "</M>" <<
                "<Y>" << m_dates.y2 << "</Y>"
               << "</DMA>" << '\n';
        }
    }

    // size_t(-1) is the "not set" value for both sizes
    if (m_minSize != size_t(-1)) {
        os << "<MIS>" << m_minSize << "</MIS>" << '\n';
    }
    if (m_maxSize != size_t(-1)) {
        os << "<MAS>" << m_maxSize << "</MAS>" << '\n';
    }

    if (!m_filetypes.empty()) {
        os << "<ST>";
        for (vector<string>::iterator it = m_filetypes.begin();
             it != m_filetypes.end(); it++) {
            os << *it << " ";
        }
        os << "</ST>" << '\n';
    }

    if (!m_nfiletypes.empty()) {
        os << "<IT>";
        for (vector<string>::iterator it = m_nfiletypes.begin();
             it != m_nfiletypes.end(); it++) {
            os << *it << " ";
        }
        os << "</IT>" << '\n';
    }

    for (vector<DirSpec>::const_iterator dit = m_dirspecs.begin();
         dit != m_dirspecs.end(); dit++) {
        if (dit->exclude) {
            os << "<ND>" << base64_encode(dit->dir) << "</ND>" << '\n';
        } else {
            os << "<YD>" << base64_encode(dit->dir) << "</YD>" << '\n';
        }
    }
    os << "</SD>";
    return os.str();
}
363
364
bool SearchData::toNativeQuery(Rcl::Db &db, void *d, int maxexp, int maxcl)
162
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
365
{
163
{
366
    LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
164
    LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
367
    m_reason.erase();
165
    m_reason.erase();
368
166
369
    // Walk the clause list translating each in turn and building the 
167
    // Walk the clause list translating each in turn and building the 
370
    // Xapian query tree
168
    // Xapian query tree
371
    Xapian::Query xq;
169
    Xapian::Query xq;
372
    if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq, maxexp, maxcl)) {
170
    if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
373
    LOGERR(("SearchData::toNativeQuery: clausesToQuery failed. reason: %s\n", 
171
    LOGERR(("SearchData::toNativeQuery: clausesToQuery failed. reason: %s\n", 
374
        m_reason.c_str()));
172
        m_reason.c_str()));
375
    return false;
173
    return false;
376
    }
174
    }
377
175
...
...
630
    m_query.push_back(cl);
428
    m_query.push_back(cl);
631
    return true;
429
    return true;
632
}
430
}
633
431
634
// Make me all new
432
// Make me all new
635
void SearchData::erase() {
433
void SearchData::erase() 
434
{
636
    LOGDEB0(("SearchData::erase\n"));
435
    LOGDEB0(("SearchData::erase\n"));
637
    m_tp = SCLT_AND;
436
    m_tp = SCLT_AND;
638
    for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
437
    for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++)
639
    delete *it;
438
    delete *it;
640
    m_query.clear();
439
    m_query.clear();
...
...
727
    TextSplitQ *m_ts;
526
    TextSplitQ *m_ts;
728
    map<int, string> m_terms;
527
    map<int, string> m_terms;
729
    map<int, bool> m_nste;
528
    map<int, bool> m_nste;
730
};
529
};
731
530
732
// A class used to translate a user compound string (*not* a query
733
// language string) as may be entered in any_terms/all_terms search
734
// entry fields, ex: [term1 "a phrase" term3] into a xapian query
735
// tree.
736
// The object keeps track of the query terms and term groups while
737
// translating.
738
class StringToXapianQ {
739
public:
740
    StringToXapianQ(Db& db, HighlightData& hld, const string& field, 
741
          const string &stmlng, bool boostUser, int maxexp, int maxcl)
742
  : m_db(db), m_field(field), m_stemlang(stmlng),
743
    m_doBoostUserTerms(boostUser), m_hld(hld), m_autodiacsens(false),
744
    m_autocasesens(true), m_maxexp(maxexp), m_maxcl(maxcl), m_curcl(0)
745
    { 
746
  m_db.getConf()->getConfParam("autodiacsens", &m_autodiacsens);
747
  m_db.getConf()->getConfParam("autocasesens", &m_autocasesens);
748
    }
749
750
    bool processUserString(const string &iq,
751
             int mods, 
752
             string &ermsg,
753
             vector<Xapian::Query> &pqueries, 
754
             int slack = 0, bool useNear = false);
755
private:
756
    bool expandTerm(string& ermsg, int mods, 
757
          const string& term, vector<string>& exp, 
758
                    string& sterm, const string& prefix);
759
    // After splitting entry on whitespace: process non-phrase element
760
    void processSimpleSpan(string& ermsg, const string& span, 
761
             int mods,
762
             vector<Xapian::Query> &pqueries);
763
    // Process phrase/near element
764
    void processPhraseOrNear(string& ermsg, TextSplitQ *splitData, 
765
               int mods,
766
               vector<Xapian::Query> &pqueries,
767
               bool useNear, int slack);
768
769
    Db&           m_db;
770
    const string& m_field;
771
    const string& m_stemlang;
772
    const bool    m_doBoostUserTerms;
773
    HighlightData& m_hld;
774
    bool m_autodiacsens;
775
    bool m_autocasesens;
776
    int  m_maxexp;
777
    int  m_maxcl;
778
    int  m_curcl;
779
};
780
531
781
#if 1
532
#if 1
782
static void listVector(const string& what, const vector<string>&l)
533
static void listVector(const string& what, const vector<string>&l)
783
{
534
{
784
    string a;
535
    string a;
...
...
798
 * @param sterm output original input term if there were no wildcards
549
 * @param sterm output original input term if there were no wildcards
799
 * @param prefix field prefix in index. We could recompute it, but the caller
550
 * @param prefix field prefix in index. We could recompute it, but the caller
800
 *  has it already. Used in the simple case where there is nothing to expand, 
551
 *  has it already. Used in the simple case where there is nothing to expand, 
801
 *  and we just return the prefixed term (else Db::termMatch deals with it).
552
 *  and we just return the prefixed term (else Db::termMatch deals with it).
802
 */
553
 */
803
bool StringToXapianQ::expandTerm(string& ermsg, int mods, 
554
bool SearchDataClauseSimple::expandTerm(Rcl::Db &db, 
555
                  string& ermsg, int mods, 
804
                 const string& term, 
556
                  const string& term, 
805
                                 vector<string>& oexp, string &sterm,
557
                  vector<string>& oexp, string &sterm,
806
                 const string& prefix)
558
                  const string& prefix)
807
{
559
{
808
    LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
560
    LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
809
         mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
561
         mods, m_field.c_str(), term.c_str(), getStemLang().c_str()));
810
    sterm.clear();
562
    sterm.clear();
811
    oexp.clear();
563
    oexp.clear();
812
    if (term.empty())
564
    if (term.empty())
813
    return true;
565
    return true;
814
566
815
    bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
567
    bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
816
568
817
    // If there are no wildcards, add term to the list of user-entered terms
569
    // If there are no wildcards, add term to the list of user-entered terms
818
    if (!haswild)
570
    if (!haswild)
819
    m_hld.uterms.insert(term);
571
    m_hldata.uterms.insert(term);
820
572
821
    bool nostemexp = (mods & SearchDataClause::SDCM_NOSTEMMING) != 0;
573
    bool nostemexp = (mods & SearchDataClause::SDCM_NOSTEMMING) != 0;
822
574
823
    // No stem expansion if there are wildcards or if prevented by caller
575
    // No stem expansion if there are wildcards or if prevented by caller
824
    if (haswild || m_stemlang.empty()) {
576
    if (haswild || getStemLang().empty()) {
825
    LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
577
    LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
826
    nostemexp = true;
578
    nostemexp = true;
827
    }
579
    }
828
580
829
    bool noexpansion = nostemexp && !haswild;
581
    bool noexpansion = nostemexp && !haswild;
...
...
840
592
841
    // If any character has a diacritic, we become
593
    // If any character has a diacritic, we become
842
    // diacritic-sensitive. Note that the way that the test is
594
    // diacritic-sensitive. Note that the way that the test is
843
    // performed (conversion+comparison) will automatically ignore
595
    // performed (conversion+comparison) will automatically ignore
844
    // accented characters which are actually a separate letter
596
    // accented characters which are actually a separate letter
845
    if (m_autodiacsens && unachasaccents(term)) {
597
    if (getAutoDiac() && unachasaccents(term)) {
846
        LOGDEB0(("expandTerm: term has accents -> diac-sensitive\n"));
598
        LOGDEB0(("expandTerm: term has accents -> diac-sensitive\n"));
847
        diac_sensitive = true;
599
        diac_sensitive = true;
848
    }
600
    }
849
601
850
    // If any character apart the first is uppercase, we become
602
    // If any character apart the first is uppercase, we become
851
    // case-sensitive.  The first character is reserved for
603
    // case-sensitive.  The first character is reserved for
852
    // turning off stemming. You need to use a query language
604
    // turning off stemming. You need to use a query language
853
    // modifier to search for Floor in a case-sensitive way.
605
    // modifier to search for Floor in a case-sensitive way.
854
    Utf8Iter it(term);
606
    Utf8Iter it(term);
855
    it++;
607
    it++;
856
    if (m_autocasesens && unachasuppercase(term.substr(it.getBpos()))) {
608
    if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {
857
        LOGDEB0(("expandTerm: term has uppercase -> case-sensitive\n"));
609
        LOGDEB0(("expandTerm: term has uppercase -> case-sensitive\n"));
858
        case_sensitive = true;
610
        case_sensitive = true;
859
    }
611
    }
860
612
861
    // If we are sensitive to case or diacritics turn stemming off
613
    // If we are sensitive to case or diacritics turn stemming off
...
...
870
#endif
622
#endif
871
623
872
    if (noexpansion) {
624
    if (noexpansion) {
873
    sterm = term;
625
    sterm = term;
874
    oexp.push_back(prefix + term);
626
    oexp.push_back(prefix + term);
875
    m_hld.terms[term] = m_hld.uterms.size() - 1;
627
    m_hldata.terms[term] = m_hldata.uterms.size() - 1;
876
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
628
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
877
    return true;
629
    return true;
878
    } 
630
    } 
879
631
880
    // Make objects before the goto jungle to avoid compiler complaints
632
    // Make objects before the goto jungle to avoid compiler complaints
881
    SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
633
    SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
882
    XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all", 
634
    XapComputableSynFamMember synac(db.m_ndb->xrdb, synFamDiCa, "all", 
883
                    &unacfoldtrans);
635
                    &unacfoldtrans);
884
    // This will hold the result of case and diacritics expansion as input
636
    // This will hold the result of case and diacritics expansion as input
885
    // to stem expansion.
637
    // to stem expansion.
886
    vector<string> lexp;
638
    vector<string> lexp;
887
    
639
    
...
...
889
    if (haswild) {
641
    if (haswild) {
890
    // Note that if there are wildcards, we do a direct from-index
642
    // Note that if there are wildcards, we do a direct from-index
891
    // expansion, which means that we are casediac-sensitive. There
643
    // expansion, which means that we are casediac-sensitive. There
892
    // would be nothing to prevent us to expand from the casediac
644
    // would be nothing to prevent us to expand from the casediac
893
    // synonyms first. To be done later
645
    // synonyms first. To be done later
894
  m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang,term,res,m_maxexp,m_field);
646
  db.termMatch(Rcl::Db::ET_WILD, getStemLang(), term, res, 
647
           getMaxExp(), m_field);
895
    goto termmatchtoresult;
648
    goto termmatchtoresult;
896
    }
649
    }
897
650
898
    sterm = term;
651
    sterm = term;
899
652
900
#ifdef RCL_INDEX_STRIPCHARS
653
#ifdef RCL_INDEX_STRIPCHARS
901
654
902
    m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, m_maxexp, m_field);
655
    db.termMatch(Rcl::Db::ET_STEM, getStemLang(), term, res, 
656
       getMaxExp(), m_field);
903
657
904
#else
658
#else
905
659
906
    if (o_index_stripchars) {
660
    if (o_index_stripchars) {
907
    // If the index is raw, we can only come here if nostemexp is unset
661
    // If the index is raw, we can only come here if nostemexp is unset
908
    // and we just need stem expansion.
662
    // and we just need stem expansion.
909
  m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang,term,res,m_maxexp,m_field);
663
  db.termMatch(Rcl::Db::ET_STEM, getStemLang(), term, res, 
664
           getMaxExp(), m_field);
910
    goto termmatchtoresult;
665
    goto termmatchtoresult;
911
    } 
666
    } 
912
667
913
    // No stem expansion when diacritic or case sensitivity is set, it
668
    // No stem expansion when diacritic or case sensitivity is set, it
914
    // makes no sense (it would mess with the diacritics anyway if
669
    // makes no sense (it would mess with the diacritics anyway if
...
...
948
    }
703
    }
949
    sort(lexp.begin(), lexp.end());
704
    sort(lexp.begin(), lexp.end());
950
    {
705
    {
951
    vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
706
    vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
952
    lexp.resize(uit - lexp.begin());
707
    lexp.resize(uit - lexp.begin());
953
    StemDb db(m_db.m_ndb->xrdb);
708
    StemDb sdb(db.m_ndb->xrdb);
954
    vector<string> exp1;
709
    vector<string> exp1;
955
    for (vector<string>::const_iterator it = lexp.begin(); 
710
    for (vector<string>::const_iterator it = lexp.begin(); 
956
         it != lexp.end(); it++) {
711
         it != lexp.end(); it++) {
957
        db.stemExpand(m_stemlang, *it, exp1);
712
        sdb.stemExpand(getStemLang(), *it, exp1);
958
    }
713
    }
959
    LOGDEB(("ExpTerm: stem exp-> %s\n", stringsToString(exp1).c_str()));
714
    LOGDEB(("ExpTerm: stem exp-> %s\n", stringsToString(exp1).c_str()));
960
715
961
    // Expand the resulting list for case (all stemdb content
716
    // Expand the resulting list for case (all stemdb content
962
    // is lowercase)
717
    // is lowercase)
...
...
973
    // Bogus wildcard expand to generate the result (possibly add prefixes)
728
    // Bogus wildcard expand to generate the result (possibly add prefixes)
974
exptotermatch:
729
exptotermatch:
975
    LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
730
    LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
976
    for (vector<string>::const_iterator it = lexp.begin();
731
    for (vector<string>::const_iterator it = lexp.begin();
977
     it != lexp.end(); it++) {
732
     it != lexp.end(); it++) {
978
    m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res,m_maxexp,m_field);
733
    db.termMatch(Rcl::Db::ET_WILD, getStemLang(), *it, res,
734
           getMaxExp(), m_field);
979
    }
735
    }
980
#endif
736
#endif
981
737
982
    // Term match entries to vector of terms
738
    // Term match entries to vector of terms
983
termmatchtoresult:
739
termmatchtoresult:
984
    if (int(res.entries.size()) >= m_maxexp) {
740
    if (int(res.entries.size()) >= getMaxExp()) {
985
    ermsg = "Maximum term expansion size exceeded."
741
    ermsg = "Maximum term expansion size exceeded."
986
        " Maybe increase maxTermExpand.";
742
        " Maybe increase maxTermExpand.";
987
    return false;
743
    return false;
988
    }
744
    }
989
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
745
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
...
...
997
    oexp.push_back(prefix + term);
753
    oexp.push_back(prefix + term);
998
754
999
    // Remember the uterm-to-expansion links
755
    // Remember the uterm-to-expansion links
1000
    for (vector<string>::const_iterator it = oexp.begin(); 
756
    for (vector<string>::const_iterator it = oexp.begin(); 
1001
     it != oexp.end(); it++) {
757
     it != oexp.end(); it++) {
1002
    m_hld.terms[strip_prefix(*it)] = term;
758
    m_hldata.terms[strip_prefix(*it)] = term;
1003
    }
759
    }
1004
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
760
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
1005
    return true;
761
    return true;
1006
}
762
}
1007
763
...
...
1037
    // vector)
793
    // vector)
1038
    comb.pop_back();
794
    comb.pop_back();
1039
    }
795
    }
1040
}
796
}
1041
797
1042
void StringToXapianQ::processSimpleSpan(string& ermsg, const string& span, 
798
void SearchDataClauseSimple::processSimpleSpan(Rcl::Db &db, string& ermsg,
1043
                  int mods,
799
                         const string& span, 
1044
                  vector<Xapian::Query> &pqueries)
800
                         int mods, void * pq)
1045
{
801
{
802
    vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);
1046
    LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
803
    LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
1047
        span.c_str(), (unsigned int)mods));
804
        span.c_str(), (unsigned int)mods));
1048
    vector<string> exp;  
805
    vector<string> exp;  
1049
    string sterm; // dumb version of user term
806
    string sterm; // dumb version of user term
1050
807
1051
    string prefix;
808
    string prefix;
1052
    const FieldTraits *ftp;
809
    const FieldTraits *ftp;
1053
    if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
810
    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) {
1054
    prefix = wrap_prefix(ftp->pfx);
811
    prefix = wrap_prefix(ftp->pfx);
1055
    }
812
    }
1056
813
1057
    if (!expandTerm(ermsg, mods, span, exp, sterm, prefix))
814
    if (!expandTerm(db, ermsg, mods, span, exp, sterm, prefix))
1058
    return;
815
    return;
1059
    
816
    
1060
    // Set up the highlight data. No prefix should go in there
817
    // Set up the highlight data. No prefix should go in there
1061
    for (vector<string>::const_iterator it = exp.begin(); 
818
    for (vector<string>::const_iterator it = exp.begin(); 
1062
     it != exp.end(); it++) {
819
     it != exp.end(); it++) {
1063
    m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
820
    m_hldata.groups.push_back(vector<string>(1, it->substr(prefix.size())));
1064
    m_hld.slacks.push_back(0);
821
    m_hldata.slacks.push_back(0);
1065
    m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
822
    m_hldata.grpsugidx.push_back(m_hldata.ugroups.size() - 1);
1066
    }
823
    }
1067
824
1068
    // Push either term or OR of stem-expanded set
825
    // Push either term or OR of stem-expanded set
1069
    Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
826
    Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
1070
    m_curcl += exp.size();
827
    m_curcl += exp.size();
...
...
1072
    // If sterm (simplified original user term) is not null, give it a
829
    // If sterm (simplified original user term) is not null, give it a
1073
    // relevance boost. We do this even if no expansion occurred (else
830
    // relevance boost. We do this even if no expansion occurred (else
1074
    // the non-expanded terms in a term list would end-up with even
831
    // the non-expanded terms in a term list would end-up with even
1075
    // less wqf). This does not happen if there are wildcards anywhere
832
    // less wqf). This does not happen if there are wildcards anywhere
1076
    // in the search.
833
    // in the search.
834
    // We normally boost the original term in the stem expansion list. Don't
835
    // do it if there are wildcards anywhere, this would skew the results.
836
    bool doBoostUserTerm = 
837
  (m_parentSearch && !m_parentSearch->haveWildCards()) || 
838
  (m_parentSearch == 0 && !m_haveWildCards);
1077
    if (m_doBoostUserTerms && !sterm.empty()) {
839
    if (doBoostUserTerm && !sterm.empty()) {
1078
        xq = Xapian::Query(Xapian::Query::OP_OR, xq, 
840
        xq = Xapian::Query(Xapian::Query::OP_OR, xq, 
1079
               Xapian::Query(prefix+sterm, 
841
               Xapian::Query(prefix+sterm, 
1080
                     original_term_wqf_booster));
842
                     original_term_wqf_booster));
1081
    }
843
    }
1082
    pqueries.push_back(xq);
844
    pqueries.push_back(xq);
...
...
1084
846
1085
// User entry element had several terms: transform into a PHRASE or
847
// User entry element had several terms: transform into a PHRASE or
1086
// NEAR xapian query, the elements of which can themselves be OR
848
// NEAR xapian query, the elements of which can themselves be OR
1087
// queries if the terms get expanded by stemming or wildcards (we
849
// queries if the terms get expanded by stemming or wildcards (we
1088
// don't do stemming for PHRASE though)
850
// don't do stemming for PHRASE though)
1089
void StringToXapianQ::processPhraseOrNear(string& ermsg, TextSplitQ *splitData, 
851
void SearchDataClauseSimple::processPhraseOrNear(Rcl::Db &db, string& ermsg, 
1090
                    int mods,
852
                       TextSplitQ *splitData, 
1091
                    vector<Xapian::Query> &pqueries,
853
                       int mods, void *pq,
1092
                      bool useNear, int slack)
854
                       bool useNear, int slack)
1093
{
855
{
856
    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
1094
    Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
857
    Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
1095
    Xapian::Query::OP_PHRASE;
858
    Xapian::Query::OP_PHRASE;
1096
    vector<Xapian::Query> orqueries;
859
    vector<Xapian::Query> orqueries;
1097
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
860
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
1098
    bool hadmultiple = false;
861
    bool hadmultiple = false;
1099
#endif
862
#endif
1100
    vector<vector<string> >groups;
863
    vector<vector<string> >groups;
1101
864
1102
    string prefix;
865
    string prefix;
1103
    const FieldTraits *ftp;
866
    const FieldTraits *ftp;
1104
    if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
867
    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp)) {
1105
    prefix = wrap_prefix(ftp->pfx);
868
    prefix = wrap_prefix(ftp->pfx);
1106
    }
869
    }
1107
870
1108
    if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
871
    if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
1109
    orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
872
    orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
...
...
1126
    int lmods = mods;
889
    int lmods = mods;
1127
    if (nostemexp)
890
    if (nostemexp)
1128
        lmods |= SearchDataClause::SDCM_NOSTEMMING;
891
        lmods |= SearchDataClause::SDCM_NOSTEMMING;
1129
    string sterm;
892
    string sterm;
1130
    vector<string> exp;
893
    vector<string> exp;
1131
    if (!expandTerm(ermsg, lmods, *it, exp, sterm, prefix))
894
    if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
1132
        return;
895
        return;
1133
    LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
896
    LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
1134
    listVector("", exp);
897
    listVector("", exp);
1135
    // groups is used for highlighting, we don't want prefixes in there.
898
    // groups is used for highlighting, we don't want prefixes in there.
1136
    vector<string> noprefs;
899
    vector<string> noprefs;
...
...
1140
    }
903
    }
1141
    groups.push_back(noprefs);
904
    groups.push_back(noprefs);
1142
    orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
905
    orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, 
1143
                      exp.begin(), exp.end()));
906
                      exp.begin(), exp.end()));
1144
    m_curcl += exp.size();
907
    m_curcl += exp.size();
1145
    if (m_curcl >= m_maxcl)
908
    if (m_curcl >= getMaxCl())
1146
        return;
909
        return;
1147
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
910
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
1148
    if (exp.size() > 1) 
911
    if (exp.size() > 1) 
1149
        hadmultiple = true;
912
        hadmultiple = true;
1150
#endif
913
#endif
...
...
1171
    vector<string> comb;
934
    vector<string> comb;
1172
    multiply_groups(groups.begin(), groups.end(), comb, allcombs);
935
    multiply_groups(groups.begin(), groups.end(), comb, allcombs);
1173
    
936
    
1174
    // Insert the search groups and slacks in the highlight data, with
937
    // Insert the search groups and slacks in the highlight data, with
1175
    // a reference to the user entry that generated them:
938
    // a reference to the user entry that generated them:
1176
    m_hld.groups.insert(m_hld.groups.end(), allcombs.begin(), allcombs.end());
939
    m_hldata.groups.insert(m_hldata.groups.end(), 
940
             allcombs.begin(), allcombs.end());
1177
    m_hld.slacks.insert(m_hld.slacks.end(), allcombs.size(), slack);
941
    m_hldata.slacks.insert(m_hldata.slacks.end(), allcombs.size(), slack);
1178
    m_hld.grpsugidx.insert(m_hld.grpsugidx.end(), allcombs.size(), 
942
    m_hldata.grpsugidx.insert(m_hldata.grpsugidx.end(), allcombs.size(), 
1179
               m_hld.ugroups.size() - 1);
943
                  m_hldata.ugroups.size() - 1);
1180
}
944
}
1181
945
1182
// Trim string beginning with ^ or ending with $ and convert to flags
946
// Trim string beginning with ^ or ending with $ and convert to flags
1183
static int stringToMods(string& s)
947
static int stringToMods(string& s)
1184
{
948
{
...
...
1218
 *   - Elements corresponding to phrase/near are an OP_PHRASE/NEAR
982
 *   - Elements corresponding to phrase/near are an OP_PHRASE/NEAR
1219
 *     composition of the phrase terms (no stem expansion in this case)
983
 *     composition of the phrase terms (no stem expansion in this case)
1220
 * @return the subquery count (either or'd stem-expanded terms or phrase word
984
 * @return the subquery count (either or'd stem-expanded terms or phrase word
1221
 *   count)
985
 *   count)
1222
 */
986
 */
1223
bool StringToXapianQ::processUserString(const string &iq,
987
bool SearchDataClauseSimple::processUserString(Rcl::Db &db, const string &iq,
1224
                  int mods, 
1225
                    string &ermsg,
988
                    int mods, string &ermsg,
1226
                  vector<Xapian::Query> &pqueries,
989
                  void *pq, int slack, bool useNear)
1227
                  int slack, 
1228
                  bool useNear
1229
                  )
1230
{
990
{
991
    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
992
1231
    LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "
993
    LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "
1232
        "slack %d near %d\n", 
994
        "slack %d near %d\n", 
1233
        iq.c_str(), m_field.c_str(), mods, slack, useNear));
995
        iq.c_str(), m_field.c_str(), mods, slack, useNear));
1234
    ermsg.erase();
996
    ermsg.erase();
1235
    m_curcl = 0;
997
    m_curcl = 0;
1236
    const StopList stops = m_db.getStopList();
998
    const StopList stops = db.getStopList();
1237
999
1238
    // Simple whitespace-split input into user-level words and
1000
    // Simple whitespace-split input into user-level words and
1239
    // double-quoted phrases: word1 word2 "this is a phrase". 
1001
    // double-quoted phrases: word1 word2 "this is a phrase". 
1240
    //
1002
    //
1241
    // The text splitter may further still decide that the resulting
1003
    // The text splitter may further still decide that the resulting
...
...
1295
        continue;// ??
1057
        continue;// ??
1296
        case 1: {
1058
        case 1: {
1297
        int lmods = mods;
1059
        int lmods = mods;
1298
        if (splitter.nostemexps.front())
1060
        if (splitter.nostemexps.front())
1299
            lmods |= SearchDataClause::SDCM_NOSTEMMING;
1061
            lmods |= SearchDataClause::SDCM_NOSTEMMING;
1300
        m_hld.ugroups.push_back(vector<string>(1, *it));
1062
        m_hldata.ugroups.push_back(vector<string>(1, *it));
1301
        processSimpleSpan(ermsg,splitter.terms.front(),lmods, pqueries);
1063
        processSimpleSpan(db, ermsg, splitter.terms.front(),
1064
                lmods, &pqueries);
1302
        }
1065
        }
1303
        break;
1066
        break;
1304
        default:
1067
        default:
1305
        m_hld.ugroups.push_back(vector<string>(1, *it));
1068
        m_hldata.ugroups.push_back(vector<string>(1, *it));
1306
        processPhraseOrNear(ermsg, &splitter, mods, pqueries,
1069
        processPhraseOrNear(db, ermsg, &splitter, mods, &pqueries,
1307
                    useNear, slack);
1070
                    useNear, slack);
1308
        }
1071
        }
1309
        if (m_curcl >= m_maxcl) {
1072
        if (m_curcl >= getMaxCl()) {
1310
        ermsg = "Maximum Xapian query size exceeded."
1073
        ermsg = "Maximum Xapian query size exceeded."
1311
            " Maybe increase maxXapianClauses.";
1074
            " Maybe increase maxXapianClauses.";
1312
        break;
1075
        break;
1313
        }
1076
        }
1314
    }
1077
    }
...
...
1327
    }
1090
    }
1328
    return true;
1091
    return true;
1329
}
1092
}
1330
1093
1331
// Translate a simple OR, AND, or EXCL search clause. 
1094
// Translate a simple OR, AND, or EXCL search clause. 
1332
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p, 
1095
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
1333
                     int maxexp, int maxcl)
1334
{
1096
{
1335
    LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
1097
    LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
1336
         getStemLang().c_str()));
1098
         getStemLang().c_str()));
1337
1099
1338
    Xapian::Query *qp = (Xapian::Query *)p;
1100
    Xapian::Query *qp = (Xapian::Query *)p;
...
...
1346
    case SCLT_EXCL: op = Xapian::Query::OP_OR; break;
1108
    case SCLT_EXCL: op = Xapian::Query::OP_OR; break;
1347
    default:
1109
    default:
1348
    LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
1110
    LOGERR(("SearchDataClauseSimple: bad m_tp %d\n", m_tp));
1349
    return false;
1111
    return false;
1350
    }
1112
    }
1113
1351
    vector<Xapian::Query> pqueries;
1114
    vector<Xapian::Query> pqueries;
1352
1353
    // We normally boost the original term in the stem expansion list. Don't
1354
    // do it if there are wildcards anywhere, this would skew the results.
1355
    bool doBoostUserTerm = 
1356
  (m_parentSearch && !m_parentSearch->haveWildCards()) || 
1357
  (m_parentSearch == 0 && !m_haveWildCards);
1358
1359
    StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm,
1360
             maxexp, maxcl);
1361
    if (!tr.processUserString(m_text, getModifiers(), m_reason, pqueries))
1115
    if (!processUserString(db, m_text, getModifiers(), m_reason, &pqueries))
1362
    return false;
1116
    return false;
1363
    if (pqueries.empty()) {
1117
    if (pqueries.empty()) {
1364
    LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
1118
    LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
1365
    return true;
1119
    return true;
1366
    }
1120
    }
...
...
1379
//
1133
//
1380
// We do not split the entry any more (used to do some crazy thing
1134
// We do not split the entry any more (used to do some crazy thing
1381
// about expanding multiple fragments in the past). We just take the
1135
// about expanding multiple fragments in the past). We just take the
1382
// value blanks and all and expand this against the indexed unsplit
1136
// value blanks and all and expand this against the indexed unsplit
1383
// file names
1137
// file names
1384
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p, 
1138
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
1385
                       int maxexp, int)
1386
{
1139
{
1387
    Xapian::Query *qp = (Xapian::Query *)p;
1140
    Xapian::Query *qp = (Xapian::Query *)p;
1388
    *qp = Xapian::Query();
1141
    *qp = Xapian::Query();
1389
1142
1390
    vector<string> names;
1143
    vector<string> names;
1144
    int maxexp = 10000;
1145
    db.getConf()->getConfParam("maxTermExpand", &maxexp);
1391
    db.filenameWildExp(m_text, names, maxexp);
1146
    db.filenameWildExp(m_text, names, maxexp);
1392
    *qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
1147
    *qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
1393
1148
1394
    if (m_weight != 1.0) {
1149
    if (m_weight != 1.0) {
1395
    *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
1150
    *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
1396
    }
1151
    }
1397
    return true;
1152
    return true;
1398
}
1153
}
1399
1154
1400
// Translate NEAR or PHRASE clause. 
1155
// Translate NEAR or PHRASE clause. 
1401
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p, 
1156
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
1402
                   int maxexp, int maxcl)
1403
{
1157
{
1404
    LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
1158
    LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
1405
1159
1406
    Xapian::Query *qp = (Xapian::Query *)p;
1160
    Xapian::Query *qp = (Xapian::Query *)p;
1407
    *qp = Xapian::Query();
1161
    *qp = Xapian::Query();
1408
1162
1409
    vector<Xapian::Query> pqueries;
1163
    vector<Xapian::Query> pqueries;
1410
    Xapian::Query nq;
1164
    Xapian::Query nq;
1411
1412
    // We normally boost the original term in the stem expansion list. Don't
1413
    // do it if there are wildcards anywhere, this would skew the results.
1414
    bool doBoostUserTerm = 
1415
  (m_parentSearch && !m_parentSearch->haveWildCards()) || 
1416
  (m_parentSearch == 0 && !m_haveWildCards);
1417
1165
1418
    // We produce a single phrase out of the user entry then use
1166
    // We produce a single phrase out of the user entry then use
1419
    // stringToXapianQueries() to lowercase and simplify the phrase
1167
    // stringToXapianQueries() to lowercase and simplify the phrase
1420
    // terms etc. This will result into a single (complex)
1168
    // terms etc. This will result into a single (complex)
1421
    // Xapian::Query.
1169
    // Xapian::Query.
1422
    if (m_text.find('\"') != string::npos) {
1170
    if (m_text.find('\"') != string::npos) {
1423
    m_text = neutchars(m_text, "\"");
1171
    m_text = neutchars(m_text, "\"");
1424
    }
1172
    }
1425
    string s = cstr_dquote + m_text + cstr_dquote;
1173
    string s = cstr_dquote + m_text + cstr_dquote;
1426
    bool useNear = (m_tp == SCLT_NEAR);
1174
    bool useNear = (m_tp == SCLT_NEAR);
1427
    StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm,
1428
             maxexp, maxcl);
1429
    if (!tr.processUserString(s, getModifiers(), m_reason, pqueries, 
1175
    if (!processUserString(db, s, getModifiers(), m_reason, &pqueries, 
1430
                  m_slack, useNear))
1176
               m_slack, useNear))
1431
    return false;
1177
    return false;
1432
    if (pqueries.empty()) {
1178
    if (pqueries.empty()) {
1433
    LOGERR(("SearchDataClauseDist: resolved to null query\n"));
1179
    LOGERR(("SearchDataClauseDist: resolved to null query\n"));
1434
    return true;
1180
    return true;
1435
    }
1181
    }