|
a/src/rcldb/searchdata.cpp |
|
b/src/rcldb/searchdata.cpp |
|
... |
|
... |
14 |
* Free Software Foundation, Inc.,
|
14 |
* Free Software Foundation, Inc.,
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
16 |
*/
|
16 |
*/
|
17 |
|
17 |
|
18 |
// Handle translation from rcl's SearchData structures to Xapian Queries
|
18 |
// Handle translation from rcl's SearchData structures to Xapian Queries
|
|
|
19 |
|
|
|
20 |
#include "autoconfig.h"
|
|
|
21 |
|
19 |
#include <stdio.h>
|
22 |
#include <stdio.h>
|
20 |
#include <fnmatch.h>
|
23 |
#include <fnmatch.h>
|
21 |
|
24 |
|
22 |
#include <string>
|
25 |
#include <string>
|
23 |
#include <vector>
|
26 |
#include <vector>
|
24 |
#include <algorithm>
|
27 |
#include <algorithm>
|
|
|
28 |
using namespace std;
|
25 |
|
29 |
|
26 |
#include "xapian.h"
|
30 |
#include "xapian.h"
|
27 |
|
31 |
|
28 |
#include "cstr.h"
|
32 |
#include "cstr.h"
|
29 |
#include "rcldb.h"
|
33 |
#include "rcldb.h"
|
|
|
34 |
#include "rcldb_p.h"
|
30 |
#include "searchdata.h"
|
35 |
#include "searchdata.h"
|
31 |
#include "debuglog.h"
|
36 |
#include "debuglog.h"
|
32 |
#include "smallut.h"
|
37 |
#include "smallut.h"
|
33 |
#include "textsplit.h"
|
38 |
#include "textsplit.h"
|
34 |
#include "unacpp.h"
|
39 |
#include "unacpp.h"
|
35 |
#include "utf8iter.h"
|
40 |
#include "utf8iter.h"
|
36 |
#include "stoplist.h"
|
41 |
#include "stoplist.h"
|
37 |
#include "rclconfig.h"
|
42 |
#include "rclconfig.h"
|
38 |
#include "termproc.h"
|
43 |
#include "termproc.h"
|
|
|
44 |
#include "synfamily.h"
|
|
|
45 |
#include "stemdb.h"
|
|
|
46 |
#include "expansiondbs.h"
|
39 |
|
47 |
|
40 |
#ifndef NO_NAMESPACES
|
|
|
41 |
using namespace std;
|
|
|
42 |
namespace Rcl {
|
48 |
namespace Rcl {
|
43 |
#endif
|
|
|
44 |
|
49 |
|
45 |
typedef vector<SearchDataClause *>::iterator qlist_it_t;
|
50 |
typedef vector<SearchDataClause *>::iterator qlist_it_t;
|
46 |
typedef vector<SearchDataClause *>::const_iterator qlist_cit_t;
|
51 |
typedef vector<SearchDataClause *>::const_iterator qlist_cit_t;
|
47 |
|
52 |
|
48 |
static const int original_term_wqf_booster = 10;
|
53 |
static const int original_term_wqf_booster = 10;
|
|
... |
|
... |
69 |
* You should have received a copy of the GNU General Public License
|
74 |
* You should have received a copy of the GNU General Public License
|
70 |
* along with this program; if not, write to the Free Software
|
75 |
* along with this program; if not, write to the Free Software
|
71 |
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
76 |
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
72 |
* USA
|
77 |
* USA
|
73 |
*/
|
78 |
*/
|
|
|
79 |
|
|
|
80 |
#ifdef RCL_INDEX_STRIPCHARS
|
|
|
81 |
#define bufprefix(BUF, L) {(BUF)[0] = L;}
|
|
|
82 |
#define bpoffs() 1
|
|
|
83 |
#else
|
|
|
84 |
static inline void bufprefix(char *buf, char c)
|
|
|
85 |
{
|
|
|
86 |
if (o_index_stripchars) {
|
|
|
87 |
buf[0] = c;
|
|
|
88 |
} else {
|
|
|
89 |
buf[0] = ':';
|
|
|
90 |
buf[1] = c;
|
|
|
91 |
buf[2] = ':';
|
|
|
92 |
}
|
|
|
93 |
}
|
|
|
94 |
static inline int bpoffs()
|
|
|
95 |
{
|
|
|
96 |
return o_index_stripchars ? 1 : 3;
|
|
|
97 |
}
|
|
|
98 |
#endif
|
|
|
99 |
|
74 |
/**
 * Build a Xapian query matching documents dated within an inclusive range.
 *
 * The result is an OP_OR over date terms of three granularities. Each term
 * is an index prefix (written by bufprefix(), bpoffs() characters long)
 * followed by zero-padded digits:
 *   - 'D' prefix + YYYYMMDD for every day of a partially covered month,
 *   - 'M' prefix + YYYYMM   for every fully covered month,
 *   - 'Y' prefix + YYYY     for every year strictly between y1 and y2.
 *
 * @param y1 start year    @param m1 start month    @param d1 start day
 * @param y2 end year      @param m2 end month      @param d2 end day
 * @return the OR of all generated terms.
 *
 * NOTE(review): the code assumes the start date is not after the end date,
 * and that monthdays(m, y) yields the number of days in month m of year y --
 * confirm against the callers and the helper's definition.
 */
static Xapian::Query
date_range_filter(int y1, int m1, int d1, int y2, int m2, int d2)
{
    // Xapian uses a smallbuf and snprintf. Can't be bothered, we're
    // only doing %d's !
    char buf[200];

    // Offsets of the year, month and day digits inside buf. The prefix
    // length depends on the index type, hence bpoffs().
    const int yoff = bpoffs();
    const int moff = yoff + 4;
    const int doff = moff + 2;

    bufprefix(buf, 'D');
    sprintf(buf + yoff, "%04d%02d", y1, m1);
    vector<Xapian::Query> v;

    int d_last = monthdays(m1, y1);
    int d_end = d_last;
    if (y1 == y2 && m1 == m2 && d2 < d_last) {
        d_end = d2;
    }
    // Deal with any initial partial month
    if (d1 > 1 || d_end < d_last) {
        for ( ; d1 <= d_end ; d1++) {
            sprintf(buf + doff, "%02d", d1);
            v.push_back(Xapian::Query(buf));
        }
    } else {
        // The whole first month is covered: a single month term is enough.
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    // Range confined to a single month: we are done.
    if (y1 == y2 && m1 == m2) {
        return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
    }

    // Full months following the first one, up to the end of the first year
    // or to the month before the last one if the range stays within a year.
    int m_last = (y1 < y2) ? 12 : m2 - 1;
    while (++m1 <= m_last) {
        sprintf(buf + moff, "%02d", m1);
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    if (y1 < y2) {
        // Full years strictly between the first and last ones.
        while (++y1 < y2) {
            sprintf(buf + yoff, "%04d", y1);
            bufprefix(buf, 'Y');
            v.push_back(Xapian::Query(buf));
        }
        // Full months of the last year preceding the final month.
        sprintf(buf + yoff, "%04d", y2);
        bufprefix(buf, 'M');
        for (m1 = 1; m1 < m2; m1++) {
            sprintf(buf + moff, "%02d", m1);
            v.push_back(Xapian::Query(buf));
        }
    }

    // Set the final month. This must go at the month offset (4 digits past
    // the prefix): writing it any earlier would overwrite part of the year
    // and truncate the term.
    sprintf(buf + moff, "%02d", m2);

    // Deal with any final partial month
    if (d2 < monthdays(m2, y2)) {
        bufprefix(buf, 'D');
        for (d1 = 1 ; d1 <= d2; d1++) {
            sprintf(buf + doff, "%02d", d1);
            v.push_back(Xapian::Query(buf));
        }
    } else {
        bufprefix(buf, 'M');
        v.push_back(Xapian::Query(buf));
    }

    return Xapian::Query(Xapian::Query::OP_OR, v.begin(), v.end());
}
|
|
... |
|
... |
170 |
}
|
197 |
}
|
171 |
tps = exptps;
|
198 |
tps = exptps;
|
172 |
return true;
|
199 |
return true;
|
173 |
}
|
200 |
}
|
174 |
|
201 |
|
175 |
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
202 |
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
|
|
203 |
vector<SearchDataClause*>& query,
|
|
|
204 |
string& reason, void *d)
|
176 |
{
|
205 |
{
|
177 |
LOGDEB2(("SearchData::toNativeQuery: stemlang [%s]\n",
|
|
|
178 |
m_stemlang.c_str()));
|
|
|
179 |
Xapian::Query xq;
|
206 |
Xapian::Query xq;
|
180 |
m_reason.erase();
|
|
|
181 |
|
|
|
182 |
// Walk the clause list translating each in turn and building the
|
|
|
183 |
// Xapian query tree
|
|
|
184 |
for (qlist_it_t it = m_query.begin(); it != m_query.end(); it++) {
|
207 |
for (qlist_it_t it = query.begin(); it != query.end(); it++) {
|
185 |
Xapian::Query nq;
|
208 |
Xapian::Query nq;
|
186 |
if (!(*it)->toNativeQuery(db, &nq, m_stemlang)) {
|
209 |
if (!(*it)->toNativeQuery(db, &nq)) {
|
187 |
LOGERR(("SearchData::toNativeQuery: failed\n"));
|
210 |
LOGERR(("SearchData::clausesToQuery: toNativeQuery failed\n"));
|
188 |
m_reason = (*it)->getReason();
|
211 |
reason = (*it)->getReason();
|
189 |
return false;
|
212 |
return false;
|
190 |
}
|
213 |
}
|
191 |
if (nq.empty()) {
|
214 |
if (nq.empty()) {
|
192 |
LOGDEB(("SearchData::toNativeQuery: skipping empty clause\n"));
|
215 |
LOGDEB(("SearchData::clausesToQuery: skipping empty clause\n"));
|
193 |
continue;
|
216 |
continue;
|
194 |
}
|
217 |
}
|
195 |
// If this structure is an AND list, must use AND_NOT for excl clauses.
|
218 |
// If this structure is an AND list, must use AND_NOT for excl clauses.
|
196 |
// Else this is an OR list, and there can't be excl clauses (checked by
|
219 |
// Else this is an OR list, and there can't be excl clauses (checked by
|
197 |
// addClause())
|
220 |
// addClause())
|
198 |
Xapian::Query::op op;
|
221 |
Xapian::Query::op op;
|
199 |
if (m_tp == SCLT_AND) {
|
222 |
if (tp == SCLT_AND) {
|
200 |
if ((*it)->m_tp == SCLT_EXCL) {
|
223 |
if ((*it)->m_tp == SCLT_EXCL) {
|
201 |
op = Xapian::Query::OP_AND_NOT;
|
224 |
op = Xapian::Query::OP_AND_NOT;
|
202 |
} else {
|
225 |
} else {
|
203 |
op = Xapian::Query::OP_AND;
|
226 |
op = Xapian::Query::OP_AND;
|
204 |
}
|
227 |
}
|
|
... |
|
... |
214 |
xq = Xapian::Query(op, xq, nq);
|
237 |
xq = Xapian::Query(op, xq, nq);
|
215 |
}
|
238 |
}
|
216 |
}
|
239 |
}
|
217 |
if (xq.empty())
|
240 |
if (xq.empty())
|
218 |
xq = Xapian::Query::MatchAll;
|
241 |
xq = Xapian::Query::MatchAll;
|
|
|
242 |
|
|
|
243 |
*((Xapian::Query *)d) = xq;
|
|
|
244 |
return true;
|
|
|
245 |
}
|
|
|
246 |
|
|
|
247 |
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
|
|
248 |
{
|
|
|
249 |
LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
|
|
|
250 |
m_reason.erase();
|
|
|
251 |
|
|
|
252 |
// Walk the clause list translating each in turn and building the
|
|
|
253 |
// Xapian query tree
|
|
|
254 |
Xapian::Query xq;
|
|
|
255 |
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
|
|
|
256 |
LOGERR(("SearchData::toNativeQuery: clausesToQuery failed\n"));
|
|
|
257 |
return false;
|
|
|
258 |
}
|
219 |
|
259 |
|
220 |
if (m_haveDates) {
|
260 |
if (m_haveDates) {
|
221 |
// If one of the extremities is unset, compute db extremas
|
261 |
// If one of the extremities is unset, compute db extremas
|
222 |
if (m_dates.y1 == 0 || m_dates.y2 == 0) {
|
262 |
if (m_dates.y1 == 0 || m_dates.y2 == 0) {
|
223 |
int minyear = 1970, maxyear = 2100;
|
263 |
int minyear = 1970, maxyear = 2100;
|
|
... |
|
... |
324 |
dit != m_dirspecs.end(); dit++) {
|
364 |
dit != m_dirspecs.end(); dit++) {
|
325 |
vector<string> vpath;
|
365 |
vector<string> vpath;
|
326 |
stringToTokens(dit->dir, vpath, "/");
|
366 |
stringToTokens(dit->dir, vpath, "/");
|
327 |
vector<string> pvpath;
|
367 |
vector<string> pvpath;
|
328 |
if (dit->dir[0] == '/')
|
368 |
if (dit->dir[0] == '/')
|
329 |
pvpath.push_back(pathelt_prefix);
|
369 |
pvpath.push_back(wrap_prefix(pathelt_prefix));
|
330 |
for (vector<string>::const_iterator pit = vpath.begin();
|
370 |
for (vector<string>::const_iterator pit = vpath.begin();
|
331 |
pit != vpath.end(); pit++){
|
371 |
pit != vpath.end(); pit++){
|
332 |
pvpath.push_back(pathelt_prefix + *pit);
|
372 |
pvpath.push_back(wrap_prefix(pathelt_prefix) + *pit);
|
333 |
}
|
373 |
}
|
334 |
Xapian::Query::op tdop;
|
374 |
Xapian::Query::op tdop;
|
335 |
if (dit->weight == 1.0) {
|
375 |
if (dit->weight == 1.0) {
|
336 |
tdop = dit->exclude ?
|
376 |
tdop = dit->exclude ?
|
337 |
Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER;
|
377 |
Xapian::Query::OP_AND_NOT : Xapian::Query::OP_FILTER;
|
|
... |
|
... |
444 |
addClause(nclp);
|
484 |
addClause(nclp);
|
445 |
} else {
|
485 |
} else {
|
446 |
// My type is AND. Change it to OR and insert two queries, one
|
486 |
// My type is AND. Change it to OR and insert two queries, one
|
447 |
// being the original query as a subquery, the other the
|
487 |
// being the original query as a subquery, the other the
|
448 |
// phrase.
|
488 |
// phrase.
|
449 |
SearchData *sd = new SearchData(m_tp);
|
489 |
SearchData *sd = new SearchData(m_tp, m_stemlang);
|
450 |
sd->m_query = m_query;
|
490 |
sd->m_query = m_query;
|
451 |
sd->m_stemlang = m_stemlang;
|
491 |
sd->m_stemlang = m_stemlang;
|
452 |
m_tp = SCLT_OR;
|
492 |
m_tp = SCLT_OR;
|
453 |
m_query.clear();
|
493 |
m_query.clear();
|
454 |
SearchDataClauseSub *oq =
|
494 |
SearchDataClauseSub *oq =
|
|
... |
|
... |
584 |
: m_db(db), m_field(field), m_stemlang(stmlng),
|
624 |
: m_db(db), m_field(field), m_stemlang(stmlng),
|
585 |
m_doBoostUserTerms(boostUser), m_hld(hld)
|
625 |
m_doBoostUserTerms(boostUser), m_hld(hld)
|
586 |
{ }
|
626 |
{ }
|
587 |
|
627 |
|
588 |
bool processUserString(const string &iq,
|
628 |
bool processUserString(const string &iq,
|
|
|
629 |
int mods,
|
589 |
string &ermsg,
|
630 |
string &ermsg,
|
590 |
vector<Xapian::Query> &pqueries,
|
631 |
vector<Xapian::Query> &pqueries,
|
591 |
const StopList &stops,
|
|
|
592 |
int slack = 0, bool useNear = false);
|
632 |
int slack = 0, bool useNear = false);
|
593 |
private:
|
633 |
private:
|
594 |
void expandTerm(bool dont, const string& term, vector<string>& exp,
|
634 |
void expandTerm(int mods,
|
|
|
635 |
const string& term, vector<string>& exp,
|
595 |
string& sterm, const string& prefix);
|
636 |
string& sterm, const string& prefix);
|
596 |
// After splitting entry on whitespace: process non-phrase element
|
637 |
// After splitting entry on whitespace: process non-phrase element
|
597 |
void processSimpleSpan(const string& span, bool nostemexp,
|
638 |
void processSimpleSpan(const string& span,
|
|
|
639 |
int mods,
|
598 |
vector<Xapian::Query> &pqueries);
|
640 |
vector<Xapian::Query> &pqueries);
|
599 |
// Process phrase/near element
|
641 |
// Process phrase/near element
|
600 |
void processPhraseOrNear(TextSplitQ *splitData,
|
642 |
void processPhraseOrNear(TextSplitQ *splitData,
|
|
|
643 |
int mods,
|
601 |
vector<Xapian::Query> &pqueries,
|
644 |
vector<Xapian::Query> &pqueries,
|
602 |
bool useNear, int slack, int mods);
|
645 |
bool useNear, int slack);
|
603 |
|
646 |
|
604 |
Db& m_db;
|
647 |
Db& m_db;
|
605 |
const string& m_field;
|
648 |
const string& m_field;
|
606 |
const string& m_stemlang;
|
649 |
const string& m_stemlang;
|
607 |
bool m_doBoostUserTerms;
|
650 |
const bool m_doBoostUserTerms;
|
608 |
HighlightData& m_hld;
|
651 |
HighlightData& m_hld;
|
609 |
};
|
652 |
};
|
610 |
|
653 |
|
611 |
#if 1
|
654 |
#if 1
|
612 |
static void listVector(const string& what, const vector<string>&l)
|
655 |
static void listVector(const string& what, const vector<string>&l)
|
|
... |
|
... |
617 |
}
|
660 |
}
|
618 |
LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
|
661 |
LOGDEB(("%s: %s\n", what.c_str(), a.c_str()));
|
619 |
}
|
662 |
}
|
620 |
#endif
|
663 |
#endif
|
621 |
|
664 |
|
622 |
/** Take simple term and expand stem and wildcards
|
665 |
/** Expand term into term list, using appropriate mode: stem, wildcards,
|
|
|
666 |
* diacritics...
|
623 |
*
|
667 |
*
|
624 |
* @param nostemexp don't perform stem expansion. This is mainly used to
|
668 |
* @param mods stem expansion, case and diacritics sensitivity control.
|
625 |
* prevent stem expansion inside phrases (because the user probably
|
|
|
626 |
* does not expect it). This does NOT prevent wild card expansion.
|
|
|
627 |
* Other factors than nostemexp can prevent stem expansion:
|
|
|
628 |
* a null stemlang, resulting from a global user preference, a
|
|
|
629 |
* capitalized term, or wildcard(s)
|
|
|
630 |
* @param term input single word
|
669 |
* @param term input single word
|
631 |
* @param exp output expansion list
|
670 |
* @param exp output expansion list
|
632 |
* @param sterm output original input term if there were no wildcards
|
671 |
* @param sterm output original input term if there were no wildcards
|
|
|
672 |
* @param prefix field prefix in index. We could recompute it, but the caller
|
|
|
673 |
* has it already. Used in the simple case where there is nothing to expand,
|
|
|
674 |
* and we just return the prefixed term (else Db::termMatch deals with it).
|
633 |
*/
|
675 |
*/
|
634 |
void StringToXapianQ::expandTerm(bool nostemexp,
|
676 |
void StringToXapianQ::expandTerm(int mods,
|
635 |
const string& term,
|
677 |
const string& term,
|
636 |
vector<string>& exp,
|
678 |
vector<string>& oexp, string &sterm,
|
637 |
string &sterm, const string& prefix)
|
679 |
const string& prefix)
|
638 |
{
|
680 |
{
|
639 |
LOGDEB2(("expandTerm: field [%s] term [%s] stemlang [%s] nostemexp %d\n",
|
681 |
LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
|
640 |
m_field.c_str(), term.c_str(), m_stemlang.c_str(), nostemexp));
|
682 |
mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
|
641 |
sterm.erase();
|
683 |
sterm.clear();
|
642 |
exp.clear();
|
684 |
oexp.clear();
|
643 |
if (term.empty()) {
|
685 |
if (term.empty())
|
644 |
return;
|
686 |
return;
|
645 |
}
|
|
|
646 |
|
687 |
|
647 |
bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
|
688 |
bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
|
648 |
|
689 |
|
|
|
690 |
// If there are no wildcards, add term to the list of user-entered terms
|
|
|
691 |
if (!haswild)
|
|
|
692 |
m_hld.uterms.insert(term);
|
|
|
693 |
|
|
|
694 |
bool nostemexp = (mods & SearchDataClause::SDCM_NOSTEMMING) != 0;
|
|
|
695 |
|
649 |
// No stemming if there are wildcards or prevented globally.
|
696 |
// No stem expansion if there are wildcards or if prevented by caller
|
650 |
if (haswild || m_stemlang.empty()) {
|
697 |
if (haswild || m_stemlang.empty()) {
|
651 |
LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
|
698 |
LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
|
652 |
nostemexp = true;
|
699 |
nostemexp = true;
|
653 |
}
|
700 |
}
|
654 |
|
701 |
|
655 |
if (!haswild)
|
702 |
bool noexpansion = nostemexp && !haswild;
|
656 |
m_hld.uterms.insert(term);
|
|
|
657 |
|
703 |
|
658 |
if (nostemexp && !haswild) {
|
704 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
705 |
bool diac_sensitive = (mods & SearchDataClause::SDCM_DIACSENS) != 0;
|
|
|
706 |
bool case_sensitive = (mods & SearchDataClause::SDCM_CASESENS) != 0;
|
|
|
707 |
|
|
|
708 |
if (o_index_stripchars) {
|
|
|
709 |
diac_sensitive = case_sensitive = false;
|
|
|
710 |
} else {
|
|
|
711 |
// If we are working with a raw index, apply the rules for case and
|
|
|
712 |
// diacritics sensitivity.
|
|
|
713 |
|
|
|
714 |
// If any character has a diacritic, we become
|
|
|
715 |
// diacritic-sensitive. Note that the way that the test is
|
|
|
716 |
// performed (conversion+comparison) will automatically ignore
|
|
|
717 |
// accented characters which are actually a separate letter
|
|
|
718 |
if (unachasaccents(term))
|
|
|
719 |
diac_sensitive = true;
|
|
|
720 |
|
|
|
721 |
// If any character apart the first is uppercase, we become
|
|
|
722 |
// case-sensitive. The first character is reserved for
|
|
|
723 |
// turning off stemming. You need to use a query language
|
|
|
724 |
// modifier to search for Floor in a case-sensitive way.
|
|
|
725 |
Utf8Iter it(term);
|
|
|
726 |
it++;
|
|
|
727 |
if (unachasuppercase(term.substr(it.getBpos())))
|
|
|
728 |
case_sensitive = true;
|
|
|
729 |
|
|
|
730 |
// If we are sensitive to case or diacritics turn stemming off
|
|
|
731 |
if (diac_sensitive || case_sensitive)
|
|
|
732 |
nostemexp = true;
|
|
|
733 |
|
|
|
734 |
if (!case_sensitive || !diac_sensitive)
|
|
|
735 |
noexpansion = false;
|
|
|
736 |
}
|
|
|
737 |
#endif
|
|
|
738 |
|
|
|
739 |
if (noexpansion) {
|
659 |
sterm = term;
|
740 |
sterm = term;
|
660 |
exp.resize(1);
|
741 |
oexp.push_back(prefix + term);
|
661 |
exp[0] = prefix + term;
|
742 |
LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
|
662 |
} else {
|
743 |
return;
|
|
|
744 |
}
|
|
|
745 |
|
|
|
746 |
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
|
|
747 |
XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all",
|
|
|
748 |
&unacfoldtrans);
|
|
|
749 |
vector<string> lexp;
|
|
|
750 |
|
663 |
TermMatchResult res;
|
751 |
TermMatchResult res;
|
664 |
if (haswild) {
|
752 |
if (haswild) {
|
|
|
753 |
// Note that if there are wildcards, we do a direct from-index
|
|
|
754 |
// expansion, which means that we are casediac-sensitive. There
|
|
|
755 |
// would be nothing to prevent us to expand from the casediac
|
|
|
756 |
// synonyms first. To be done later
|
665 |
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1,
|
757 |
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1,
|
666 |
m_field);
|
758 |
m_field);
|
667 |
} else {
|
759 |
goto termmatchtoresult;
|
|
|
760 |
}
|
|
|
761 |
|
668 |
sterm = term;
|
762 |
sterm = term;
|
|
|
763 |
|
|
|
764 |
#ifdef RCL_INDEX_STRIPCHARS
|
|
|
765 |
|
669 |
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1,
|
766 |
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
|
670 |
m_field);
|
767 |
|
|
|
768 |
#else
|
|
|
769 |
|
|
|
770 |
if (o_index_stripchars) {
|
|
|
771 |
// If the index is raw, we can only come here if nostemexp is unset
|
|
|
772 |
// and we just need stem expansion.
|
|
|
773 |
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
|
|
|
774 |
goto termmatchtoresult;
|
|
|
775 |
}
|
|
|
776 |
|
|
|
777 |
// No stem expansion when diacritic or case sensitivity is set, it
|
|
|
778 |
// makes no sense (it would mess with the diacritics anyway if
|
|
|
779 |
// they are not in the stem part). In these 3 cases, perform
|
|
|
780 |
// appropriate expansion from the charstripping db, and do a bogus
|
|
|
781 |
// wildcard expansion (there is no wild card) to generate the
|
|
|
782 |
// result:
|
|
|
783 |
|
|
|
784 |
if (diac_sensitive && case_sensitive) {
|
|
|
785 |
// No expansion whatsoever
|
|
|
786 |
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, m_field);
|
|
|
787 |
goto termmatchtoresult;
|
|
|
788 |
}
|
|
|
789 |
|
|
|
790 |
if (diac_sensitive) {
|
|
|
791 |
// Expand for accents and case, filtering for same accents,
|
|
|
792 |
// then bogus wildcard expansion for generating result
|
|
|
793 |
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
|
|
794 |
synac.synExpand(term, lexp, &foldtrans);
|
|
|
795 |
goto exptotermatch;
|
|
|
796 |
}
|
|
|
797 |
|
|
|
798 |
if (case_sensitive) {
|
|
|
799 |
// Expand for accents and case, filtering for same case, then
|
|
|
800 |
// bogus wildcard expansion for generating result
|
|
|
801 |
SynTermTransUnac unactrans(UNACOP_UNAC);
|
|
|
802 |
synac.synExpand(term, lexp, &unactrans);
|
|
|
803 |
goto exptotermatch;
|
|
|
804 |
}
|
|
|
805 |
|
|
|
806 |
// We are neither accent- nor case- sensitive and may need stem
|
|
|
807 |
// expansion or not.
|
|
|
808 |
|
|
|
809 |
// Expand for accents and case
|
|
|
810 |
synac.synExpand(term, lexp);
|
|
|
811 |
LOGDEB(("ExpTerm: casediac: %s\n", stringsToString(lexp).c_str()));
|
|
|
812 |
if (nostemexp)
|
|
|
813 |
goto exptotermatch;
|
|
|
814 |
|
|
|
815 |
// Need stem expansion. Lowercase the result of accent and case
|
|
|
816 |
// expansion for input to stemdb.
|
|
|
817 |
for (unsigned int i = 0; i < lexp.size(); i++) {
|
|
|
818 |
string lower;
|
|
|
819 |
unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
|
|
|
820 |
lexp[i] = lower;
|
|
|
821 |
}
|
|
|
822 |
sort(lexp.begin(), lexp.end());
|
|
|
823 |
{
|
|
|
824 |
vector<string>::iterator uit = unique(lexp.begin(), lexp.end());
|
|
|
825 |
lexp.resize(uit - lexp.begin());
|
|
|
826 |
StemDb db(m_db.m_ndb->xrdb);
|
|
|
827 |
vector<string> exp1;
|
|
|
828 |
for (vector<string>::const_iterator it = lexp.begin();
|
|
|
829 |
it != lexp.end(); it++) {
|
|
|
830 |
db.stemExpand(m_stemlang, *it, exp1);
|
671 |
}
|
831 |
}
|
|
|
832 |
LOGDEB(("ExpTerm: stem: %s\n", stringsToString(exp1).c_str()));
|
|
|
833 |
|
|
|
834 |
// Expand the resulting list for case (all stemdb content
|
|
|
835 |
// is lowercase)
|
|
|
836 |
lexp.clear();
|
|
|
837 |
for (vector<string>::const_iterator it = exp1.begin();
|
|
|
838 |
it != exp1.end(); it++) {
|
|
|
839 |
synac.synExpand(*it, lexp);
|
|
|
840 |
}
|
|
|
841 |
sort(lexp.begin(), lexp.end());
|
|
|
842 |
uit = unique(lexp.begin(), lexp.end());
|
|
|
843 |
lexp.resize(uit - lexp.begin());
|
|
|
844 |
}
|
|
|
845 |
LOGDEB(("ExpTerm: case exp of stem: %s\n", stringsToString(lexp).c_str()));
|
|
|
846 |
|
|
|
847 |
// Bogus wildcard expand to generate the result
|
|
|
848 |
exptotermatch:
|
|
|
849 |
for (vector<string>::const_iterator it = lexp.begin();
|
|
|
850 |
it != lexp.end(); it++) {
|
|
|
851 |
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it,
|
|
|
852 |
res, -1, m_field);
|
|
|
853 |
}
|
|
|
854 |
#endif
|
|
|
855 |
|
|
|
856 |
// Term match entries to vector of terms
|
|
|
857 |
termmatchtoresult:
|
672 |
for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();
|
858 |
for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();
|
673 |
it != res.entries.end(); it++) {
|
859 |
it != res.entries.end(); it++) {
|
674 |
exp.push_back(it->term);
|
860 |
oexp.push_back(it->term);
|
675 |
}
|
|
|
676 |
}
|
861 |
}
|
|
|
862 |
LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
|
677 |
}
|
863 |
}
|
678 |
|
864 |
|
679 |
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
|
865 |
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
|
680 |
void multiply_groups(vector<vector<string> >::const_iterator vvit,
|
866 |
void multiply_groups(vector<vector<string> >::const_iterator vvit,
|
681 |
vector<vector<string> >::const_iterator vvend,
|
867 |
vector<vector<string> >::const_iterator vvend,
|
|
... |
|
... |
708 |
// vector)
|
894 |
// vector)
|
709 |
comb.pop_back();
|
895 |
comb.pop_back();
|
710 |
}
|
896 |
}
|
711 |
}
|
897 |
}
|
712 |
|
898 |
|
713 |
void StringToXapianQ::processSimpleSpan(const string& span, bool nostemexp,
|
899 |
void StringToXapianQ::processSimpleSpan(const string& span,
|
|
|
900 |
int mods,
|
714 |
vector<Xapian::Query> &pqueries)
|
901 |
vector<Xapian::Query> &pqueries)
|
715 |
{
|
902 |
{
|
716 |
LOGDEB2(("StringToXapianQ::processSimpleSpan: [%s] nostemexp %d\n",
|
903 |
LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
|
717 |
span.c_str(), int(nostemexp)));
|
904 |
span.c_str(), (unsigned int)mods));
|
718 |
vector<string> exp;
|
905 |
vector<string> exp;
|
719 |
string sterm; // dumb version of user term
|
906 |
string sterm; // dumb version of user term
|
720 |
|
907 |
|
721 |
string prefix;
|
908 |
string prefix;
|
722 |
const FieldTraits *ftp;
|
909 |
const FieldTraits *ftp;
|
723 |
if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
|
910 |
if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
|
724 |
prefix = ftp->pfx;
|
911 |
prefix = wrap_prefix(ftp->pfx);
|
725 |
}
|
912 |
}
|
726 |
|
913 |
|
727 |
expandTerm(nostemexp, span, exp, sterm, prefix);
|
914 |
expandTerm(mods, span, exp, sterm, prefix);
|
728 |
|
915 |
|
729 |
// Set up the highlight data. No prefix should go in there
|
916 |
// Set up the highlight data. No prefix should go in there
|
730 |
for (vector<string>::const_iterator it = exp.begin();
|
917 |
for (vector<string>::const_iterator it = exp.begin();
|
731 |
it != exp.end(); it++) {
|
918 |
it != exp.end(); it++) {
|
732 |
m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
|
919 |
m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
|
|
... |
|
... |
753 |
// User entry element had several terms: transform into a PHRASE or
|
940 |
// User entry element had several terms: transform into a PHRASE or
|
754 |
// NEAR xapian query, the elements of which can themselves be OR
|
941 |
// NEAR xapian query, the elements of which can themselves be OR
|
755 |
// queries if the terms get expanded by stemming or wildcards (we
|
942 |
// queries if the terms get expanded by stemming or wildcards (we
|
756 |
// don't do stemming for PHRASE though)
|
943 |
// don't do stemming for PHRASE though)
|
757 |
void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
944 |
void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
|
|
945 |
int mods,
|
758 |
vector<Xapian::Query> &pqueries,
|
946 |
vector<Xapian::Query> &pqueries,
|
759 |
bool useNear, int slack, int mods)
|
947 |
bool useNear, int slack)
|
760 |
{
|
948 |
{
|
761 |
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
|
949 |
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
|
762 |
Xapian::Query::OP_PHRASE;
|
950 |
Xapian::Query::OP_PHRASE;
|
763 |
vector<Xapian::Query> orqueries;
|
951 |
vector<Xapian::Query> orqueries;
|
764 |
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
952 |
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
|
... |
|
... |
767 |
vector<vector<string> >groups;
|
955 |
vector<vector<string> >groups;
|
768 |
|
956 |
|
769 |
string prefix;
|
957 |
string prefix;
|
770 |
const FieldTraits *ftp;
|
958 |
const FieldTraits *ftp;
|
771 |
if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
|
959 |
if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
|
772 |
prefix = ftp->pfx;
|
960 |
prefix = wrap_prefix(ftp->pfx);
|
773 |
}
|
961 |
}
|
774 |
|
962 |
|
775 |
if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
|
963 |
if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
|
776 |
orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
|
964 |
orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
|
777 |
slack++;
|
965 |
slack++;
|
|
... |
|
... |
788 |
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
|
976 |
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
|
789 |
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
977 |
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
790 |
|| hadmultiple
|
978 |
|| hadmultiple
|
791 |
#endif // single OR inside NEAR
|
979 |
#endif // single OR inside NEAR
|
792 |
;
|
980 |
;
|
793 |
|
981 |
int lmods = mods;
|
|
|
982 |
if (nostemexp)
|
|
|
983 |
lmods |= SearchDataClause::SDCM_NOSTEMMING;
|
794 |
string sterm;
|
984 |
string sterm;
|
795 |
vector<string> exp;
|
985 |
vector<string> exp;
|
796 |
expandTerm(nostemexp, *it, exp, sterm, prefix);
|
986 |
expandTerm(lmods, *it, exp, sterm, prefix);
|
797 |
LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
|
987 |
LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
|
798 |
listVector("", exp);
|
988 |
listVector("", exp);
|
799 |
// groups is used for highlighting, we don't want prefixes in there.
|
989 |
// groups is used for highlighting, we don't want prefixes in there.
|
800 |
vector<string> noprefs;
|
990 |
vector<string> noprefs;
|
801 |
for (vector<string>::const_iterator it = exp.begin();
|
991 |
for (vector<string>::const_iterator it = exp.begin();
|
|
... |
|
... |
880 |
* composition of the phrase terms (no stem expansion in this case)
|
1070 |
* composition of the phrase terms (no stem expansion in this case)
|
881 |
* @return the subquery count (either or'd stem-expanded terms or phrase word
|
1071 |
* @return the subquery count (either or'd stem-expanded terms or phrase word
|
882 |
* count)
|
1072 |
* count)
|
883 |
*/
|
1073 |
*/
|
884 |
bool StringToXapianQ::processUserString(const string &iq,
|
1074 |
bool StringToXapianQ::processUserString(const string &iq,
|
|
|
1075 |
int mods,
|
885 |
string &ermsg,
|
1076 |
string &ermsg,
|
886 |
vector<Xapian::Query> &pqueries,
|
1077 |
vector<Xapian::Query> &pqueries,
|
887 |
const StopList& stops,
|
|
|
888 |
int slack,
|
1078 |
int slack,
|
889 |
bool useNear
|
1079 |
bool useNear
|
890 |
)
|
1080 |
)
|
891 |
{
|
1081 |
{
|
892 |
LOGDEB(("StringToXapianQ:: query string: [%s], slack %d, near %d\n", iq.c_str(), slack, useNear));
|
1082 |
LOGDEB(("StringToXapianQ:: qstr [%s] mods 0x%x slack %d near %d\n",
|
|
|
1083 |
iq.c_str(), mods, slack, useNear));
|
893 |
ermsg.erase();
|
1084 |
ermsg.erase();
|
|
|
1085 |
|
|
|
1086 |
const StopList stops = m_db.getStopList();
|
894 |
|
1087 |
|
895 |
// Simple whitespace-split input into user-level words and
|
1088 |
// Simple whitespace-split input into user-level words and
|
896 |
// double-quoted phrases: word1 word2 "this is a phrase".
|
1089 |
// double-quoted phrases: word1 word2 "this is a phrase".
|
897 |
//
|
1090 |
//
|
898 |
// The text splitter may further still decide that the resulting
|
1091 |
// The text splitter may further still decide that the resulting
|
|
... |
|
... |
906 |
// expansion and transform into an appropriate Xapian::Query
|
1099 |
// expansion and transform into an appropriate Xapian::Query
|
907 |
try {
|
1100 |
try {
|
908 |
for (vector<string>::iterator it = phrases.begin();
|
1101 |
for (vector<string>::iterator it = phrases.begin();
|
909 |
it != phrases.end(); it++) {
|
1102 |
it != phrases.end(); it++) {
|
910 |
LOGDEB0(("strToXapianQ: phrase/word: [%s]\n", it->c_str()));
|
1103 |
LOGDEB0(("strToXapianQ: phrase/word: [%s]\n", it->c_str()));
|
|
|
1104 |
// Anchoring modifiers
|
911 |
int mods = stringToMods(*it);
|
1105 |
int amods = stringToMods(*it);
|
912 |
int terminc = mods != 0 ? 1 : 0;
|
1106 |
int terminc = amods != 0 ? 1 : 0;
|
|
|
1107 |
mods |= amods;
|
913 |
// If there are multiple spans in this element, including
|
1108 |
// If there are multiple spans in this element, including
|
914 |
// at least one composite, we have to increase the slack
|
1109 |
// at least one composite, we have to increase the slack
|
915 |
// else a phrase query including a span would fail.
|
1110 |
// else a phrase query including a span would fail.
|
916 |
// Ex: "term0@term1 term2" is onlyspans-split as:
|
1111 |
// Ex: "term0@term1 term2" is onlyspans-split as:
|
917 |
// 0 term0@term1 0 12
|
1112 |
// 0 term0@term1 0 12
|
|
... |
|
... |
928 |
TermProcQ tpq;
|
1123 |
TermProcQ tpq;
|
929 |
TermProc *nxt = &tpq;
|
1124 |
TermProc *nxt = &tpq;
|
930 |
TermProcStop tpstop(nxt, stops); nxt = &tpstop;
|
1125 |
TermProcStop tpstop(nxt, stops); nxt = &tpstop;
|
931 |
//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
|
1126 |
//TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
|
932 |
//tpcommon.onlygrams(true);
|
1127 |
//tpcommon.onlygrams(true);
|
933 |
TermProcPrep tpprep(nxt); nxt = &tpprep;
|
1128 |
TermProcPrep tpprep(nxt);
|
|
|
1129 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
1130 |
if (o_index_stripchars)
|
|
|
1131 |
#endif
|
|
|
1132 |
nxt = &tpprep;
|
934 |
|
1133 |
|
935 |
TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
|
1134 |
TextSplitQ splitter(TextSplit::Flags(TextSplit::TXTS_ONLYSPANS |
|
936 |
TextSplit::TXTS_KEEPWILD),
|
1135 |
TextSplit::TXTS_KEEPWILD),
|
937 |
stops, nxt);
|
1136 |
stops, nxt);
|
938 |
tpq.setTSQ(&splitter);
|
1137 |
tpq.setTSQ(&splitter);
|
939 |
splitter.text_to_words(*it);
|
1138 |
splitter.text_to_words(*it);
|
940 |
|
1139 |
|
941 |
slack += splitter.lastpos - splitter.terms.size() + 1;
|
1140 |
slack += splitter.lastpos - splitter.terms.size() + 1;
|
942 |
|
1141 |
|
943 |
LOGDEB0(("strToXapianQ: termcount: %d\n", splitter.terms.size()));
|
1142 |
LOGDEB0(("strToXapianQ: termcount: %d\n", splitter.terms.size()));
|
944 |
switch (splitter.terms.size() + terminc) {
|
1143 |
switch (splitter.terms.size() + terminc) {
|
945 |
case 0:
|
1144 |
case 0:
|
946 |
continue;// ??
|
1145 |
continue;// ??
|
947 |
case 1:
|
1146 |
case 1: {
|
|
|
1147 |
int lmods = mods;
|
|
|
1148 |
if (splitter.nostemexps.front())
|
|
|
1149 |
lmods |= SearchDataClause::SDCM_NOSTEMMING;
|
948 |
m_hld.ugroups.push_back(vector<string>(1, *it));
|
1150 |
m_hld.ugroups.push_back(vector<string>(1, *it));
|
949 |
processSimpleSpan(splitter.terms.front(),
|
1151 |
processSimpleSpan(splitter.terms.front(), lmods, pqueries);
|
950 |
splitter.nostemexps.front(), pqueries);
|
1152 |
}
|
951 |
break;
|
1153 |
break;
|
952 |
default:
|
1154 |
default:
|
953 |
m_hld.ugroups.push_back(vector<string>(1, *it));
|
1155 |
m_hld.ugroups.push_back(vector<string>(1, *it));
|
954 |
processPhraseOrNear(&splitter, pqueries, useNear, slack, mods);
|
1156 |
processPhraseOrNear(&splitter, mods, pqueries, useNear, slack);
|
955 |
}
|
1157 |
}
|
956 |
}
|
1158 |
}
|
957 |
} catch (const Xapian::Error &e) {
|
1159 |
} catch (const Xapian::Error &e) {
|
958 |
ermsg = e.get_msg();
|
1160 |
ermsg = e.get_msg();
|
959 |
} catch (const string &s) {
|
1161 |
} catch (const string &s) {
|
|
... |
|
... |
969 |
}
|
1171 |
}
|
970 |
return true;
|
1172 |
return true;
|
971 |
}
|
1173 |
}
|
972 |
|
1174 |
|
973 |
// Translate a simple OR, AND, or EXCL search clause.
|
1175 |
// Translate a simple OR, AND, or EXCL search clause.
|
974 |
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
1176 |
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
|
975 |
const string& stemlang)
|
|
|
976 |
{
|
1177 |
{
|
977 |
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
|
|
978 |
stemlang;
|
|
|
979 |
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
|
1178 |
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
|
980 |
stemlang.c_str()));
|
1179 |
getStemLang().c_str()));
|
981 |
|
1180 |
|
982 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1181 |
Xapian::Query *qp = (Xapian::Query *)p;
|
983 |
*qp = Xapian::Query();
|
1182 |
*qp = Xapian::Query();
|
984 |
|
1183 |
|
985 |
Xapian::Query::op op;
|
1184 |
Xapian::Query::op op;
|
|
... |
|
... |
998 |
// do it if there are wildcards anywhere, this would skew the results.
|
1197 |
// do it if there are wildcards anywhere, this would skew the results.
|
999 |
bool doBoostUserTerm =
|
1198 |
bool doBoostUserTerm =
|
1000 |
(m_parentSearch && !m_parentSearch->haveWildCards()) ||
|
1199 |
(m_parentSearch && !m_parentSearch->haveWildCards()) ||
|
1001 |
(m_parentSearch == 0 && !m_haveWildCards);
|
1200 |
(m_parentSearch == 0 && !m_haveWildCards);
|
1002 |
|
1201 |
|
1003 |
StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
|
1202 |
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm);
|
1004 |
if (!tr.processUserString(m_text, m_reason, pqueries, db.getStopList()))
|
1203 |
if (!tr.processUserString(m_text, getModifiers(), m_reason, pqueries))
|
1005 |
return false;
|
1204 |
return false;
|
1006 |
if (pqueries.empty()) {
|
1205 |
if (pqueries.empty()) {
|
1007 |
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
1206 |
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
1008 |
return true;
|
1207 |
return true;
|
1009 |
}
|
1208 |
}
|
|
... |
|
... |
1022 |
//
|
1221 |
//
|
1023 |
// We do not split the entry any more (used to do some crazy thing
|
1222 |
// We do not split the entry any more (used to do some crazy thing
|
1024 |
// about expanding multiple fragments in the past. We just take the
|
1223 |
// about expanding multiple fragments in the past. We just take the
|
1025 |
// value blanks and all and expand this against the indexed unsplit
|
1224 |
// value blanks and all and expand this against the indexed unsplit
|
1026 |
// file names
|
1225 |
// file names
|
1027 |
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
|
1226 |
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
|
1028 |
const string&)
|
|
|
1029 |
{
|
1227 |
{
|
1030 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1228 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1031 |
*qp = Xapian::Query();
|
1229 |
*qp = Xapian::Query();
|
1032 |
|
1230 |
|
1033 |
vector<string> names;
|
1231 |
vector<string> names;
|
|
... |
|
... |
1039 |
}
|
1237 |
}
|
1040 |
return true;
|
1238 |
return true;
|
1041 |
}
|
1239 |
}
|
1042 |
|
1240 |
|
1043 |
// Translate NEAR or PHRASE clause.
|
1241 |
// Translate NEAR or PHRASE clause.
|
1044 |
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
1242 |
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
|
1045 |
const string& stemlang)
|
|
|
1046 |
{
|
1243 |
{
|
1047 |
const string& l_stemlang = (m_modifiers&SDCM_NOSTEMMING)? cstr_null:
|
|
|
1048 |
stemlang;
|
|
|
1049 |
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
1244 |
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
1050 |
|
1245 |
|
1051 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1246 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1052 |
*qp = Xapian::Query();
|
1247 |
*qp = Xapian::Query();
|
1053 |
|
1248 |
|
|
... |
|
... |
1067 |
if (m_text.find('\"') != string::npos) {
|
1262 |
if (m_text.find('\"') != string::npos) {
|
1068 |
m_text = neutchars(m_text, "\"");
|
1263 |
m_text = neutchars(m_text, "\"");
|
1069 |
}
|
1264 |
}
|
1070 |
string s = cstr_dquote + m_text + cstr_dquote;
|
1265 |
string s = cstr_dquote + m_text + cstr_dquote;
|
1071 |
bool useNear = (m_tp == SCLT_NEAR);
|
1266 |
bool useNear = (m_tp == SCLT_NEAR);
|
1072 |
StringToXapianQ tr(db, m_hldata, m_field, l_stemlang, doBoostUserTerm);
|
1267 |
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm);
|
1073 |
if (!tr.processUserString(s, m_reason, pqueries, db.getStopList(),
|
1268 |
if (!tr.processUserString(s, getModifiers(), m_reason, pqueries,
|
1074 |
m_slack, useNear))
|
1269 |
m_slack, useNear))
|
1075 |
return false;
|
1270 |
return false;
|
1076 |
if (pqueries.empty()) {
|
1271 |
if (pqueries.empty()) {
|
1077 |
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
|
1272 |
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
|
1078 |
return true;
|
1273 |
return true;
|