|
a/src/rcldb/searchdatatox.cpp |
|
b/src/rcldb/searchdatatox.cpp |
|
... |
|
... |
45 |
#include "synfamily.h"
|
45 |
#include "synfamily.h"
|
46 |
#include "stemdb.h"
|
46 |
#include "stemdb.h"
|
47 |
#include "expansiondbs.h"
|
47 |
#include "expansiondbs.h"
|
48 |
#include "base64.h"
|
48 |
#include "base64.h"
|
49 |
#include "daterange.h"
|
49 |
#include "daterange.h"
|
|
|
50 |
#include "rclvalues.h"
|
50 |
|
51 |
|
51 |
namespace Rcl {
|
52 |
namespace Rcl {
|
52 |
|
|
|
53 |
typedef vector<SearchDataClause *>::iterator qlist_it_t;
|
|
|
54 |
|
53 |
|
55 |
static const int original_term_wqf_booster = 10;
|
54 |
static const int original_term_wqf_booster = 10;
|
56 |
|
55 |
|
57 |
// Expand doc categories and mime type wild card expressions
|
56 |
// Expand doc categories and mime type wild card expressions
|
58 |
//
|
57 |
//
|
|
... |
|
... |
60 |
// against the index.
|
59 |
// against the index.
|
61 |
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
|
60 |
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
|
62 |
{
|
61 |
{
|
63 |
const RclConfig *cfg = db.getConf();
|
62 |
const RclConfig *cfg = db.getConf();
|
64 |
if (!cfg) {
|
63 |
if (!cfg) {
|
65 |
LOGFATAL("Db::expandFileTypes: null configuration!!\n" );
|
64 |
LOGFATAL("Db::expandFileTypes: null configuration!!\n");
|
66 |
return false;
|
65 |
return false;
|
67 |
}
|
66 |
}
|
68 |
vector<string> exptps;
|
67 |
vector<string> exptps;
|
69 |
|
68 |
|
70 |
for (vector<string>::iterator it = tps.begin(); it != tps.end(); it++) {
|
69 |
for (vector<string>::iterator it = tps.begin(); it != tps.end(); it++) {
|
|
... |
|
... |
108 |
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
107 |
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
109 |
vector<SearchDataClause*>& query,
|
108 |
vector<SearchDataClause*>& query,
|
110 |
string& reason, void *d)
|
109 |
string& reason, void *d)
|
111 |
{
|
110 |
{
|
112 |
Xapian::Query xq;
|
111 |
Xapian::Query xq;
|
113 |
for (qlist_it_t it = query.begin(); it != query.end(); it++) {
|
112 |
for (auto& clausep : query) {
|
114 |
Xapian::Query nq;
|
113 |
Xapian::Query nq;
|
115 |
if (!(*it)->toNativeQuery(db, &nq)) {
|
114 |
if (!clausep->toNativeQuery(db, &nq)) {
|
116 |
LOGERR("SearchData::clausesToQuery: toNativeQuery failed: " << ((*it)->getReason()) << "\n" );
|
115 |
LOGERR("SearchData::clausesToQuery: toNativeQuery failed: "
|
|
|
116 |
<< clausep->getReason() << "\n");
|
117 |
reason += (*it)->getReason() + " ";
|
117 |
reason += clausep->getReason() + " ";
|
118 |
return false;
|
118 |
return false;
|
119 |
}
|
119 |
}
|
120 |
if (nq.empty()) {
|
120 |
if (nq.empty()) {
|
121 |
LOGDEB("SearchData::clausesToQuery: skipping empty clause\n" );
|
121 |
LOGDEB("SearchData::clausesToQuery: skipping empty clause\n");
|
122 |
continue;
|
122 |
continue;
|
123 |
}
|
123 |
}
|
124 |
// If this structure is an AND list, must use AND_NOT for excl clauses.
|
124 |
// If this structure is an AND list, must use AND_NOT for excl clauses.
|
125 |
// Else this is an OR list, and there can't be excl clauses (checked by
|
125 |
// Else this is an OR list, and there can't be excl clauses (checked by
|
126 |
// addClause())
|
126 |
// addClause())
|
127 |
Xapian::Query::op op;
|
127 |
Xapian::Query::op op;
|
128 |
if (tp == SCLT_AND) {
|
128 |
if (tp == SCLT_AND) {
|
129 |
if ((*it)->getexclude()) {
|
129 |
if (clausep->getexclude()) {
|
130 |
op = Xapian::Query::OP_AND_NOT;
|
130 |
op = Xapian::Query::OP_AND_NOT;
|
131 |
} else {
|
131 |
} else {
|
132 |
op = Xapian::Query::OP_AND;
|
132 |
op = Xapian::Query::OP_AND;
|
133 |
}
|
133 |
}
|
134 |
} else {
|
134 |
} else {
|
|
... |
|
... |
141 |
xq = nq;
|
141 |
xq = nq;
|
142 |
} else {
|
142 |
} else {
|
143 |
xq = Xapian::Query(op, xq, nq);
|
143 |
xq = Xapian::Query(op, xq, nq);
|
144 |
}
|
144 |
}
|
145 |
if (int(xq.get_length()) >= getMaxCl()) {
|
145 |
if (int(xq.get_length()) >= getMaxCl()) {
|
146 |
LOGERR("" << (maxXapClauseMsg) << "\n" );
|
146 |
LOGERR("" << maxXapClauseMsg << "\n");
|
147 |
m_reason += maxXapClauseMsg;
|
147 |
m_reason += maxXapClauseMsg;
|
148 |
if (!o_index_stripchars)
|
148 |
if (!o_index_stripchars)
|
149 |
m_reason += maxXapClauseCaseDiacMsg;
|
149 |
m_reason += maxXapClauseCaseDiacMsg;
|
150 |
return false;
|
150 |
return false;
|
151 |
}
|
151 |
}
|
152 |
}
|
152 |
}
|
153 |
|
153 |
|
154 |
LOGDEB0("SearchData::clausesToQuery: got " << (xq.get_length()) << " clauses\n" );
|
154 |
LOGDEB0("SearchData::clausesToQuery: got " << xq.get_length()<<" clauses\n");
|
155 |
|
155 |
|
156 |
if (xq.empty())
|
156 |
if (xq.empty())
|
157 |
xq = Xapian::Query::MatchAll;
|
157 |
xq = Xapian::Query::MatchAll;
|
158 |
|
158 |
|
159 |
*((Xapian::Query *)d) = xq;
|
159 |
*((Xapian::Query *)d) = xq;
|
160 |
return true;
|
160 |
return true;
|
161 |
}
|
161 |
}
|
162 |
|
162 |
|
163 |
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
163 |
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
164 |
{
|
164 |
{
|
165 |
LOGDEB("SearchData::toNativeQuery: stemlang [" << (m_stemlang) << "]\n" );
|
165 |
LOGDEB("SearchData::toNativeQuery: stemlang [" << m_stemlang << "]\n");
|
166 |
m_reason.erase();
|
166 |
m_reason.erase();
|
167 |
|
167 |
|
168 |
db.getConf()->getConfParam("maxTermExpand", &m_maxexp);
|
168 |
db.getConf()->getConfParam("maxTermExpand", &m_maxexp);
|
169 |
db.getConf()->getConfParam("maxXapianClauses", &m_maxcl);
|
169 |
db.getConf()->getConfParam("maxXapianClauses", &m_maxcl);
|
170 |
db.getConf()->getConfParam("autocasesens", &m_autocasesens);
|
170 |
db.getConf()->getConfParam("autocasesens", &m_autocasesens);
|
|
... |
|
... |
172 |
|
172 |
|
173 |
// Walk the clause list translating each in turn and building the
|
173 |
// Walk the clause list translating each in turn and building the
|
174 |
// Xapian query tree
|
174 |
// Xapian query tree
|
175 |
Xapian::Query xq;
|
175 |
Xapian::Query xq;
|
176 |
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
|
176 |
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
|
177 |
LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: " << (m_reason) << "\n" );
|
177 |
LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: "
|
|
|
178 |
<< m_reason << "\n");
|
178 |
return false;
|
179 |
return false;
|
179 |
}
|
180 |
}
|
180 |
|
181 |
|
181 |
if (m_haveDates) {
|
182 |
if (m_haveDates) {
|
182 |
// If one of the extremities is unset, compute db extremas
|
183 |
// If one of the extremities is unset, compute db extremas
|
183 |
if (m_dates.y1 == 0 || m_dates.y2 == 0) {
|
184 |
if (m_dates.y1 == 0 || m_dates.y2 == 0) {
|
184 |
int minyear = 1970, maxyear = 2100;
|
185 |
int minyear = 1970, maxyear = 2100;
|
185 |
if (!db.maxYearSpan(&minyear, &maxyear)) {
|
186 |
if (!db.maxYearSpan(&minyear, &maxyear)) {
|
186 |
LOGERR("Can't retrieve index min/max dates\n" );
|
187 |
LOGERR("Can't retrieve index min/max dates\n");
|
187 |
//whatever, go on.
|
188 |
//whatever, go on.
|
188 |
}
|
189 |
}
|
189 |
|
190 |
|
190 |
if (m_dates.y1 == 0) {
|
191 |
if (m_dates.y1 == 0) {
|
191 |
m_dates.y1 = minyear;
|
192 |
m_dates.y1 = minyear;
|
|
... |
|
... |
196 |
m_dates.y2 = maxyear;
|
197 |
m_dates.y2 = maxyear;
|
197 |
m_dates.m2 = 12;
|
198 |
m_dates.m2 = 12;
|
198 |
m_dates.d2 = 31;
|
199 |
m_dates.d2 = 31;
|
199 |
}
|
200 |
}
|
200 |
}
|
201 |
}
|
201 |
LOGDEB("Db::toNativeQuery: date interval: " << (m_dates.y1) << "-" << (m_dates.m1) << "-" << (m_dates.d1) << "/" << (m_dates.y2) << "-" << (m_dates.m2) << "-" << (m_dates.d2) << "\n" );
|
202 |
LOGDEB("Db::toNativeQuery: date interval: " << m_dates.y1 <<
|
|
|
203 |
"-" << m_dates.m1 << "-" << m_dates.d1 << "/" <<
|
|
|
204 |
m_dates.y2 << "-" << m_dates.m2 << "-" << m_dates.d2 << "\n");
|
202 |
Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
|
205 |
Xapian::Query dq = date_range_filter(m_dates.y1, m_dates.m1, m_dates.d1,
|
203 |
m_dates.y2, m_dates.m2, m_dates.d2);
|
206 |
m_dates.y2, m_dates.m2, m_dates.d2);
|
204 |
if (dq.empty()) {
|
207 |
if (dq.empty()) {
|
205 |
LOGINFO("Db::toNativeQuery: date filter is empty\n" );
|
208 |
LOGINFO("Db::toNativeQuery: date filter is empty\n");
|
206 |
}
|
209 |
}
|
207 |
// If no probabilistic query is provided then promote the daterange
|
210 |
// If no probabilistic query is provided then promote the daterange
|
208 |
// filter to be THE query instead of filtering an empty query.
|
211 |
// filter to be THE query instead of filtering an empty query.
|
209 |
if (xq.empty()) {
|
212 |
if (xq.empty()) {
|
210 |
LOGINFO("Db::toNativeQuery: proba query is empty\n" );
|
213 |
LOGINFO("Db::toNativeQuery: proba query is empty\n");
|
211 |
xq = dq;
|
214 |
xq = dq;
|
212 |
} else {
|
215 |
} else {
|
213 |
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
|
216 |
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, dq);
|
214 |
}
|
217 |
}
|
215 |
}
|
218 |
}
|
|
... |
|
... |
237 |
}
|
240 |
}
|
238 |
|
241 |
|
239 |
// If no probabilistic query is provided then promote the
|
242 |
// If no probabilistic query is provided then promote the
|
240 |
// filter to be THE query instead of filtering an empty query.
|
243 |
// filter to be THE query instead of filtering an empty query.
|
241 |
if (xq.empty()) {
|
244 |
if (xq.empty()) {
|
242 |
LOGINFO("Db::toNativeQuery: proba query is empty\n" );
|
245 |
LOGINFO("Db::toNativeQuery: proba query is empty\n");
|
243 |
xq = sq;
|
246 |
xq = sq;
|
244 |
} else {
|
247 |
} else {
|
245 |
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
|
248 |
xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
|
246 |
}
|
249 |
}
|
247 |
}
|
250 |
}
|
|
... |
|
... |
261 |
|
264 |
|
262 |
Xapian::Query tq;
|
265 |
Xapian::Query tq;
|
263 |
for (vector<string>::iterator it = m_filetypes.begin();
|
266 |
for (vector<string>::iterator it = m_filetypes.begin();
|
264 |
it != m_filetypes.end(); it++) {
|
267 |
it != m_filetypes.end(); it++) {
|
265 |
string term = wrap_prefix(mimetype_prefix) + *it;
|
268 |
string term = wrap_prefix(mimetype_prefix) + *it;
|
266 |
LOGDEB0("Adding file type term: [" << (term) << "]\n" );
|
269 |
LOGDEB0("Adding file type term: [" << term << "]\n");
|
267 |
tq = tq.empty() ? Xapian::Query(term) :
|
270 |
tq = tq.empty() ? Xapian::Query(term) :
|
268 |
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
|
271 |
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
|
269 |
}
|
272 |
}
|
270 |
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
|
273 |
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
|
271 |
}
|
274 |
}
|
|
... |
|
... |
276 |
|
279 |
|
277 |
Xapian::Query tq;
|
280 |
Xapian::Query tq;
|
278 |
for (vector<string>::iterator it = m_nfiletypes.begin();
|
281 |
for (vector<string>::iterator it = m_nfiletypes.begin();
|
279 |
it != m_nfiletypes.end(); it++) {
|
282 |
it != m_nfiletypes.end(); it++) {
|
280 |
string term = wrap_prefix(mimetype_prefix) + *it;
|
283 |
string term = wrap_prefix(mimetype_prefix) + *it;
|
281 |
LOGDEB0("Adding negative file type term: [" << (term) << "]\n" );
|
284 |
LOGDEB0("Adding negative file type term: [" << term << "]\n");
|
282 |
tq = tq.empty() ? Xapian::Query(term) :
|
285 |
tq = tq.empty() ? Xapian::Query(term) :
|
283 |
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
|
286 |
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
|
284 |
}
|
287 |
}
|
285 |
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
|
288 |
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
|
286 |
}
|
289 |
}
|
|
... |
|
... |
331 |
bool takeword(const std::string &term, int pos, int bs, int be) {
|
334 |
bool takeword(const std::string &term, int pos, int bs, int be) {
|
332 |
m_alltermcount++;
|
335 |
m_alltermcount++;
|
333 |
if (m_lastpos < pos)
|
336 |
if (m_lastpos < pos)
|
334 |
m_lastpos = pos;
|
337 |
m_lastpos = pos;
|
335 |
bool noexpand = be ? m_ts->nostemexp() : true;
|
338 |
bool noexpand = be ? m_ts->nostemexp() : true;
|
336 |
LOGDEB1("TermProcQ::takeword: pushing [" << (term) << "] pos " << (pos) << " noexp " << (noexpand) << "\n" );
|
339 |
LOGDEB1("TermProcQ::takeword: pushing [" << term << "] pos " <<
|
|
|
340 |
pos << " noexp " << noexpand << "\n");
|
337 |
if (m_terms[pos].size() < term.size()) {
|
341 |
if (m_terms[pos].size() < term.size()) {
|
338 |
m_terms[pos] = term;
|
342 |
m_terms[pos] = term;
|
339 |
m_nste[pos] = noexpand;
|
343 |
m_nste[pos] = noexpand;
|
340 |
}
|
344 |
}
|
341 |
return true;
|
345 |
return true;
|
|
... |
|
... |
575 |
for (vector<string>::iterator it = v.begin(); it != v.end(); it++) {
|
579 |
for (vector<string>::iterator it = v.begin(); it != v.end(); it++) {
|
576 |
*it = prefix + *it;
|
580 |
*it = prefix + *it;
|
577 |
}
|
581 |
}
|
578 |
}
|
582 |
}
|
579 |
|
583 |
|
580 |
void SearchDataClauseSimple::
|
584 |
void SearchDataClauseSimple::processSimpleSpan(
|
581 |
processSimpleSpan(Rcl::Db &db, string& ermsg,
|
585 |
Rcl::Db &db, string& ermsg, const string& span, int mods, void *pq)
|
582 |
const string& span,
|
|
|
583 |
int mods, void * pq)
|
|
|
584 |
{
|
586 |
{
|
585 |
vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);
|
587 |
vector<Xapian::Query>& pqueries(*(vector<Xapian::Query>*)pq);
|
586 |
LOGDEB0("StringToXapianQ::processSimpleSpan: [" << (span) << "] mods 0x" << ((unsigned int)mods) << "\n" );
|
588 |
LOGDEB0("StringToXapianQ::processSimpleSpan: [" << span << "] mods 0x"
|
|
|
589 |
<< (unsigned int)mods << "\n");
|
587 |
vector<string> exp;
|
590 |
vector<string> exp;
|
588 |
string sterm; // dumb version of user term
|
591 |
string sterm; // dumb version of user term
|
589 |
|
592 |
|
590 |
string prefix;
|
593 |
string prefix;
|
591 |
const FieldTraits *ftp;
|
594 |
const FieldTraits *ftp;
|
|
... |
|
... |
677 |
|
680 |
|
678 |
// Go through the list and perform stem/wildcard expansion for each element
|
681 |
// Go through the list and perform stem/wildcard expansion for each element
|
679 |
vector<bool>::const_iterator nxit = splitData->nostemexps().begin();
|
682 |
vector<bool>::const_iterator nxit = splitData->nostemexps().begin();
|
680 |
for (vector<string>::const_iterator it = splitData->terms().begin();
|
683 |
for (vector<string>::const_iterator it = splitData->terms().begin();
|
681 |
it != splitData->terms().end(); it++, nxit++) {
|
684 |
it != splitData->terms().end(); it++, nxit++) {
|
682 |
LOGDEB0("ProcessPhrase: processing [" << *it << "]\n" );
|
685 |
LOGDEB0("ProcessPhrase: processing [" << *it << "]\n");
|
683 |
// Adjust when we do stem expansion. Not if disabled by
|
686 |
// Adjust when we do stem expansion. Not if disabled by
|
684 |
// caller, not inside phrases, and some versions of xapian
|
687 |
// caller, not inside phrases, and some versions of xapian
|
685 |
// will accept only one OR clause inside NEAR.
|
688 |
// will accept only one OR clause inside NEAR.
|
686 |
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
|
689 |
bool nostemexp = *nxit || (op == Xapian::Query::OP_PHRASE)
|
687 |
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
690 |
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
|
... |
|
... |
693 |
lmods |= SearchDataClause::SDCM_NOSTEMMING;
|
696 |
lmods |= SearchDataClause::SDCM_NOSTEMMING;
|
694 |
string sterm;
|
697 |
string sterm;
|
695 |
vector<string> exp;
|
698 |
vector<string> exp;
|
696 |
if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
|
699 |
if (!expandTerm(db, ermsg, lmods, *it, exp, sterm, prefix))
|
697 |
return;
|
700 |
return;
|
698 |
LOGDEB0("ProcessPhraseOrNear: exp size " << (exp.size()) << ", exp: " << (stringsToString(exp)) << "\n" );
|
701 |
LOGDEB0("ProcessPhraseOrNear: exp size " << exp.size() << ", exp: " <<
|
|
|
702 |
stringsToString(exp) << "\n");
|
699 |
// groups is used for highlighting, we don't want prefixes in there.
|
703 |
// groups is used for highlighting, we don't want prefixes in there.
|
700 |
vector<string> noprefs;
|
704 |
vector<string> noprefs;
|
701 |
for (vector<string>::const_iterator it = exp.begin();
|
705 |
for (vector<string>::const_iterator it = exp.begin();
|
702 |
it != exp.end(); it++) {
|
706 |
it != exp.end(); it++) {
|
703 |
noprefs.push_back(it->substr(prefix.size()));
|
707 |
noprefs.push_back(it->substr(prefix.size()));
|
|
... |
|
... |
719 |
slack++;
|
723 |
slack++;
|
720 |
}
|
724 |
}
|
721 |
|
725 |
|
722 |
// Generate an appropriate PHRASE/NEAR query with adjusted slack
|
726 |
// Generate an appropriate PHRASE/NEAR query with adjusted slack
|
723 |
// For phrases, give a relevance boost like we do for original terms
|
727 |
// For phrases, give a relevance boost like we do for original terms
|
724 |
LOGDEB2("PHRASE/NEAR: alltermcount " << (splitData->alltermcount()) << " lastpos " << (splitData->lastpos()) << "\n" );
|
728 |
LOGDEB2("PHRASE/NEAR: alltermcount " << splitData->alltermcount() <<
|
|
|
729 |
" lastpos " << splitData->lastpos() << "\n");
|
725 |
Xapian::Query xq(op, orqueries.begin(), orqueries.end(),
|
730 |
Xapian::Query xq(op, orqueries.begin(), orqueries.end(),
|
726 |
splitData->lastpos() + 1 + slack);
|
731 |
splitData->lastpos() + 1 + slack);
|
727 |
if (op == Xapian::Query::OP_PHRASE)
|
732 |
if (op == Xapian::Query::OP_PHRASE)
|
728 |
xq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, xq,
|
733 |
xq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, xq,
|
729 |
original_term_wqf_booster);
|
734 |
original_term_wqf_booster);
|
|
... |
|
... |
789 |
int slack, bool useNear)
|
794 |
int slack, bool useNear)
|
790 |
{
|
795 |
{
|
791 |
vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
|
796 |
vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
|
792 |
int mods = m_modifiers;
|
797 |
int mods = m_modifiers;
|
793 |
|
798 |
|
794 |
LOGDEB("StringToXapianQ:pUS:: qstr [" << (iq) << "] fld [" << (m_field) << "] mods 0x" << (mods) << " slack " << (slack) << " near " << (useNear) << "\n" );
|
799 |
LOGDEB("StringToXapianQ:pUS:: qstr [" << iq << "] fld [" << m_field <<
|
|
|
800 |
"] mods 0x"<<mods<<" slack " << slack << " near " << useNear <<"\n");
|
795 |
ermsg.erase();
|
801 |
ermsg.erase();
|
796 |
m_curcl = 0;
|
802 |
m_curcl = 0;
|
797 |
const StopList stops = db.getStopList();
|
803 |
const StopList stops = db.getStopList();
|
798 |
|
804 |
|
799 |
// Simple whitespace-split input into user-level words and
|
805 |
// Simple whitespace-split input into user-level words and
|
|
... |
|
... |
809 |
// Process each element: textsplit into terms, handle stem/wildcard
|
815 |
// Process each element: textsplit into terms, handle stem/wildcard
|
810 |
// expansion and transform into an appropriate Xapian::Query
|
816 |
// expansion and transform into an appropriate Xapian::Query
|
811 |
try {
|
817 |
try {
|
812 |
for (vector<string>::iterator it = phrases.begin();
|
818 |
for (vector<string>::iterator it = phrases.begin();
|
813 |
it != phrases.end(); it++) {
|
819 |
it != phrases.end(); it++) {
|
814 |
LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n" );
|
820 |
LOGDEB0("strToXapianQ: phrase/word: [" << *it << "]\n");
|
815 |
// Anchoring modifiers
|
821 |
// Anchoring modifiers
|
816 |
int amods = stringToMods(*it);
|
822 |
int amods = stringToMods(*it);
|
817 |
int terminc = amods != 0 ? 1 : 0;
|
823 |
int terminc = amods != 0 ? 1 : 0;
|
818 |
mods |= amods;
|
824 |
mods |= amods;
|
819 |
// If there are multiple spans in this element, including
|
825 |
// If there are multiple spans in this element, including
|
|
... |
|
... |
847 |
tpq.setTSQ(&splitter);
|
853 |
tpq.setTSQ(&splitter);
|
848 |
splitter.text_to_words(*it);
|
854 |
splitter.text_to_words(*it);
|
849 |
|
855 |
|
850 |
slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
|
856 |
slack += tpq.lastpos() - int(tpq.terms().size()) + 1;
|
851 |
|
857 |
|
852 |
LOGDEB0("strToXapianQ: termcount: " << (tpq.terms().size()) << "\n" );
|
858 |
LOGDEB0("strToXapianQ: termcount: " << tpq.terms().size() << "\n");
|
853 |
switch (tpq.terms().size() + terminc) {
|
859 |
switch (tpq.terms().size() + terminc) {
|
854 |
case 0:
|
860 |
case 0:
|
855 |
continue;// ??
|
861 |
continue;// ??
|
856 |
case 1: {
|
862 |
case 1: {
|
857 |
int lmods = mods;
|
863 |
int lmods = mods;
|
|
... |
|
... |
882 |
ermsg = s;
|
888 |
ermsg = s;
|
883 |
} catch (...) {
|
889 |
} catch (...) {
|
884 |
ermsg = "Caught unknown exception";
|
890 |
ermsg = "Caught unknown exception";
|
885 |
}
|
891 |
}
|
886 |
if (!ermsg.empty()) {
|
892 |
if (!ermsg.empty()) {
|
887 |
LOGERR("stringToXapianQueries: " << (ermsg) << "\n" );
|
893 |
LOGERR("stringToXapianQueries: " << ermsg << "\n");
|
888 |
return false;
|
894 |
return false;
|
889 |
}
|
895 |
}
|
890 |
return true;
|
896 |
return true;
|
891 |
}
|
897 |
}
|
892 |
|
898 |
|
893 |
// Translate a simple OR or AND search clause.
|
899 |
// Translate a simple OR or AND search clause.
|
894 |
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
|
900 |
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
|
895 |
{
|
901 |
{
|
896 |
LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << (m_field) << "] val [" << (m_text) << "] stemlang [" << (getStemLang()) << "]\n" );
|
902 |
LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << m_field <<
|
|
|
903 |
"] val [" << m_text << "] stemlang [" << getStemLang() << "]\n");
|
897 |
|
904 |
|
|
|
905 |
// Transform (in)equalities into a range query
|
|
|
906 |
switch (getrel()) {
|
|
|
907 |
case REL_EQUALS:
|
|
|
908 |
{
|
|
|
909 |
SearchDataClauseRange cl(*this, gettext(), gettext());
|
|
|
910 |
bool ret = cl.toNativeQuery(db, p);
|
|
|
911 |
m_reason = cl.getReason();
|
|
|
912 |
return ret;
|
|
|
913 |
}
|
|
|
914 |
case REL_LT: case REL_LTE:
|
|
|
915 |
{
|
|
|
916 |
SearchDataClauseRange cl(*this, "", gettext());
|
|
|
917 |
bool ret = cl.toNativeQuery(db, p);
|
|
|
918 |
m_reason = cl.getReason();
|
|
|
919 |
return ret;
|
|
|
920 |
}
|
|
|
921 |
case REL_GT: case REL_GTE:
|
|
|
922 |
{
|
|
|
923 |
SearchDataClauseRange cl(*this, gettext(), "");
|
|
|
924 |
bool ret = cl.toNativeQuery(db, p);
|
|
|
925 |
m_reason = cl.getReason();
|
|
|
926 |
return ret;
|
|
|
927 |
}
|
|
|
928 |
default:
|
|
|
929 |
break;
|
|
|
930 |
}
|
|
|
931 |
|
898 |
Xapian::Query *qp = (Xapian::Query *)p;
|
932 |
Xapian::Query *qp = (Xapian::Query *)p;
|
899 |
*qp = Xapian::Query();
|
933 |
*qp = Xapian::Query();
|
900 |
|
934 |
|
901 |
Xapian::Query::op op;
|
935 |
Xapian::Query::op op;
|
902 |
switch (m_tp) {
|
936 |
switch (m_tp) {
|
903 |
case SCLT_AND: op = Xapian::Query::OP_AND; break;
|
937 |
case SCLT_AND: op = Xapian::Query::OP_AND; break;
|
904 |
case SCLT_OR: op = Xapian::Query::OP_OR; break;
|
938 |
case SCLT_OR: op = Xapian::Query::OP_OR; break;
|
905 |
default:
|
939 |
default:
|
906 |
LOGERR("SearchDataClauseSimple: bad m_tp " << (m_tp) << "\n" );
|
940 |
LOGERR("SearchDataClauseSimple: bad m_tp " << m_tp << "\n");
|
907 |
m_reason = "Internal error";
|
941 |
m_reason = "Internal error";
|
908 |
return false;
|
942 |
return false;
|
909 |
}
|
943 |
}
|
910 |
|
944 |
|
911 |
vector<Xapian::Query> pqueries;
|
945 |
vector<Xapian::Query> pqueries;
|
912 |
if (!processUserString(db, m_text, m_reason, &pqueries))
|
946 |
if (!processUserString(db, m_text, m_reason, &pqueries))
|
913 |
return false;
|
947 |
return false;
|
914 |
if (pqueries.empty()) {
|
948 |
if (pqueries.empty()) {
|
915 |
LOGERR("SearchDataClauseSimple: resolved to null query\n" );
|
949 |
LOGERR("SearchDataClauseSimple: resolved to null query\n");
|
916 |
m_reason = string("Resolved to null query. Term too long ? : [" +
|
950 |
m_reason = string("Resolved to null query. Term too long ? : [" +
|
917 |
m_text + string("]"));
|
951 |
m_text + string("]"));
|
918 |
return false;
|
952 |
return false;
|
919 |
}
|
953 |
}
|
920 |
|
954 |
|
921 |
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
|
955 |
*qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
|
922 |
if (m_weight != 1.0) {
|
956 |
if (m_weight != 1.0) {
|
923 |
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
957 |
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
|
|
958 |
}
|
|
|
959 |
return true;
|
|
|
960 |
}
|
|
|
961 |
|
|
|
962 |
// Translate a range clause. This only works if a Xapian value slot
|
|
|
963 |
// was attributed to the field.
|
|
|
964 |
bool SearchDataClauseRange::toNativeQuery(Rcl::Db &db, void *p)
|
|
|
965 |
{
|
|
|
966 |
LOGDEB("SearchDataClauseRange::toNativeQuery: " << m_field <<
|
|
|
967 |
" :[" << m_text << ".." << m_t2 << "]\n");
|
|
|
968 |
Xapian::Query *qp = (Xapian::Query *)p;
|
|
|
969 |
*qp = Xapian::Query();
|
|
|
970 |
|
|
|
971 |
if (m_field.empty() || (m_text.empty() && m_t2.empty())) {
|
|
|
972 |
m_reason = "Range clause needs a field and a value";
|
|
|
973 |
return false;
|
|
|
974 |
}
|
|
|
975 |
|
|
|
976 |
// Get the value number for the field from the configuration
|
|
|
977 |
const FieldTraits *ftp;
|
|
|
978 |
if (!db.fieldToTraits(m_field, &ftp, true)) {
|
|
|
979 |
m_reason = string("field ") + m_field + " not found in configuration";
|
|
|
980 |
return false;
|
|
|
981 |
}
|
|
|
982 |
if (ftp->valueslot == 0) {
|
|
|
983 |
m_reason = string("No value slot specified in configuration for field ")
|
|
|
984 |
+ m_field;
|
|
|
985 |
return false;
|
|
|
986 |
}
|
|
|
987 |
LOGDEB("SearchDataClauseRange: value slot " << ftp->valueslot << endl);
|
|
|
988 |
// Build Xapian VALUE query.
|
|
|
989 |
string errstr;
|
|
|
990 |
try {
|
|
|
991 |
if (m_text.empty()) {
|
|
|
992 |
*qp = Xapian::Query(Xapian::Query::OP_VALUE_LE,
|
|
|
993 |
ftp->valueslot, convert_field_value(*ftp, m_t2));
|
|
|
994 |
} else if (m_t2.empty()) {
|
|
|
995 |
*qp = Xapian::Query(Xapian::Query::OP_VALUE_GE, ftp->valueslot,
|
|
|
996 |
convert_field_value(*ftp, m_text));
|
|
|
997 |
} else {
|
|
|
998 |
*qp = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, ftp->valueslot,
|
|
|
999 |
convert_field_value(*ftp, m_text),
|
|
|
1000 |
convert_field_value(*ftp, m_t2));
|
|
|
1001 |
}
|
|
|
1002 |
}
|
|
|
1003 |
XCATCHERROR(errstr);
|
|
|
1004 |
if (!errstr.empty()) {
|
|
|
1005 |
LOGERR("SearchDataClauseRange: range query creation failed for slot "<<
|
|
|
1006 |
ftp->valueslot << endl);
|
|
|
1007 |
m_reason = "Range query creation failed\n";
|
|
|
1008 |
*qp = Xapian::Query();
|
|
|
1009 |
return false;
|
924 |
}
|
1010 |
}
|
925 |
return true;
|
1011 |
return true;
|
926 |
}
|
1012 |
}
|
927 |
|
1013 |
|
928 |
// Translate a FILENAME search clause. This always comes
|
1014 |
// Translate a FILENAME search clause. This always comes
|
|
... |
|
... |
1016 |
}
|
1102 |
}
|
1017 |
|
1103 |
|
1018 |
// Translate NEAR or PHRASE clause.
|
1104 |
// Translate NEAR or PHRASE clause.
|
1019 |
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
|
1105 |
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
|
1020 |
{
|
1106 |
{
|
1021 |
LOGDEB("SearchDataClauseDist::toNativeQuery\n" );
|
1107 |
LOGDEB("SearchDataClauseDist::toNativeQuery\n");
|
1022 |
|
1108 |
|
1023 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1109 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1024 |
*qp = Xapian::Query();
|
1110 |
*qp = Xapian::Query();
|
1025 |
|
1111 |
|
1026 |
vector<Xapian::Query> pqueries;
|
1112 |
vector<Xapian::Query> pqueries;
|
|
... |
|
... |
1035 |
string s = cstr_dquote + m_text + cstr_dquote;
|
1121 |
string s = cstr_dquote + m_text + cstr_dquote;
|
1036 |
bool useNear = (m_tp == SCLT_NEAR);
|
1122 |
bool useNear = (m_tp == SCLT_NEAR);
|
1037 |
if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear))
|
1123 |
if (!processUserString(db, s, m_reason, &pqueries, m_slack, useNear))
|
1038 |
return false;
|
1124 |
return false;
|
1039 |
if (pqueries.empty()) {
|
1125 |
if (pqueries.empty()) {
|
1040 |
LOGERR("SearchDataClauseDist: resolved to null query\n" );
|
1126 |
LOGERR("SearchDataClauseDist: resolved to null query\n");
|
1041 |
m_reason = string("Resolved to null query. Term too long ? : [" +
|
1127 |
m_reason = string("Resolved to null query. Term too long ? : [" +
|
1042 |
m_text + string("]"));
|
1128 |
m_text + string("]"));
|
1043 |
return false;
|
1129 |
return false;
|
1044 |
}
|
1130 |
}
|
1045 |
|
1131 |
|