|
a/src/query/wasatorcl.cpp |
|
b/src/query/wasatorcl.cpp |
|
... |
|
... |
33 |
#include "smallut.h"
|
33 |
#include "smallut.h"
|
34 |
#include "rclconfig.h"
|
34 |
#include "rclconfig.h"
|
35 |
#include "refcntr.h"
|
35 |
#include "refcntr.h"
|
36 |
#include "textsplit.h"
|
36 |
#include "textsplit.h"
|
37 |
|
37 |
|
38 |
Rcl::SearchData *wasaStringToRcl(RclConfig *config,
|
38 |
static Rcl::SearchData *wasaQueryToRcl(RclConfig *config, WasaQuery *wasa,
|
39 |
const string &qs, string &reason,
|
39 |
const string& autosuffs, string& reason)
|
40 |
const string& autosuffs)
|
|
|
41 |
{
|
40 |
{
|
42 |
StringToWasaQuery parser;
|
|
|
43 |
WasaQuery *wq = parser.stringToQuery(qs, reason);
|
|
|
44 |
if (wq == 0)
|
41 |
if (wasa == 0) {
|
45 |
return 0;
|
42 |
reason = "NULL query";
|
46 |
Rcl::SearchData *rq = wasaQueryToRcl(config, wq, autosuffs);
|
|
|
47 |
if (rq == 0) {
|
|
|
48 |
reason = "Failed translating xesam query structure to recoll";
|
|
|
49 |
return 0;
|
43 |
return 0;
|
50 |
}
|
44 |
}
|
51 |
return rq;
|
|
|
52 |
}
|
|
|
53 |
|
|
|
54 |
Rcl::SearchData *wasaQueryToRcl(RclConfig *config,
|
|
|
55 |
WasaQuery *wasa, const string& autosuffs)
|
|
|
56 |
{
|
|
|
57 |
if (wasa == 0)
|
|
|
58 |
return 0;
|
|
|
59 |
if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) {
|
45 |
if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) {
|
|
|
46 |
reason = "Top query neither AND nor OR ?";
|
60 |
LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n"));
|
47 |
LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n"));
|
61 |
return 0;
|
48 |
return 0;
|
62 |
}
|
49 |
}
|
63 |
|
50 |
|
64 |
Rcl::SearchData *sdata = new
|
51 |
Rcl::SearchData *sdata = new
|
|
... |
|
... |
68 |
"AND" : "OR"));
|
55 |
"AND" : "OR"));
|
69 |
|
56 |
|
70 |
WasaQuery::subqlist_t::iterator it;
|
57 |
WasaQuery::subqlist_t::iterator it;
|
71 |
Rcl::SearchDataClause *nclause;
|
58 |
Rcl::SearchDataClause *nclause;
|
72 |
|
59 |
|
|
|
60 |
// Walk the list of clauses. Some pseudo-field types need special
|
|
|
61 |
// processing, which results in setting data in the top struct
|
|
|
62 |
// instead of adding a clause. We check for these first
|
73 |
for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) {
|
63 |
for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) {
|
|
|
64 |
|
|
|
65 |
if (!stringicmp("mime", (*it)->m_fieldspec) ||
|
|
|
66 |
!stringicmp("format", (*it)->m_fieldspec)) {
|
|
|
67 |
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
|
|
68 |
reason = "Negative mime/format clauses not supported yet";
|
|
|
69 |
return 0;
|
|
|
70 |
}
|
|
|
71 |
sdata->addFiletype((*it)->m_value);
|
|
|
72 |
continue;
|
|
|
73 |
}
|
|
|
74 |
|
|
|
75 |
// Xesam uses "type", we also support "rclcat", for broad
|
|
|
76 |
// categories like "audio", "presentation", etc.
|
|
|
77 |
if (!stringicmp("rclcat", (*it)->m_fieldspec) ||
|
|
|
78 |
!stringicmp("type", (*it)->m_fieldspec)) {
|
|
|
79 |
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
|
|
80 |
reason = "Negative rclcat/type clauses not supported yet";
|
|
|
81 |
return 0;
|
|
|
82 |
}
|
|
|
83 |
list<string> mtypes;
|
|
|
84 |
if (config && config->getMimeCatTypes((*it)->m_value, mtypes)
|
|
|
85 |
&& !mtypes.empty()) {
|
|
|
86 |
for (list<string>::iterator mit = mtypes.begin();
|
|
|
87 |
mit != mtypes.end(); mit++) {
|
|
|
88 |
sdata->addFiletype(*mit);
|
|
|
89 |
}
|
|
|
90 |
} else {
|
|
|
91 |
reason = "Unknown rclcat/type value: no mime types found";
|
|
|
92 |
return 0;
|
|
|
93 |
}
|
|
|
94 |
continue;
|
|
|
95 |
}
|
|
|
96 |
|
|
|
97 |
// Filtering on location
|
|
|
98 |
if (!stringicmp("dir", (*it)->m_fieldspec)) {
|
|
|
99 |
sdata->setTopdir((*it)->m_value, (*it)->m_op == WasaQuery::OP_EXCL);
|
|
|
100 |
continue;
|
|
|
101 |
}
|
|
|
102 |
|
|
|
103 |
// Handle "date" spec
|
|
|
104 |
if (!stringicmp("date", (*it)->m_fieldspec)) {
|
|
|
105 |
if ((*it)->m_op != WasaQuery::OP_LEAF) {
|
|
|
106 |
reason = "Negative date filtering not supported";
|
|
|
107 |
return 0;
|
|
|
108 |
}
|
|
|
109 |
DateInterval di;
|
|
|
110 |
if (!parsedateinterval((*it)->m_value, &di)) {
|
|
|
111 |
LOGERR(("wasaQueryToRcl: bad date interval format\n"));
|
|
|
112 |
reason = "Bad date interval format";
|
|
|
113 |
return 0;
|
|
|
114 |
}
|
|
|
115 |
LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n",
|
|
|
116 |
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
|
|
|
117 |
sdata->setDateSpan(&di);
|
|
|
118 |
continue;
|
|
|
119 |
}
|
|
|
120 |
|
|
|
121 |
// "Regular" processing follows:
|
74 |
switch ((*it)->m_op) {
|
122 |
switch ((*it)->m_op) {
|
75 |
case WasaQuery::OP_NULL:
|
123 |
case WasaQuery::OP_NULL:
|
76 |
case WasaQuery::OP_AND:
|
124 |
case WasaQuery::OP_AND:
|
77 |
default:
|
125 |
default:
|
|
|
126 |
reason = "Found bad NULL or AND query type in list";
|
78 |
LOGINFO(("wasaQueryToRcl: found bad NULL or AND q type in list\n"));
|
127 |
LOGERR(("wasaQueryToRcl: found bad NULL or AND q type in list\n"));
|
79 |
continue;
|
128 |
continue;
|
|
|
129 |
|
80 |
case WasaQuery::OP_LEAF: {
|
130 |
case WasaQuery::OP_LEAF: {
|
81 |
LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n",
|
131 |
LOGDEB2(("wasaQueryToRcl: leaf clause [%s]:[%s]\n",
|
82 |
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
132 |
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
83 |
|
|
|
84 |
// Special cases (mime, category, dir filter ...). Not pretty.
|
|
|
85 |
|
|
|
86 |
if (!stringicmp("mime", (*it)->m_fieldspec) ||
|
|
|
87 |
!stringicmp("format", (*it)->m_fieldspec)
|
|
|
88 |
) {
|
|
|
89 |
sdata->addFiletype((*it)->m_value);
|
|
|
90 |
break;
|
|
|
91 |
}
|
|
|
92 |
|
|
|
93 |
// Xesam uses "type", we also support "rclcat", for broad
|
|
|
94 |
// categories like "audio", "presentation", etc.
|
|
|
95 |
if (!stringicmp("rclcat", (*it)->m_fieldspec) ||
|
|
|
96 |
!stringicmp("type", (*it)->m_fieldspec)) {
|
|
|
97 |
list<string> mtypes;
|
|
|
98 |
if (config && config->getMimeCatTypes((*it)->m_value, mtypes)) {
|
|
|
99 |
for (list<string>::iterator mit = mtypes.begin();
|
|
|
100 |
mit != mtypes.end(); mit++) {
|
|
|
101 |
sdata->addFiletype(*mit);
|
|
|
102 |
}
|
|
|
103 |
}
|
|
|
104 |
break;
|
|
|
105 |
}
|
|
|
106 |
if (!stringicmp("dir", (*it)->m_fieldspec)) {
|
|
|
107 |
sdata->setTopdir((*it)->m_value);
|
|
|
108 |
break;
|
|
|
109 |
}
|
|
|
110 |
|
133 |
|
111 |
// Change terms found in the "autosuffs" list into "ext"
|
134 |
// Change terms found in the "autosuffs" list into "ext"
|
112 |
// field queries
|
135 |
// field queries
|
113 |
if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
|
136 |
if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
|
114 |
vector<string> asfv;
|
137 |
vector<string> asfv;
|
|
... |
|
... |
119 |
(*it)->m_modifiers |= WasaQuery::WQM_NOSTEM;
|
142 |
(*it)->m_modifiers |= WasaQuery::WQM_NOSTEM;
|
120 |
}
|
143 |
}
|
121 |
}
|
144 |
}
|
122 |
}
|
145 |
}
|
123 |
|
146 |
|
124 |
// Handle "date" spec
|
|
|
125 |
if (!stringicmp("date", (*it)->m_fieldspec)) {
|
|
|
126 |
DateInterval di;
|
|
|
127 |
if (!parsedateinterval((*it)->m_value, &di)) {
|
|
|
128 |
LOGERR(("wasaQueryToRcl: bad date interval format\n"));
|
|
|
129 |
// Process rest of query anyway ?
|
|
|
130 |
break;
|
|
|
131 |
}
|
|
|
132 |
LOGDEB(("wasaQueryToRcl:: date span: %d-%d-%d/%d-%d-%d\n",
|
|
|
133 |
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2))
|
|
|
134 |
sdata->setDateSpan(&di);
|
|
|
135 |
break;
|
|
|
136 |
}
|
|
|
137 |
|
|
|
138 |
// "Regular" processing follows:
|
|
|
139 |
unsigned int mods = (unsigned int)(*it)->m_modifiers;
|
147 |
unsigned int mods = (unsigned int)(*it)->m_modifiers;
|
140 |
|
148 |
|
141 |
if (TextSplit::hasVisibleWhite((*it)->m_value)) {
|
149 |
if (TextSplit::hasVisibleWhite((*it)->m_value)) {
|
142 |
int slack = (mods & WasaQuery::WQM_PHRASESLACK) ? 10 : 0;
|
150 |
int slack = (mods & WasaQuery::WQM_PHRASESLACK) ? 10 : 0;
|
143 |
Rcl::SClType tp = Rcl::SCLT_PHRASE;
|
151 |
Rcl::SClType tp = Rcl::SCLT_PHRASE;
|
|
... |
|
... |
152 |
nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,
|
160 |
nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND,
|
153 |
(*it)->m_value,
|
161 |
(*it)->m_value,
|
154 |
(*it)->m_fieldspec);
|
162 |
(*it)->m_fieldspec);
|
155 |
}
|
163 |
}
|
156 |
if (nclause == 0) {
|
164 |
if (nclause == 0) {
|
|
|
165 |
reason = "Out of memory";
|
157 |
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
166 |
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
158 |
return 0;
|
167 |
return 0;
|
159 |
}
|
168 |
}
|
160 |
if (mods & WasaQuery::WQM_NOSTEM) {
|
169 |
if (mods & WasaQuery::WQM_NOSTEM) {
|
161 |
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
170 |
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
|
... |
|
... |
174 |
// Note: have to add dquotes which will be translated to
|
183 |
// Note: have to add dquotes which will be translated to
|
175 |
// phrase if there are several words in there. Not pretty
|
184 |
// phrase if there are several words in there. Not pretty
|
176 |
// but should work. If there is actually a single
|
185 |
// but should work. If there is actually a single
|
177 |
// word, it will not be taken as a phrase, and
|
186 |
// word, it will not be taken as a phrase, and
|
178 |
// stem-expansion will work normally
|
187 |
// stem-expansion will work normally
|
|
|
188 |
// Have to do this because searchdata has nothing like and_not
|
179 |
nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL,
|
189 |
nclause = new Rcl::SearchDataClauseSimple(Rcl::SCLT_EXCL,
|
180 |
string("\"") +
|
190 |
string("\"") +
|
181 |
(*it)->m_value + "\"",
|
191 |
(*it)->m_value + "\"",
|
182 |
(*it)->m_fieldspec);
|
192 |
(*it)->m_fieldspec);
|
183 |
|
193 |
|
184 |
if (nclause == 0) {
|
194 |
if (nclause == 0) {
|
|
|
195 |
reason = "Out of memory";
|
185 |
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
196 |
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
186 |
return 0;
|
197 |
return 0;
|
187 |
}
|
198 |
}
|
188 |
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
199 |
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
189 |
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
200 |
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
|
... |
|
... |
192 |
|
203 |
|
193 |
case WasaQuery::OP_OR:
|
204 |
case WasaQuery::OP_OR:
|
194 |
LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n",
|
205 |
LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n",
|
195 |
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
206 |
(*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
|
196 |
// Create a subquery.
|
207 |
// Create a subquery.
|
197 |
Rcl::SearchData *sub = wasaQueryToRcl(config, *it);
|
208 |
Rcl::SearchData *sub =
|
|
|
209 |
wasaQueryToRcl(config, *it, autosuffs, reason);
|
198 |
if (sub == 0) {
|
210 |
if (sub == 0) {
|
199 |
continue;
|
211 |
continue;
|
200 |
}
|
212 |
}
|
201 |
nclause =
|
213 |
nclause =
|
202 |
new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB,
|
214 |
new Rcl::SearchDataClauseSub(Rcl::SCLT_SUB,
|
203 |
RefCntr<Rcl::SearchData>(sub));
|
215 |
RefCntr<Rcl::SearchData>(sub));
|
204 |
if (nclause == 0) {
|
216 |
if (nclause == 0) {
|
205 |
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
217 |
LOGERR(("wasaQueryToRcl: out of memory\n"));
|
|
|
218 |
reason = "Out of memory";
|
206 |
return 0;
|
219 |
return 0;
|
207 |
}
|
220 |
}
|
208 |
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
221 |
if ((*it)->m_modifiers & WasaQuery::WQM_NOSTEM)
|
209 |
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
222 |
nclause->setModifiers(Rcl::SearchDataClause::SDCM_NOSTEMMING);
|
210 |
sdata->addClause(nclause);
|
223 |
sdata->addClause(nclause);
|
211 |
}
|
224 |
}
|
212 |
}
|
225 |
}
|
213 |
|
226 |
|
214 |
return sdata;
|
227 |
return sdata;
|
215 |
}
|
228 |
}
|
|
|
229 |
|
|
|
230 |
// Build an Rcl::SearchData from a query string.
//
// The string is first parsed into a WasaQuery tree by
// StringToWasaQuery::stringToQuery(), and that tree is then handed to
// wasaQueryToRcl() for translation.
//
// @param config    forwarded unchanged to wasaQueryToRcl().
// @param qs        the query string to parse.
// @param reason    passed by reference to both the parser and
//                  wasaQueryToRcl(); presumably holds an explanation
//                  when 0 is returned -- confirm against the parser.
// @param autosuffs forwarded unchanged to wasaQueryToRcl().
// @return the value returned by wasaQueryToRcl(), or 0 if the parser
//         returned a null query.
Rcl::SearchData *wasaStringToRcl(RclConfig *config,
|
|
|
231 |
const string &qs, string &reason,
|
|
|
232 |
const string& autosuffs)
|
|
|
233 |
{
|
|
|
234 |
StringToWasaQuery parser;
|
|
|
235 |
// NOTE(review): wq is never deleted in this function. Confirm whether
// the parser object keeps ownership of the returned tree; if not, this
// looks like a leak on every call.
WasaQuery *wq = parser.stringToQuery(qs, reason);
|
|
|
236 |
// Parse failure: nothing to translate.
if (wq == 0)
|
|
|
237 |
return 0;
|
|
|
238 |
// Translation failures are reported through the same reason string.
return wasaQueryToRcl(config, wq, autosuffs, reason);
|
|
|
239 |
}
|