|
a |
|
b/src/query/wasaparseaux.cpp |
/* Copyright (C) 2006 J.F.Dockes
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
|
|
|
17 |
#include "autoconfig.h"
|
|
|
18 |
|
|
|
19 |
#include <iostream>
|
|
|
20 |
|
|
|
21 |
#include "wasatorcl.h"
|
|
|
22 |
#include "wasaparserdriver.h"
|
|
|
23 |
#include "searchdata.h"
|
|
|
24 |
#include "debuglog.h"
|
|
|
25 |
|
|
|
26 |
#define YYDEBUG 1
|
|
|
27 |
|
|
|
28 |
// bison-generated file
|
|
|
29 |
#include "wasaparse.h"
|
|
|
30 |
|
|
|
31 |
using namespace std;
|
|
|
32 |
using namespace Rcl;
|
|
|
33 |
|
|
|
34 |
|
|
|
35 |
void
|
|
|
36 |
yy::parser::error (const location_type& l, const std::string& m)
|
|
|
37 |
{
|
|
|
38 |
d->setreason(m);
|
|
|
39 |
}
|
|
|
40 |
|
|
|
41 |
|
|
|
42 |
// Parse a query string with a temporary WasaParserDriver built from
// config, stemlang and autosuffs.
// Returns the SearchData produced by the driver's parse() on success.
// On failure, returns a null pointer and copies the driver's error text
// into reason (reason is left untouched on success).
SearchData *wasaStringToRcl(const RclConfig *config,
                            const std::string& stemlang,
                            const std::string& query, string &reason,
                            const std::string& autosuffs)
{
    WasaParserDriver driver(config, stemlang, autosuffs);
    SearchData *parsed = driver.parse(query);
    if (parsed != 0) {
        return parsed;
    }
    reason = driver.getreason();
    return 0;
}
|
|
|
53 |
|
|
|
54 |
// Run the bison parser over the input string.
// Resets the driver state (input text, read position, pushed-back
// characters, previous result), then runs a yy::parser bound to this
// driver. Returns m_result, which is null if the parser reported failure.
// NOTE(review): the returned pointer is still stored in m_result, and
// the next call to parse() deletes it -- confirm the intended ownership
// against the callers and the class destructor.
SearchData *WasaParserDriver::parse(const std::string& in)
{
    // Start from a clean lexer state
    m_input = in;
    m_index = 0;
    m_returns = stack<int>();

    // Drop any result left over from a previous run
    delete m_result;
    m_result = 0;

    yy::parser parser(this);
    parser.set_debug_level(0);

    const bool failed = parser.parse() != 0;
    if (failed) {
        // Discard whatever was partially built
        delete m_result;
        m_result = 0;
    }
    return m_result;
}
|
|
|
72 |
|
|
|
73 |
int WasaParserDriver::GETCHAR()
|
|
|
74 |
{
|
|
|
75 |
if (!m_returns.empty()) {
|
|
|
76 |
int c = m_returns.top();
|
|
|
77 |
m_returns.pop();
|
|
|
78 |
return c;
|
|
|
79 |
}
|
|
|
80 |
if (m_index < m_input.size())
|
|
|
81 |
return m_input[m_index++];
|
|
|
82 |
return 0;
|
|
|
83 |
}
|
|
|
84 |
void WasaParserDriver::UNGETCHAR(int c)
|
|
|
85 |
{
|
|
|
86 |
m_returns.push(c);
|
|
|
87 |
}
|
|
|
88 |
|
|
|
89 |
// Add clause to query, handling special pseudo-clauses for size/date
|
|
|
90 |
// etc. (mostly determined on field name).
|
|
|
91 |
bool WasaParserDriver::addClause(SearchData *sd,
|
|
|
92 |
SearchDataClauseSimple* cl)
|
|
|
93 |
{
|
|
|
94 |
if (cl->getfield().empty()) {
|
|
|
95 |
// Simple clause with empty field spec.
|
|
|
96 |
// Possibly change terms found in the "autosuffs" list into "ext"
|
|
|
97 |
// field queries
|
|
|
98 |
if (!m_autosuffs.empty()) {
|
|
|
99 |
vector<string> asfv;
|
|
|
100 |
if (stringToStrings(m_autosuffs, asfv)) {
|
|
|
101 |
if (find_if(asfv.begin(), asfv.end(),
|
|
|
102 |
StringIcmpPred(cl->gettext())) != asfv.end()) {
|
|
|
103 |
cl->setfield("ext");
|
|
|
104 |
cl->addModifier(SearchDataClause::SDCM_NOSTEMMING);
|
|
|
105 |
}
|
|
|
106 |
}
|
|
|
107 |
}
|
|
|
108 |
return sd->addClause(cl);
|
|
|
109 |
}
|
|
|
110 |
|
|
|
111 |
|
|
|
112 |
const string& fld = cl->getfield();
|
|
|
113 |
|
|
|
114 |
// MIME types and categories
|
|
|
115 |
if (!stringicmp("mime", fld) ||!stringicmp("format", fld)) {
|
|
|
116 |
if (cl->getexclude()) {
|
|
|
117 |
sd->remFiletype(cl->gettext());
|
|
|
118 |
} else {
|
|
|
119 |
sd->addFiletype(cl->gettext());
|
|
|
120 |
}
|
|
|
121 |
delete cl;
|
|
|
122 |
return true;
|
|
|
123 |
}
|
|
|
124 |
|
|
|
125 |
if (!stringicmp("rclcat", fld) || !stringicmp("type", fld)) {
|
|
|
126 |
vector<string> mtypes;
|
|
|
127 |
if (m_config && m_config->getMimeCatTypes(cl->gettext(), mtypes)) {
|
|
|
128 |
for (vector<string>::iterator mit = mtypes.begin();
|
|
|
129 |
mit != mtypes.end(); mit++) {
|
|
|
130 |
if (cl->getexclude()) {
|
|
|
131 |
sd->remFiletype(*mit);
|
|
|
132 |
} else {
|
|
|
133 |
sd->addFiletype(*mit);
|
|
|
134 |
}
|
|
|
135 |
}
|
|
|
136 |
}
|
|
|
137 |
delete cl;
|
|
|
138 |
return true;
|
|
|
139 |
}
|
|
|
140 |
|
|
|
141 |
// Handle "date" spec
|
|
|
142 |
if (!stringicmp("date", fld)) {
|
|
|
143 |
DateInterval di;
|
|
|
144 |
if (!parsedateinterval(cl->gettext(), &di)) {
|
|
|
145 |
LOGERR(("Bad date interval format: %s\n",
|
|
|
146 |
cl->gettext().c_str()));
|
|
|
147 |
m_reason = "Bad date interval format";
|
|
|
148 |
delete cl;
|
|
|
149 |
return false;
|
|
|
150 |
}
|
|
|
151 |
LOGDEB(("addClause:: date span: %d-%d-%d/%d-%d-%d\n",
|
|
|
152 |
di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
|
|
|
153 |
sd->setDateSpan(&di);
|
|
|
154 |
delete cl;
|
|
|
155 |
return true;
|
|
|
156 |
}
|
|
|
157 |
|
|
|
158 |
// Handle "size" spec
|
|
|
159 |
if (!stringicmp("size", fld)) {
|
|
|
160 |
char *cp;
|
|
|
161 |
size_t size = strtoll(cl->gettext().c_str(), &cp, 10);
|
|
|
162 |
if (*cp != 0) {
|
|
|
163 |
switch (*cp) {
|
|
|
164 |
case 'k': case 'K': size *= 1E3;break;
|
|
|
165 |
case 'm': case 'M': size *= 1E6;break;
|
|
|
166 |
case 'g': case 'G': size *= 1E9;break;
|
|
|
167 |
case 't': case 'T': size *= 1E12;break;
|
|
|
168 |
default:
|
|
|
169 |
m_reason = string("Bad multiplier suffix: ") + *cp;
|
|
|
170 |
delete cl;
|
|
|
171 |
return false;
|
|
|
172 |
}
|
|
|
173 |
}
|
|
|
174 |
|
|
|
175 |
SearchDataClause::Relation rel = cl->getrel();
|
|
|
176 |
|
|
|
177 |
delete cl;
|
|
|
178 |
|
|
|
179 |
switch (rel) {
|
|
|
180 |
case SearchDataClause::REL_EQUALS:
|
|
|
181 |
sd->setMaxSize(size);
|
|
|
182 |
sd->setMinSize(size);
|
|
|
183 |
break;
|
|
|
184 |
case SearchDataClause::REL_LT:
|
|
|
185 |
case SearchDataClause::REL_LTE:
|
|
|
186 |
sd->setMaxSize(size);
|
|
|
187 |
break;
|
|
|
188 |
case SearchDataClause::REL_GT:
|
|
|
189 |
case SearchDataClause::REL_GTE:
|
|
|
190 |
sd->setMinSize(size);
|
|
|
191 |
break;
|
|
|
192 |
default:
|
|
|
193 |
m_reason = "Bad relation operator with size query. Use > < or =";
|
|
|
194 |
return false;
|
|
|
195 |
}
|
|
|
196 |
return true;
|
|
|
197 |
}
|
|
|
198 |
|
|
|
199 |
if (!stringicmp("dir", fld)) {
|
|
|
200 |
// dir filtering special case
|
|
|
201 |
SearchDataClausePath *nclause =
|
|
|
202 |
new SearchDataClausePath(cl->gettext(), cl->getexclude());
|
|
|
203 |
delete cl;
|
|
|
204 |
return sd->addClause(nclause);
|
|
|
205 |
}
|
|
|
206 |
|
|
|
207 |
if (cl->getTp() == SCLT_OR || cl->getTp() == SCLT_AND) {
|
|
|
208 |
// If this is a normal clause and the term has commas or
|
|
|
209 |
// slashes inside, take it as a list, turn the slashes/commas
|
|
|
210 |
// to spaces, leave unquoted. Otherwise, this would end up as
|
|
|
211 |
// a phrase query. This is a handy way to enter multiple terms
|
|
|
212 |
// to be searched inside a field. We interpret ',' as AND, and
|
|
|
213 |
// '/' as OR. No mixes allowed and ',' wins.
|
|
|
214 |
SClType tp = SCLT_FILENAME;// impossible value
|
|
|
215 |
string ns = neutchars(cl->gettext(), ",");
|
|
|
216 |
if (ns.compare(cl->gettext())) {
|
|
|
217 |
// had ','
|
|
|
218 |
tp = SCLT_AND;
|
|
|
219 |
} else {
|
|
|
220 |
ns = neutchars(cl->gettext(), "/");
|
|
|
221 |
if (ns.compare(cl->gettext())) {
|
|
|
222 |
// had not ',' but has '/'
|
|
|
223 |
tp = SCLT_OR;
|
|
|
224 |
}
|
|
|
225 |
}
|
|
|
226 |
|
|
|
227 |
if (tp != SCLT_FILENAME) {
|
|
|
228 |
SearchDataClauseSimple *ncl =
|
|
|
229 |
new SearchDataClauseSimple(tp, ns, fld);
|
|
|
230 |
delete cl;
|
|
|
231 |
return sd->addClause(ncl);
|
|
|
232 |
}
|
|
|
233 |
}
|
|
|
234 |
return sd->addClause(cl);
|
|
|
235 |
}
|
|
|
236 |
|