--- a/src/query/wasastringtoquery.cpp
+++ b/src/query/wasaparse.y
@@ -1,515 +1,415 @@
-/* Copyright (C) 2006 J.F.Dockes
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the
- * Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#ifndef TEST_WASASTRINGTOQUERY
+%{
+#define YYDEBUG 1
+
#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <regex.h>
-
-#include "smallut.h"
-#include "wasastringtoquery.h"
-
-#undef DEB_WASASTRINGTOQ
-#ifdef DEB_WASASTRINGTOQ
-#define DPRINT(X) fprintf X
-#define DUMPQ(Q) {string D;Q->describe(D);fprintf(stderr, "%s\n", D.c_str());}
-#else
-#define DPRINT(X)
-#define DUMPQ(Q)
-#endif
-
-WasaQuery::~WasaQuery()
-{
- for (vector<WasaQuery*>::iterator it = m_subs.begin();
- it != m_subs.end(); it++) {
- delete *it;
- }
- m_subs.clear();
-}
-
-static const char* reltosrel(WasaQuery::Rel rel)
-{
- switch (rel) {
- case WasaQuery::REL_EQUALS: return "=";
- case WasaQuery::REL_CONTAINS: return ":";
- case WasaQuery::REL_LT: return "<";
- case WasaQuery::REL_LTE: return "<=";
- case WasaQuery::REL_GT: return ">";
- case WasaQuery::REL_GTE: return ">=";
- default: return "?";
- }
-}
-
-void WasaQuery::describe(string &desc) const
-{
- desc += "(";
- string fieldspec = m_fieldspec.empty() ? string() : m_fieldspec +
- reltosrel(m_rel);
- switch (m_op) {
- case OP_NULL:
- desc += "NULL";
- break;
- case OP_LEAF:
- if (m_exclude)
- desc += "NOT (";
- desc += fieldspec + m_value;
- if (m_exclude)
- desc += ")";
- break;
- case OP_OR:
- case OP_AND:
- for (vector<WasaQuery *>::const_iterator it = m_subs.begin();
- it != m_subs.end(); it++) {
- (*it)->describe(desc);
- vector<WasaQuery *>::const_iterator it1 = it;
- it1++;
- if (it1 != m_subs.end())
- desc += m_op == OP_OR ? "OR ": "AND ";
- }
- break;
- }
- if (desc[desc.length() - 1] == ' ')
- desc.erase(desc.length() - 1);
- desc += ")";
- if (m_modifiers != 0) {
- if (m_modifiers & WQM_BOOST) desc += "BOOST|";
- if (m_modifiers & WQM_CASESENS) desc += "CASESENS|";
- if (m_modifiers & WQM_DIACSENS) desc += "DIACSENS|";
- if (m_modifiers & WQM_FUZZY) desc += "FUZZY|";
- if (m_modifiers & WQM_NOSTEM) desc += "NOSTEM|";
- if (m_modifiers & WQM_PHRASESLACK) {
- char buf[100];
- sprintf(buf, "%d", m_slack);
- desc += "PHRASESLACK(" + string(buf) + string(")|");
- }
- if (m_modifiers & WQM_PROX) desc += "PROX|";
- if (m_modifiers & WQM_REGEX) desc += "REGEX|";
- if (m_modifiers & WQM_SLOPPY) desc += "SLOPPY|";
- if (m_modifiers & WQM_WORDS) desc += "WORDS|";
-
- if (desc.length() > 0 && desc[desc.length()-1] == '|')
- desc.erase(desc.length()-1);
- }
- desc += " ";
-}
-
-// The string query parser code:
-
-/* Shamelessly lifted from Beagle:
- * This is our regular Expression Pattern:
- * we expect something like this:
- * -key:"Value String"modifiers
- * key:Value
- * or
- * Value
-*/
-
-/* The master regular expression used to parse a query string
- * Sub-expressions in parenthesis are numbered from 1. Each opening
- * parenthesis increases the index, but we're not interested in all
- * Deviations from standard:
- * Relation: the standard-conformant line read as (release<1.16):
- "(:|=|<|>|<=|>=)" //7 Relation
- but we are not actually making use of the relation type
- (interpreting all as ":"), and this can product unexpected results
- as a (ie pasted) search for nonexfield=value will silently drop
- the nonexfield part, while the user probably was not aware of
- triggering a field search (expecting just ':' to do this).
- */
-static const char * parserExpr =
- "(OR|\\|\\|)[[:space:]]*" //1 OR,||
- "|"
- "(AND|&&)[[:space:]]*" // 2 AND,&& (ignored, default)
- "|"
- "(" //3
- "([+-])?" //4 Force or exclude indicator
- "(" //5
- "([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre"
- "[[:space:]]*"
- "(:|=|>|<)" //7 Relation
- "[[:space:]]*)?"
- "(" //8
- "(\"" //9
- "([^\"]+)" //10 "A quoted term"
- "\")"
- "([bcCdDeflLoprsw.0-9]*)" //11 modifiers
- "|"
- "([^[:space:]\"]+)" //12 ANormalTerm
- ")"
- ")[[:space:]]*"
+
+#include <iostream>
+#include <string>
+
+#include "searchdata.h"
+#include "wasaparserdriver.h"
+#include "wasaparse.tab.h"
+
+using namespace std;
+
+int yylex(yy::parser::semantic_type *, WasaParserDriver *);
+void yyerror(char const *);
+static void qualify(Rcl::SearchDataClauseDist *, const string &);
+
+static void addSubQuery(WasaParserDriver *d,
+ Rcl::SearchData *sd, Rcl::SearchData *sq)
+{
+ sd->addClause(new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sq)));
+}
+
+%}
+
+%skeleton "lalr1.cc"
+%defines
+%error-verbose
+
+%parse-param {WasaParserDriver* d}
+%lex-param {WasaParserDriver* d}
+
+%union {
+ std::string *str;
+ Rcl::SearchDataClauseSimple *cl;
+ Rcl::SearchData *sd;
+}
+%destructor {delete $$;} <str>
+
+%type <cl> qualquote
+%type <cl> fieldexpr
+%type <cl> term
+%type <sd> query
+%type <str> complexfieldname
+
+ /* Non operator tokens need precedence because of the possibility of
+ concatenation which needs to have lower prec than OR */
+%left <str> WORD
+%left <str> QUOTED
+%left <str> QUALIFIERS
+%left AND UCONCAT
+%left OR
+
+%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER
+
+%%
+
+topquery: query
+{
+ d->m_result = $1;
+}
+
+query:
+query query %prec UCONCAT
+{
+ //cerr << "q: query query" << endl;
+ Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
+ addSubQuery(d, sd, $1);
+ addSubQuery(d, sd, $2);
+ $$ = sd;
+}
+| query AND query
+{
+ //cerr << "q: query AND query" << endl;
+ Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
+ addSubQuery(d, sd, $1);
+ addSubQuery(d, sd, $3);
+ $$ = sd;
+}
+| query OR query
+{
+ //cerr << "q: query OR query" << endl;
+ Rcl::SearchData *top = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
+ Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang);
+ addSubQuery(d, sd, $1);
+ addSubQuery(d, sd, $3);
+ addSubQuery(d, top, sd);
+ $$ = top;
+}
+| '(' query ')'
+{
+ //cerr << "q: ( query )" << endl;
+ $$ = $2;
+}
+|
+fieldexpr %prec UCONCAT
+{
+ //cerr << "q: fieldexpr" << endl;
+ Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
+ d->addClause(sd, $1);
+ $$ = sd;
+}
;
-// For debugging the parser. But see also NMATCH
-static const char *matchNames[] = {
- /* 0*/ "",
- /* 1*/ "OR",
- /* 2*/ "AND",
- /* 3*/ "",
- /* 4*/ "+-",
- /* 5*/ "",
- /* 6*/ "FIELD",
- /* 7*/ "RELATION",
- /* 8*/ "",
- /* 9*/ "",
- /*10*/ "QUOTEDTERM",
- /*11*/ "MODIFIERS",
- /*12*/ "TERM",
-};
-#define NMATCH (sizeof(matchNames) / sizeof(char *))
-
-// Symbolic names for the interesting submatch indices
-enum SbMatchIdx {SMI_OR=1, SMI_AND=2, SMI_PM=4, SMI_FIELD=6, SMI_REL=7,
- SMI_QUOTED=10, SMI_MODIF=11, SMI_TERM=12};
-
-static const int maxmatchlen = 1024;
-static const int errbuflen = 300;
-
-class StringToWasaQuery::Internal {
-public:
- Internal()
- : m_rxneedsfree(false)
- {}
- ~Internal()
- {
- if (m_rxneedsfree)
- regfree(&m_rx);
- }
- bool checkSubMatch(int i, char *match, string& reason)
- {
- if (i < 0 || i >= int(NMATCH) || m_pmatch[i].rm_so == -1) {
- //DPRINT((stderr, "checkSubMatch: no match: i %d rm_so %d\n",
- //i, m_pmatch[i].rm_so));
- return false;
- }
- if (m_pmatch[i].rm_eo - m_pmatch[i].rm_so <= 0) {
- // weird and fatal
- reason = "Internal regular expression handling error";
- return false;
- }
- //DPRINT((stderr, "checkSubMatch: so %d eo %d\n", m_pmatch[i].rm_so,
- //m_pmatch[i].rm_eo));
- memcpy(match, m_cp + m_pmatch[i].rm_so,
- m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
- match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
- return true;
- }
-
- WasaQuery *stringToQuery(const string& str, string& reason);
-
- friend class StringToWasaQuery;
-private:
- const char *m_cp;
- regex_t m_rx;
- bool m_rxneedsfree;
- regmatch_t m_pmatch[NMATCH];
-};
-
-StringToWasaQuery::StringToWasaQuery()
- : internal(new Internal)
-{
-}
-
-StringToWasaQuery::~StringToWasaQuery()
-{
- delete internal;
-}
-
-WasaQuery *
-StringToWasaQuery::stringToQuery(const string& str, string& reason)
-{
- if (internal == 0)
- return 0;
- WasaQuery *wq = internal->stringToQuery(str, reason);
- DUMPQ(wq);
- return wq;
-}
-
-WasaQuery *
-StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
-{
- if (m_rxneedsfree)
- regfree(&m_rx);
-
- char errbuf[errbuflen+1];
- int errcode;
- if ((errcode = regcomp(&m_rx, parserExpr, REG_EXTENDED)) != 0) {
- regerror(errcode, &m_rx, errbuf, errbuflen);
- reason = errbuf;
- return 0;
- }
- m_rxneedsfree = true;
-
- const char *cpe;
- m_cp = str.c_str();
- cpe = str.c_str() + str.length();
-
- WasaQuery *query = new WasaQuery;
- query->m_op = WasaQuery::OP_AND;
- WasaQuery *orChain = 0;
- bool prev_or = false;
-
- // Loop on repeated regexp matches on the main string.
- for (int loop = 0;;loop++) {
- if ((errcode = regexec(&m_rx, m_cp, NMATCH, m_pmatch, 0))) {
- regerror(errcode, &m_rx, errbuf, errbuflen);
- reason = errbuf;
- return 0;
- }
- if (m_pmatch[0].rm_eo <= 0) {
- // weird and fatal
- reason = "Internal regular expression handling error";
- return 0;
- }
-
-#ifdef DEB_WASASTRINGTOQ
- DPRINT((stderr, "Next part:\n"));
- for (unsigned int i = 0; i < NMATCH; i++) {
- if (m_pmatch[i].rm_so == -1) continue;
- char match[maxmatchlen+1];
- memcpy(match, m_cp + m_pmatch[i].rm_so,
- m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
- match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
- if (matchNames[i][0])
- DPRINT((stderr, "%10s: [%s] (%d->%d)\n", matchNames[i], match,
- (int)m_pmatch[i].rm_so, (int)m_pmatch[i].rm_eo));
- }
-#endif
-
- char match[maxmatchlen+1];
- if (checkSubMatch(SMI_OR, match, reason)) {
- if (prev_or) {
- // Bad syntax
- reason = "Bad syntax: consecutive OR";
- return 0;
- }
-
- if (orChain == 0) {
- // Fist OR seen: start OR subclause.
- if ((orChain = new WasaQuery()) == 0) {
- reason = "Out of memory";
- return 0;
- }
- orChain->m_op = WasaQuery::OP_OR;
- }
-
- // For the first OR, we need to transfer the previous
- // query from the main vector to the OR subquery
- if (orChain->m_subs.empty() && !query->m_subs.empty()) {
- orChain->m_subs.push_back(query->m_subs.back());
- query->m_subs.pop_back();
- }
- prev_or = true;
-
- } else if (checkSubMatch(SMI_AND, match, reason)) {
- // Do nothing, AND is the default. We might want to check for
- // errors like consecutive ANDs, or OR AND
-
- } else {
-
- WasaQuery *nclause = new WasaQuery;
- if (nclause == 0) {
- reason = "Out of memory";
- return 0;
- }
-
- // Check for quoted or unquoted value
- unsigned int mods = 0;
- if (checkSubMatch(SMI_QUOTED, match, reason)) {
- nclause->m_value = match;
- mods |= WasaQuery::WQM_QUOTED;
- } else if (checkSubMatch(SMI_TERM, match, reason)) {
- nclause->m_value = match;
- }
-
- if (nclause->m_value.empty()) {
- // Isolated +- or fieldname: without a value. Ignore until
- // told otherwise.
- DPRINT((stderr, "Clause with empty value, skipping\n"));
- delete nclause;
- goto nextfield;
- }
-
- if (checkSubMatch(SMI_MODIF, match, reason)) {
- DPRINT((stderr, "Got modifiers: [%s]\n", match));
- for (unsigned int i = 0; i < strlen(match); i++) {
- switch (match[i]) {
- case 'b':
- mods |= WasaQuery::WQM_BOOST;
- nclause->m_weight = 10.0;
- break;
- case 'c': break;
- case 'C': mods |= WasaQuery::WQM_CASESENS; break;
- case 'd': break;
- case 'D': mods |= WasaQuery::WQM_DIACSENS; break;
- case 'e': mods |= WasaQuery::WQM_CASESENS |
- WasaQuery::WQM_DIACSENS |
- WasaQuery::WQM_NOSTEM;
- break;
- case 'f': mods |= WasaQuery::WQM_FUZZY; break;
- case 'l': mods |= WasaQuery::WQM_NOSTEM; break;
- case 'L': break;
- case 'o':
- mods |= WasaQuery::WQM_PHRASESLACK;
- // Default slack if specified only by 'o' is 10.
- nclause->m_slack = 10;
- if (i < strlen(match) - 1) {
- char *endptr;
- int slack = strtol(match+i+1, &endptr, 10);
- if (endptr != match+i+1) {
- i += endptr - (match+i+1);
- nclause->m_slack = slack;
- }
- }
- break;
- case 'p':
- mods |= WasaQuery::WQM_PROX;
- nclause->m_slack = 10;
- break;
- case 'r': mods |= WasaQuery::WQM_REGEX; break;
- case 's': mods |= WasaQuery::WQM_SLOPPY; break;
- case 'w': mods |= WasaQuery::WQM_WORDS; break;
- case '.':case '0':case '1':case '2':case '3':case '4':
- case '5':case '6':case '7':case '8':case '9':
- {
- int n;
- float factor;
- if (sscanf(match+i, "%f %n", &factor, &n)) {
- nclause->m_weight = factor;
- DPRINT((stderr, "Got factor %.2f len %d\n",
- factor, n));
- }
- if (n)
- i += n-1;
- }
- }
- }
- }
- nclause->m_modifiers = WasaQuery::Modifier(mods);
-
- // Field indicator ?
- if (checkSubMatch(SMI_FIELD, match, reason)) {
- // We used Check for special fields indicating sorting
- // etc. here but this went away from the spec. See 1.4
- // if it comes back
- nclause->m_fieldspec = match;
- if (checkSubMatch(SMI_REL, match, reason)) {
- switch (match[0]) {
- case '=':nclause->m_rel = WasaQuery::REL_EQUALS;break;
- case ':':nclause->m_rel = WasaQuery::REL_CONTAINS;break;
- case '<':
- if (match[1] == '=')
- nclause->m_rel = WasaQuery::REL_LTE;
- else
- nclause->m_rel = WasaQuery::REL_LT;
- break;
- case '>':
- if (match[1] == '=')
- nclause->m_rel = WasaQuery::REL_GTE;
- else
- nclause->m_rel = WasaQuery::REL_GT;
- break;
- default:
- nclause->m_rel = WasaQuery::REL_CONTAINS;
- }
- } else {
- // ?? If field matched we should have a relation
- nclause->m_rel = WasaQuery::REL_CONTAINS;
- }
- }
-
- nclause->m_op = WasaQuery::OP_LEAF;
- // +- indicator ?
- if (checkSubMatch(SMI_PM, match, reason) && match[0] == '-') {
- nclause->m_exclude = true;
- } else {
- nclause->m_exclude = false;
- }
-
- if (prev_or) {
- // The precedent token was an OR, add new clause to or chain
- //DPRINT((stderr, "Adding to OR chain\n"));
- orChain->m_subs.push_back(nclause);
- } else {
- if (orChain) {
- // Getting out of OR. Add the OR subquery to the main one
- //DPRINT((stderr, "Adding OR chain to main\n"));
- query->m_subs.push_back(orChain);
- orChain = 0;
- }
- //DPRINT((stderr, "Adding to main chain\n"));
- // Add new clause to main query
- query->m_subs.push_back(nclause);
- }
-
- prev_or = false;
- }
-
- nextfield:
- // Advance current string position. We checked earlier that
- // the increment is strictly positive, so we won't loop
- // forever
- m_cp += m_pmatch[0].rm_eo;
- if (m_cp >= cpe)
- break;
- }
-
- if (orChain) {
- // Getting out of OR. Add the OR subquery to the main one
- DPRINT((stderr, "Adding OR chain to main.Before: \n"));
- DUMPQ(query);
- DUMPQ(orChain);
- query->m_subs.push_back(orChain);
- }
-
- regfree(&m_rx);
- m_rxneedsfree = false;
- return query;
-}
-
-#else // TEST
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "wasastringtoquery.h"
-
-static char *thisprog;
-
-int main(int argc, char **argv)
-{
- thisprog = argv[0];
- argc--; argv++;
-
- if (argc != 1) {
- fprintf(stderr, "need one arg\n");
- exit(1);
- }
- const string str = *argv++;argc--;
- string reason;
- StringToWasaQuery qparser;
- WasaQuery *q = qparser.stringToQuery(str, reason);
- if (q == 0) {
- fprintf(stderr, "stringToQuery failed: %s\n", reason.c_str());
- exit(1);
- }
- string desc;
- q->describe(desc);
- fprintf(stderr, "Finally: %s\n", desc.c_str());
- exit(0);
-}
-
-#endif // TEST_WASASTRINGTOQUERY
+fieldexpr: term
+{
+ // cerr << "fe: simple fieldexpr: " << $1->gettext() << endl;
+ $$ = $1;
+}
+| complexfieldname EQUALS term
+{
+ // cerr << "fe: " << *$1 << " = " << $3->gettext() << endl;
+ $3->setfield(*$1);
+ $3->setrel(Rcl::SearchDataClause::REL_EQUALS);
+ $$ = $3;
+ delete $1;
+}
+| complexfieldname CONTAINS term
+{
+ // cerr << "fe: " << *$1 << " : " << $3->gettext() << endl;
+ $3->setfield(*$1);
+ $3->setrel(Rcl::SearchDataClause::REL_CONTAINS);
+ $$ = $3;
+ delete $1;
+}
+| complexfieldname SMALLER term
+{
+ // cerr << "fe: " << *$1 << " < " << $3->gettext() << endl;
+ $3->setfield(*$1);
+ $3->setrel(Rcl::SearchDataClause::REL_LT);
+ $$ = $3;
+ delete $1;
+}
+| complexfieldname SMALLEREQ term
+{
+ // cerr << "fe: " << *$1 << " <= " << $3->gettext() << endl;
+ $3->setfield(*$1);
+ $3->setrel(Rcl::SearchDataClause::REL_LTE);
+ $$ = $3;
+ delete $1;
+}
+| complexfieldname GREATER term
+{
+ // cerr << "fe: " << *$1 << " > " << $3->gettext() << endl;
+ $3->setfield(*$1);
+ $3->setrel(Rcl::SearchDataClause::REL_GT);
+ $$ = $3;
+ delete $1;
+}
+| complexfieldname GREATEREQ term
+{
+ // cerr << "fe: " << *$1 << " >= " << $3->gettext() << endl;
+ $3->setfield(*$1);
+ $3->setrel(Rcl::SearchDataClause::REL_GTE);
+ $$ = $3;
+ delete $1;
+}
+| '-' fieldexpr
+{
+ // cerr << "fe: - fieldexpr[" << $2->gettext() << "]" << endl;
+ $2->setexclude(true);
+ $$ = $2;
+}
+;
+
+/* Deal with field names like dc:title */
+complexfieldname:
+WORD
+{
+ // cerr << "cfn: WORD" << endl;
+ $$ = $1;
+}
+|
+complexfieldname CONTAINS WORD
+{
+ // cerr << "cfn: complexfieldname ':' WORD" << endl;
+ $$ = new string(*$1 + string(":") + *$3);
+ delete $1;
+ delete $3;
+}
+
+term:
+WORD
+{
+ //cerr << "term[" << *$1 << "]" << endl;
+ $$ = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *$1);
+ delete $1;
+}
+| qualquote
+{
+ $$ = $1;
+}
+
+qualquote:
+QUOTED
+{
+ // cerr << "QUOTED[" << *$1 << "]" << endl;
+ $$ = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
+ delete $1;
+}
+| QUOTED QUALIFIERS
+{
+ // cerr << "QUOTED[" << *$1 << "] QUALIFIERS[" << *$2 << "]" << endl;
+ Rcl::SearchDataClauseDist *cl =
+ new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
+ qualify(cl, *$2);
+ $$ = cl;
+ delete $1;
+ delete $2;
+}
+
+
+%%
+
+#include <ctype.h>
+
+// Look for int at index, skip and return new index found? value.
+static unsigned int qualGetInt(const string& q, unsigned int cur, int *pval)
+{
+ unsigned int ncur = cur;
+ if (cur < q.size() - 1) {
+ char *endptr;
+ int val = strtol(&q[cur + 1], &endptr, 10);
+ if (endptr != &q[cur + 1]) {
+ ncur += endptr - &q[cur + 1];
+ *pval = val;
+ }
+ }
+ return ncur;
+}
+
+static void qualify(Rcl::SearchDataClauseDist *cl, const string& quals)
+{
+ // cerr << "qualify(" << cl << ", " << quals << ")" << endl;
+ for (unsigned int i = 0; i < quals.length(); i++) {
+ //fprintf(stderr, "qual char %c\n", quals[i]);
+ switch (quals[i]) {
+ case 'b':
+ cl->setWeight(10.0);
+ break;
+ case 'c': break;
+ case 'C':
+ cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
+ break;
+ case 'd': break;
+ case 'D':
+ cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
+ break;
+ case 'e':
+ cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
+ cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
+ cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
+ break;
+ case 'l':
+ cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
+ break;
+ case 'L': break;
+ case 'o':
+ {
+ int slack = 10;
+ i = qualGetInt(quals, i, &slack);
+ cl->setslack(slack);
+ //cerr << "set slack " << cl->getslack() << " done" << endl;
+ }
+ break;
+ case 'p':
+ cl->setTp(Rcl::SCLT_NEAR);
+ if (cl->getslack() == 0) {
+ cl->setslack(10);
+ //cerr << "set slack " << cl->getslack() << " done" << endl;
+ }
+ break;
+ case '.':case '0':case '1':case '2':case '3':case '4':
+ case '5':case '6':case '7':case '8':case '9':
+ {
+ int n = 0;
+ float factor = 1.0;
+ if (sscanf(&(quals[i]), "%f %n", &factor, &n)) {
+ if (factor != 1.0) {
+ cl->setWeight(factor);
+ }
+ }
+ if (n > 0)
+ i += n - 1;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+
+// specialstartchars are special only at the beginning of a token
+// (e.g. doctor-who is a term, not 2 terms separated by '-')
+static const string specialstartchars("-");
+// specialinchars are special everywhere except inside a quoted string
+static const string specialinchars(":=<>()");
+
+// Called with the first dquote already read
+static int parseString(WasaParserDriver *d, yy::parser::semantic_type *yylval)
+{
+ string* value = new string();
+ d->qualifiers().clear();
+ int c;
+ while ((c = d->GETCHAR())) {
+ switch (c) {
+ case '\\':
+ /* Escape: get next char */
+ c = d->GETCHAR();
+ if (c == 0) {
+ value->push_back(c);
+ goto out;
+ }
+ value->push_back(c);
+ break;
+ case '"':
+ /* End of string. Look for qualifiers */
+ while ((c = d->GETCHAR()) && !isspace(c))
+ d->qualifiers().push_back(c);
+ goto out;
+ default:
+ value->push_back(c);
+ }
+ }
+out:
+ //cerr << "GOT QUOTED ["<<value<<"] quals [" << d->qualifiers() << "]" << endl;
+ yylval->str = value;
+ return yy::parser::token::QUOTED;
+}
+
+
+int yylex(yy::parser::semantic_type *yylval, WasaParserDriver *d)
+{
+ if (!d->qualifiers().empty()) {
+ yylval->str = new string();
+ yylval->str->swap(d->qualifiers());
+ return yy::parser::token::QUALIFIERS;
+ }
+
+ int c;
+
+ /* Skip white space. */
+ while ((c = d->GETCHAR()) && isspace(c))
+ continue;
+
+ if (c == 0)
+ return 0;
+
+ if (specialstartchars.find_first_of(c) != string::npos) {
+ //cerr << "yylex: return " << c << endl;
+ return c;
+ }
+
+ // field-term relations
+ switch (c) {
+ case '=': return yy::parser::token::EQUALS;
+ case ':': return yy::parser::token::CONTAINS;
+ case '<': {
+ int c1 = d->GETCHAR();
+ if (c1 == '=') {
+ return yy::parser::token::SMALLEREQ;
+ } else {
+ d->UNGETCHAR(c1);
+ return yy::parser::token::SMALLER;
+ }
+ }
+ case '>': {
+ int c1 = d->GETCHAR();
+ if (c1 == '=') {
+ return yy::parser::token::GREATEREQ;
+ } else {
+ d->UNGETCHAR(c1);
+ return yy::parser::token::GREATER;
+ }
+ }
+ case '(': case ')':
+ return c;
+ }
+
+ if (c == '"')
+ return parseString(d, yylval);
+
+ d->UNGETCHAR(c);
+
+ // Other chars start a term or field name or reserved word
+ string* word = new string();
+ while ((c = d->GETCHAR())) {
+ if (isspace(c)) {
+ //cerr << "Word broken by whitespace" << endl;
+ break;
+ } else if (specialinchars.find_first_of(c) != string::npos) {
+ //cerr << "Word broken by special char" << endl;
+ d->UNGETCHAR(c);
+ break;
+ } else if (c == 0) {
+ //cerr << "Word broken by EOF" << endl;
+ break;
+ } else {
+ word->push_back(c);
+ }
+ }
+
+ if (!word->compare("AND") || !word->compare("&&")) {
+ delete word;
+ return yy::parser::token::AND;
+ } else if (!word->compare("OR") || !word->compare("||")) {
+ delete word;
+ return yy::parser::token::OR;
+ }
+
+// cerr << "Got word [" << word << "]" << endl;
+ yylval->str = word;
+ return yy::parser::token::WORD;
+}