recoll / Code / Commit [3fb718]

Commit [3fb718] RECOLL_1_22_MAINT RECOLL_1_23_MAINT RECOLL_1_24_MAINT internal-xsl master History

Converted query language parser from the old regexp jungle to bison. Allow using parentheses for clearer syntax.

Authored by: Jean-Francois Dockes 2015-01-29

Child(ren): [b605dc]

added	src/rcldb/searchdatatox.cpp
changed	src/kde/kioslave/kio_recoll/htmlif.cpp
changed	src/kde/kioslave/kio_recoll/kio_recoll.cpp
changed	src/lib/mkMake.in
changed	src/php/recoll/recoll.cpp
changed	src/python/recoll/pyrecoll.cpp
changed	src/query/Makefile
changed	src/query/recollq.cpp
changed	src/query/wasatorcl.h
changed	src/rcldb/searchdata.cpp
changed	src/rcldb/searchdata.h
changed	src/Makefile.in
copied	src/query/wasastringtoquery.cpp -> src/query/wasaparse.y
copied	src/query/wasastringtoquery.h -> src/query/wasaparserdriver.h
copied	src/query/wasatorcl.cpp -> src/query/wasaparse.cpp

src/rcldb/searchdatatox.cpp Diff Switch to side-by-side view

src/kde/kioslave/kio_recoll/htmlif.cpp Diff Switch to side-by-side view

src/kde/kioslave/kio_recoll/kio_recoll.cpp Diff Switch to side-by-side view

src/lib/mkMake.in Diff Switch to side-by-side view

src/php/recoll/recoll.cpp Diff Switch to side-by-side view

src/python/recoll/pyrecoll.cpp Diff Switch to side-by-side view

src/query/Makefile Diff Switch to side-by-side view

src/query/recollq.cpp Diff Switch to side-by-side view

src/query/wasatorcl.h Diff Switch to side-by-side view

src/rcldb/searchdata.cpp Diff Switch to side-by-side view

src/rcldb/searchdata.h Diff Switch to side-by-side view

src/Makefile.in Diff Switch to side-by-side view

src/query/wasastringtoquery.cpp to src/query/wasaparse.y

--- a/src/query/wasastringtoquery.cpp
+++ b/src/query/wasaparse.y
@@ -1,515 +1,415 @@
-/* Copyright (C) 2006 J.F.Dockes
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation; either version 2 of the License, or
- *   (at your option) any later version.
- *
- *   This program is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with this program; if not, write to the
- *   Free Software Foundation, Inc.,
- *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- */
-#ifndef TEST_WASASTRINGTOQUERY
+%{
+#define YYDEBUG 1
+
 #include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <regex.h>
-
-#include "smallut.h"
-#include "wasastringtoquery.h"
-
-#undef DEB_WASASTRINGTOQ
-#ifdef DEB_WASASTRINGTOQ
-#define DPRINT(X) fprintf X
-#define DUMPQ(Q) {string D;Q->describe(D);fprintf(stderr, "%s\n", D.c_str());}
-#else
-#define DPRINT(X)
-#define DUMPQ(Q)
-#endif
-
-WasaQuery::~WasaQuery()
-{
-    for (vector<WasaQuery*>::iterator it = m_subs.begin();
-	 it != m_subs.end(); it++) {
-	delete *it;
-    }
-    m_subs.clear();
-}
-
-static const char* reltosrel(WasaQuery::Rel rel)
-{
-    switch (rel) {
-    case WasaQuery::REL_EQUALS: return "=";
-    case WasaQuery::REL_CONTAINS: return ":";
-    case WasaQuery::REL_LT: return "<";
-    case WasaQuery::REL_LTE: return "<=";
-    case WasaQuery::REL_GT: return ">";
-    case WasaQuery::REL_GTE: return ">=";
-    default: return "?";
-    }
-}
-
-void WasaQuery::describe(string &desc) const
-{
-    desc += "(";
-    string fieldspec = m_fieldspec.empty() ? string() : m_fieldspec + 
-	reltosrel(m_rel);
-    switch (m_op) {
-    case OP_NULL: 
-	desc += "NULL"; 
-	break;
-    case OP_LEAF: 
-	if (m_exclude)
-	    desc += "NOT (";
-	desc += fieldspec + m_value;
-	if (m_exclude)
-	    desc += ")";
-	break;
-    case OP_OR: 
-    case OP_AND:
-	for (vector<WasaQuery *>::const_iterator it = m_subs.begin();
-	     it != m_subs.end(); it++) {
-	    (*it)->describe(desc);
-	    vector<WasaQuery *>::const_iterator it1 = it;
-	    it1++;
-	    if (it1 != m_subs.end())
-		desc += m_op == OP_OR ? "OR ": "AND ";
-	}
-	break;
-    }
-    if (desc[desc.length() - 1] == ' ')
-	desc.erase(desc.length() - 1);
-    desc += ")"; 
-    if (m_modifiers != 0) {
-	if (m_modifiers & WQM_BOOST)     desc += "BOOST|";
-	if (m_modifiers & WQM_CASESENS)  desc += "CASESENS|";
-	if (m_modifiers & WQM_DIACSENS)  desc += "DIACSENS|";
-	if (m_modifiers & WQM_FUZZY)     desc += "FUZZY|";
-	if (m_modifiers & WQM_NOSTEM)    desc += "NOSTEM|";
-	if (m_modifiers & WQM_PHRASESLACK) {
-	    char buf[100];
-	    sprintf(buf, "%d", m_slack);
-	    desc += "PHRASESLACK(" + string(buf) + string(")|");
-	}
-	if (m_modifiers & WQM_PROX)      desc += "PROX|";
-	if (m_modifiers & WQM_REGEX)     desc += "REGEX|";
-	if (m_modifiers & WQM_SLOPPY)    desc += "SLOPPY|";
-	if (m_modifiers & WQM_WORDS)     desc += "WORDS|";
-
-	if (desc.length() > 0 && desc[desc.length()-1] == '|')
-	    desc.erase(desc.length()-1);
-    }
-    desc += " ";
-}
-
-// The string query parser code:
-
-/* Shamelessly lifted from Beagle:			
- * This is our regular Expression Pattern:
- * we expect something like this:
- * -key:"Value String"modifiers
- * key:Value
- * or
- * Value
-*/
-
-/* The master regular expression used to parse a query string
- * Sub-expressions in parenthesis are numbered from 1. Each opening
- * parenthesis increases the index, but we're not interested in all
- * Deviations from standard:
- *  Relation: the standard-conformant line read as (release<1.16):
-        "(:|=|<|>|<=|>=)"            //7 Relation
-    but we are not actually making use of the relation type
-    (interpreting all as ":"), and this can product unexpected results
-    as a (ie pasted) search for nonexfield=value will silently drop
-    the nonexfield part, while the user probably was not aware of
-    triggering a field search (expecting just ':' to do this).
- */
-static const char * parserExpr = 
-    "(OR|\\|\\|)[[:space:]]*"        //1 OR,|| 
-    "|"
-    "(AND|&&)[[:space:]]*"           // 2 AND,&& (ignored, default)
-    "|"
-    "("                              //3 
-      "([+-])?"                      //4 Force or exclude indicator
-      "("                            //5
-        "([[:alpha:]][[:alnum:]:]*)" //6 Field spec: ie: "dc:title:letitre"
-        "[[:space:]]*"
-        "(:|=|>|<)"            //7 Relation
-        "[[:space:]]*)?"
-      "("                            //8
-        "(\""                        //9
-          "([^\"]+)"                 //10 "A quoted term"
-        "\")"                        
-        "([bcCdDeflLoprsw.0-9]*)"             //11 modifiers
-        "|"
-        "([^[:space:]\"]+)"          //12 ANormalTerm
-      ")"
-    ")[[:space:]]*"
+
+#include <iostream>
+#include <string>
+
+#include "searchdata.h"
+#include "wasaparserdriver.h"
+#include "wasaparse.tab.h"
+
+using namespace std;
+
+int yylex(yy::parser::semantic_type *, WasaParserDriver *);
+void yyerror(char const *);
+static void qualify(Rcl::SearchDataClauseDist *, const string &);
+
+static void addSubQuery(WasaParserDriver *d,
+                        Rcl::SearchData *sd, Rcl::SearchData *sq)
+{
+    sd->addClause(new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sq)));
+}
+
+%}
+
+%skeleton "lalr1.cc"
+%defines
+%error-verbose
+
+%parse-param {WasaParserDriver* d}
+%lex-param {WasaParserDriver* d}
+
+%union {
+    std::string *str;
+    Rcl::SearchDataClauseSimple *cl;
+    Rcl::SearchData *sd;
+}
+%destructor {delete $$;} <str>
+
+%type <cl> qualquote
+%type <cl> fieldexpr
+%type <cl> term
+%type <sd> query
+%type <str> complexfieldname
+
+ /* Non operator tokens need precedence because of the possibility of
+    concatenation which needs to have lower prec than OR */
+%left <str> WORD
+%left <str> QUOTED
+%left <str> QUALIFIERS
+%left AND UCONCAT
+%left OR
+
+%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER
+
+%%
+
+topquery: query
+{
+    d->m_result = $1;
+}
+
+query: 
+query query %prec UCONCAT
+{
+    //cerr << "q: query query" << endl;
+    Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
+    addSubQuery(d, sd, $1);
+    addSubQuery(d, sd, $2);
+    $$ = sd;
+}
+| query AND query
+{
+    //cerr << "q: query AND query" << endl;
+    Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
+    addSubQuery(d, sd, $1);
+    addSubQuery(d, sd, $3);
+    $$ = sd;
+}
+| query OR query
+{
+    //cerr << "q: query OR query" << endl;
+    Rcl::SearchData *top = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
+    Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang);
+    addSubQuery(d, sd, $1);
+    addSubQuery(d, sd, $3);
+    addSubQuery(d, top, sd);
+    $$ = top;
+}
+| '(' query ')' 
+{
+    //cerr << "q: ( query )" << endl;
+    $$ = $2;
+}
+|
+fieldexpr %prec UCONCAT
+{
+    //cerr << "q: fieldexpr" << endl;
+    Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
+    d->addClause(sd, $1);
+    $$ = sd;
+}
 ;
 
-// For debugging the parser. But see also NMATCH
-static const char *matchNames[] = {
-     /* 0*/   "",
-     /* 1*/   "OR",
-     /* 2*/   "AND",
-     /* 3*/   "",
-     /* 4*/   "+-",
-     /* 5*/   "",
-     /* 6*/   "FIELD",
-     /* 7*/   "RELATION",
-     /* 8*/   "",
-     /* 9*/   "",
-     /*10*/   "QUOTEDTERM",
-     /*11*/   "MODIFIERS",
-     /*12*/   "TERM",
-};
-#define NMATCH (sizeof(matchNames) / sizeof(char *))
-
-// Symbolic names for the interesting submatch indices
-enum SbMatchIdx {SMI_OR=1, SMI_AND=2, SMI_PM=4, SMI_FIELD=6, SMI_REL=7,
-		 SMI_QUOTED=10, SMI_MODIF=11, SMI_TERM=12};
-
-static const int maxmatchlen = 1024;
-static const int errbuflen = 300;
-
-class StringToWasaQuery::Internal {
-public:
-    Internal() 
-	: m_rxneedsfree(false)
-    {}
-    ~Internal()
-    {
-	if (m_rxneedsfree)
-	    regfree(&m_rx);
-    }
-    bool checkSubMatch(int i, char *match, string& reason)
-    {
-	if (i < 0 || i >= int(NMATCH) || m_pmatch[i].rm_so == -1) {
-	    //DPRINT((stderr, "checkSubMatch: no match: i %d rm_so %d\n", 
-	    //i, m_pmatch[i].rm_so));
-	    return false;
-	}
-	if (m_pmatch[i].rm_eo - m_pmatch[i].rm_so <= 0) {
-	    // weird and fatal
-	    reason = "Internal regular expression handling error";
-	    return false;
-	}
-	//DPRINT((stderr, "checkSubMatch: so %d eo %d\n", m_pmatch[i].rm_so, 
-	//m_pmatch[i].rm_eo));
-	memcpy(match, m_cp + m_pmatch[i].rm_so, 
-	       m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
-	match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
-	return true;
-    }
-
-    WasaQuery *stringToQuery(const string& str, string& reason);
-
-    friend class StringToWasaQuery;
-private:
-    const char *m_cp;
-    regex_t     m_rx;
-    bool        m_rxneedsfree;
-    regmatch_t  m_pmatch[NMATCH];
-};
-
-StringToWasaQuery::StringToWasaQuery() 
-    : internal(new Internal)
-{
-}
-
-StringToWasaQuery::~StringToWasaQuery()
-{
-    delete internal;
-}
-
-WasaQuery *
-StringToWasaQuery::stringToQuery(const string& str, string& reason)
-{
-    if (internal == 0)
-	return 0;
-    WasaQuery *wq = internal->stringToQuery(str, reason);
-    DUMPQ(wq);
-    return wq;
-}
-
-WasaQuery *
-StringToWasaQuery::Internal::stringToQuery(const string& str, string& reason)
-{
-    if (m_rxneedsfree)
-	regfree(&m_rx);
-
-    char errbuf[errbuflen+1];
-    int errcode;
-    if ((errcode = regcomp(&m_rx, parserExpr, REG_EXTENDED)) != 0) {
-	regerror(errcode, &m_rx, errbuf, errbuflen);
-	reason = errbuf;
-	return 0;
-    }
-    m_rxneedsfree = true;
-
-    const char *cpe;
-    m_cp = str.c_str();
-    cpe = str.c_str() + str.length();
-
-    WasaQuery *query = new WasaQuery;
-    query->m_op = WasaQuery::OP_AND;
-    WasaQuery *orChain = 0;
-    bool prev_or = false;
-
-    // Loop on repeated regexp matches on the main string.
-    for (int loop = 0;;loop++) {
-	if ((errcode = regexec(&m_rx, m_cp, NMATCH, m_pmatch, 0))) {
-	    regerror(errcode, &m_rx, errbuf, errbuflen);
-	    reason = errbuf;
-	    return 0;
-	}
-	if (m_pmatch[0].rm_eo <= 0) {
-	    // weird and fatal
-	    reason = "Internal regular expression handling error";
-	    return 0;
-	}
-
-#ifdef DEB_WASASTRINGTOQ
-	DPRINT((stderr, "Next part:\n"));
-	for (unsigned int i = 0; i < NMATCH; i++) {
-	    if (m_pmatch[i].rm_so == -1) 	continue;
-	    char match[maxmatchlen+1];
-	    memcpy(match, m_cp + m_pmatch[i].rm_so,
-		   m_pmatch[i].rm_eo - m_pmatch[i].rm_so);
-	    match[m_pmatch[i].rm_eo - m_pmatch[i].rm_so] = 0;
-	    if (matchNames[i][0])
-		DPRINT((stderr, "%10s: [%s] (%d->%d)\n", matchNames[i], match, 
-			(int)m_pmatch[i].rm_so, (int)m_pmatch[i].rm_eo));
-	}
-#endif
-
-	char match[maxmatchlen+1];
-	if (checkSubMatch(SMI_OR, match, reason)) {
-	    if (prev_or) {
-		// Bad syntax
-		reason = "Bad syntax: consecutive OR";
-		return 0;
-	    }
-
-	    if (orChain == 0) {
-		// Fist OR seen: start OR subclause.
-		if ((orChain = new WasaQuery()) == 0) {
-		    reason = "Out of memory";
-		    return 0;
-		}
-		orChain->m_op = WasaQuery::OP_OR;
-	    }
-
-	    // For the first OR, we need to transfer the previous
-	    // query from the main vector to the OR subquery
-	    if (orChain->m_subs.empty() && !query->m_subs.empty()) {
-		orChain->m_subs.push_back(query->m_subs.back());
-		query->m_subs.pop_back();
-	    }
-	    prev_or = true;
-
-	} else if (checkSubMatch(SMI_AND, match, reason)) {
-	    // Do nothing, AND is the default. We might want to check for 
-	    // errors like consecutive ANDs, or OR AND
-
-	} else {
-
-	    WasaQuery *nclause = new WasaQuery;
-	    if (nclause == 0) {
-		reason = "Out of memory";
-		return 0;
-	    }
-
-	    // Check for quoted or unquoted value
-	    unsigned int mods = 0;
-	    if (checkSubMatch(SMI_QUOTED, match, reason)) {
-		nclause->m_value = match;
-                mods |= WasaQuery::WQM_QUOTED;
-	    } else if (checkSubMatch(SMI_TERM, match, reason)) {
-		nclause->m_value = match;
-	    }
-
-	    if (nclause->m_value.empty()) {
-		// Isolated +- or fieldname: without a value. Ignore until
-		// told otherwise.
-		DPRINT((stderr, "Clause with empty value, skipping\n"));
-		delete nclause;
-		goto nextfield;
-	    }
-	    
-	    if (checkSubMatch(SMI_MODIF, match, reason)) {
-		DPRINT((stderr, "Got modifiers: [%s]\n", match));
-		for (unsigned int i = 0; i < strlen(match); i++) {
-		    switch (match[i]) {
-		    case 'b': 
-			mods |= WasaQuery::WQM_BOOST; 
-			nclause->m_weight = 10.0;
-			break;
-		    case 'c': break;
-		    case 'C': mods |= WasaQuery::WQM_CASESENS; break;
-		    case 'd': break;
-		    case 'D': mods |= WasaQuery::WQM_DIACSENS; break;
-		    case 'e': mods |= WasaQuery::WQM_CASESENS | 
-			    WasaQuery::WQM_DIACSENS |  
-			    WasaQuery::WQM_NOSTEM; 
-			break;
-		    case 'f': mods |= WasaQuery::WQM_FUZZY; break;
-		    case 'l': mods |= WasaQuery::WQM_NOSTEM; break;
-		    case 'L': break;
-		    case 'o': 
-			mods |= WasaQuery::WQM_PHRASESLACK; 
-			// Default slack if specified only by 'o' is 10.
-			nclause->m_slack = 10;
-			if (i < strlen(match) - 1) {
-			    char *endptr;
-			    int slack = strtol(match+i+1, &endptr, 10);
-			    if (endptr != match+i+1) {
-				i += endptr - (match+i+1);
-				nclause->m_slack = slack;
-			    }
-			}
-			break;
-		    case 'p': 
-			mods |= WasaQuery::WQM_PROX; 
-			nclause->m_slack = 10;
-			break;
-		    case 'r': mods |= WasaQuery::WQM_REGEX; break;
-		    case 's': mods |= WasaQuery::WQM_SLOPPY; break;
-		    case 'w': mods |= WasaQuery::WQM_WORDS; break;
-		    case '.':case '0':case '1':case '2':case '3':case '4':
-		    case '5':case '6':case '7':case '8':case '9':
-		    {
-			int n;
-			float factor;
-			if (sscanf(match+i, "%f %n", &factor, &n)) {
-			    nclause->m_weight = factor;
-			    DPRINT((stderr, "Got factor %.2f len %d\n",
-				    factor, n));
-			}
-			if (n)
-			    i += n-1;
-		    }
-		    }
-		}
-	    }
-	    nclause->m_modifiers = WasaQuery::Modifier(mods);
-
-	    // Field indicator ?
-	    if (checkSubMatch(SMI_FIELD, match, reason)) {
-		// We used Check for special fields indicating sorting
-		// etc. here but this went away from the spec. See 1.4
-		// if it comes back
-		nclause->m_fieldspec = match;
-		if (checkSubMatch(SMI_REL, match, reason)) {
-		    switch (match[0]) {
-		    case '=':nclause->m_rel = WasaQuery::REL_EQUALS;break;
-		    case ':':nclause->m_rel = WasaQuery::REL_CONTAINS;break;
-		    case '<':
-			if (match[1] == '=')
-			    nclause->m_rel = WasaQuery::REL_LTE;
-			else
-			    nclause->m_rel = WasaQuery::REL_LT;
-			break;
-		    case '>':
-			if (match[1] == '=')
-			    nclause->m_rel = WasaQuery::REL_GTE;
-			else
-			    nclause->m_rel = WasaQuery::REL_GT;
-			break;
-		    default:
-			nclause->m_rel = WasaQuery::REL_CONTAINS;
-		    }
-		} else {
-		    // ?? If field matched we should have a relation
-		    nclause->m_rel = WasaQuery::REL_CONTAINS;
-		}
-	    }
-
-	    nclause->m_op = WasaQuery::OP_LEAF;
-	    // +- indicator ?
-	    if (checkSubMatch(SMI_PM, match, reason) && match[0] == '-') {
-		nclause->m_exclude = true;
-	    } else {
-		nclause->m_exclude = false;
-	    }
-
-	    if (prev_or) {
-		// The precedent token was an OR, add new clause to or chain
-		//DPRINT((stderr, "Adding to OR chain\n"));
-		orChain->m_subs.push_back(nclause);
-	    } else {
-		if (orChain) {
-		    // Getting out of OR. Add the OR subquery to the main one
-		    //DPRINT((stderr, "Adding OR chain to main\n"));
-		    query->m_subs.push_back(orChain);
-		    orChain = 0;
-		} 
-		//DPRINT((stderr, "Adding to main chain\n"));
-		// Add new clause to main query
-		query->m_subs.push_back(nclause);
-	    }
-
-	    prev_or = false;
-	}
-
-    nextfield:
-	// Advance current string position. We checked earlier that
-	// the increment is strictly positive, so we won't loop
-	// forever
-	m_cp += m_pmatch[0].rm_eo;
-	if (m_cp >= cpe)
-	    break;
-    }
-
-    if (orChain) {
-	// Getting out of OR. Add the OR subquery to the main one
-	DPRINT((stderr, "Adding OR chain to main.Before: \n"));
-	DUMPQ(query);
-	DUMPQ(orChain);
-	query->m_subs.push_back(orChain);
-    }
-
-    regfree(&m_rx);
-    m_rxneedsfree = false;
-    return query;
-}
-
-#else // TEST
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "wasastringtoquery.h"
-
-static char *thisprog;
-
-int main(int argc, char **argv)
-{
-    thisprog = argv[0];
-    argc--; argv++;
-
-    if (argc != 1) {
-	fprintf(stderr, "need one arg\n");
-	exit(1);
-    }
-    const string str = *argv++;argc--;
-    string reason;
-    StringToWasaQuery qparser;
-    WasaQuery *q = qparser.stringToQuery(str, reason);
-    if (q == 0) {
-	fprintf(stderr, "stringToQuery failed: %s\n", reason.c_str());
-	exit(1);
-    }
-    string desc;
-    q->describe(desc);
-    fprintf(stderr, "Finally: %s\n", desc.c_str());
-    exit(0);
-}
-
-#endif // TEST_WASASTRINGTOQUERY
+fieldexpr: term 
+{
+    // cerr << "fe: simple fieldexpr: " << $1->gettext() << endl;
+    $$ = $1;
+}
+| complexfieldname EQUALS term 
+{
+    // cerr << "fe: " << *$1 << " = " << $3->gettext() << endl;
+    $3->setfield(*$1);
+    $3->setrel(Rcl::SearchDataClause::REL_EQUALS);
+    $$ = $3;
+    delete $1;
+}
+| complexfieldname CONTAINS term 
+{
+    // cerr << "fe: " << *$1 << " : " << $3->gettext() << endl;
+    $3->setfield(*$1);
+    $3->setrel(Rcl::SearchDataClause::REL_CONTAINS);
+    $$ = $3;
+    delete $1;
+}
+| complexfieldname SMALLER term 
+{
+    // cerr << "fe: " << *$1 << " < " << $3->gettext() << endl;
+    $3->setfield(*$1);
+    $3->setrel(Rcl::SearchDataClause::REL_LT);
+    $$ = $3;
+    delete $1;
+}
+| complexfieldname SMALLEREQ term 
+{
+    // cerr << "fe: " << *$1 << " <= " << $3->gettext() << endl;
+    $3->setfield(*$1);
+    $3->setrel(Rcl::SearchDataClause::REL_LTE);
+    $$ = $3;
+    delete $1;
+}
+| complexfieldname GREATER term 
+{
+    // cerr << "fe: "  << *$1 << " > " << $3->gettext() << endl;
+    $3->setfield(*$1);
+    $3->setrel(Rcl::SearchDataClause::REL_GT);
+    $$ = $3;
+    delete $1;
+}
+| complexfieldname GREATEREQ term 
+{
+    // cerr << "fe: " << *$1 << " >= " << $3->gettext() << endl;
+    $3->setfield(*$1);
+    $3->setrel(Rcl::SearchDataClause::REL_GTE);
+    $$ = $3;
+    delete $1;
+}
+| '-' fieldexpr 
+{
+    // cerr << "fe: - fieldexpr[" << $2->gettext() << "]" << endl;
+    $2->setexclude(true);
+    $$ = $2;
+}
+;
+
+/* Deal with field names like dc:title */
+complexfieldname: 
+WORD
+{
+    // cerr << "cfn: WORD" << endl;
+    $$ = $1;
+}
+|
+complexfieldname CONTAINS WORD
+{
+    // cerr << "cfn: complexfieldname ':' WORD" << endl;
+    $$ = new string(*$1 + string(":") + *$3);
+    delete $1;
+    delete $3;
+}
+
+term: 
+WORD
+{
+    //cerr << "term[" << *$1 << "]" << endl;
+    $$ = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *$1);
+    delete $1;
+}
+| qualquote 
+{
+    $$ = $1;
+}
+
+qualquote: 
+QUOTED
+{
+    // cerr << "QUOTED[" << *$1 << "]" << endl;
+    $$ = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
+    delete $1;
+}
+| QUOTED QUALIFIERS 
+{
+    // cerr << "QUOTED[" << *$1 << "] QUALIFIERS[" << *$2 << "]" << endl;
+    Rcl::SearchDataClauseDist *cl = 
+        new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
+    qualify(cl, *$2);
+    $$ = cl;
+    delete $1;
+    delete $2;
+}
+
+
+%%
+
+#include <ctype.h>
+
+// Look for int at index, skip and return new index found? value.
+static unsigned int qualGetInt(const string& q, unsigned int cur, int *pval)
+{
+    unsigned int ncur = cur;
+    if (cur < q.size() - 1) {
+        char *endptr;
+        int val = strtol(&q[cur + 1], &endptr, 10);
+        if (endptr != &q[cur + 1]) {
+            ncur += endptr - &q[cur + 1];
+            *pval = val;
+        }
+    }
+    return ncur;
+}
+
+static void qualify(Rcl::SearchDataClauseDist *cl, const string& quals)
+{
+    // cerr << "qualify(" << cl << ", " << quals << ")" << endl;
+    for (unsigned int i = 0; i < quals.length(); i++) {
+        //fprintf(stderr, "qual char %c\n", quals[i]);
+        switch (quals[i]) {
+        case 'b': 
+            cl->setWeight(10.0);
+            break;
+        case 'c': break;
+        case 'C': 
+            cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
+            break;
+        case 'd': break;
+        case 'D':  
+            cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
+            break;
+        case 'e': 
+            cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
+            cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
+            cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
+            break;
+        case 'l': 
+            cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
+            break;
+        case 'L': break;
+        case 'o':  
+        {
+            int slack = 10;
+            i = qualGetInt(quals, i, &slack);
+            cl->setslack(slack);
+            //cerr << "set slack " << cl->getslack() << " done" << endl;
+        }
+        break;
+        case 'p': 
+            cl->setTp(Rcl::SCLT_NEAR);
+            if (cl->getslack() == 0) {
+                cl->setslack(10);
+                //cerr << "set slack " << cl->getslack() << " done" << endl;
+            }
+            break;
+        case '.':case '0':case '1':case '2':case '3':case '4':
+        case '5':case '6':case '7':case '8':case '9':
+        {
+            int n = 0;
+            float factor = 1.0;
+            if (sscanf(&(quals[i]), "%f %n", &factor, &n)) {
+                if (factor != 1.0) {
+                    cl->setWeight(factor);
+                }
+            }
+            if (n > 0)
+                i += n - 1;
+        }
+        default:
+            break;
+        }
+    }
+}
+
+
+// specialstartchars are special only at the beginning of a token
+// (e.g. doctor-who is a term, not 2 terms separated by '-')
+static const string specialstartchars("-");
+// specialinchars are special everywhere except inside a quoted string
+static const string specialinchars(":=<>()");
+
+// Called with the first dquote already read
+static int parseString(WasaParserDriver *d, yy::parser::semantic_type *yylval)
+{
+    string* value = new string();
+    d->qualifiers().clear();
+    int c;
+    while ((c = d->GETCHAR())) {
+        switch (c) {
+        case '\\':
+            /* Escape: get next char */
+            c = d->GETCHAR();
+            if (c == 0) {
+                value->push_back(c);
+                goto out;
+            }
+            value->push_back(c);
+            break;
+        case '"':
+            /* End of string. Look for qualifiers */
+            while ((c = d->GETCHAR()) && !isspace(c))
+                d->qualifiers().push_back(c);
+            goto out;
+        default:
+            value->push_back(c);
+        }
+    }
+out:
+    //cerr << "GOT QUOTED ["<<value<<"] quals [" << d->qualifiers() << "]" << endl;
+    yylval->str = value;
+    return yy::parser::token::QUOTED;
+}
+
+
+int yylex(yy::parser::semantic_type *yylval, WasaParserDriver *d)
+{
+    if (!d->qualifiers().empty()) {
+        yylval->str = new string();
+        yylval->str->swap(d->qualifiers());
+        return yy::parser::token::QUALIFIERS;
+    }
+
+    int c;
+
+    /* Skip white space.  */
+    while ((c = d->GETCHAR()) && isspace(c))
+        continue;
+
+    if (c == 0)
+        return 0;
+
+    if (specialstartchars.find_first_of(c) != string::npos) {
+        //cerr << "yylex: return " << c << endl;
+        return c;
+    }
+
+    // field-term relations
+    switch (c) {
+    case '=': return yy::parser::token::EQUALS;
+    case ':': return yy::parser::token::CONTAINS;
+    case '<': {
+        int c1 = d->GETCHAR();
+        if (c1 == '=') {
+            return yy::parser::token::SMALLEREQ;
+        } else {
+            d->UNGETCHAR(c1);
+            return yy::parser::token::SMALLER;
+        }
+    }
+    case '>': {
+        int c1 = d->GETCHAR();
+        if (c1 == '=') {
+            return yy::parser::token::GREATEREQ;
+        } else {
+            d->UNGETCHAR(c1);
+            return yy::parser::token::GREATER;
+        }
+    }
+    case '(': case ')':
+        return c;
+    }
+        
+    if (c == '"')
+        return parseString(d, yylval);
+
+    d->UNGETCHAR(c);
+
+    // Other chars start a term or field name or reserved word
+    string* word = new string();
+    while ((c = d->GETCHAR())) {
+        if (isspace(c)) {
+            //cerr << "Word broken by whitespace" << endl;
+            break;
+        } else if (specialinchars.find_first_of(c) != string::npos) {
+            //cerr << "Word broken by special char" << endl;
+            d->UNGETCHAR(c);
+            break;
+        } else if (c == 0) {
+            //cerr << "Word broken by EOF" << endl;
+            break;
+        } else {
+            word->push_back(c);
+        }
+    }
+    
+    if (!word->compare("AND") || !word->compare("&&")) {
+        delete word;
+        return yy::parser::token::AND;
+    } else if (!word->compare("OR") || !word->compare("||")) {
+        delete word;
+        return yy::parser::token::OR;
+    }
+
+//    cerr << "Got word [" << word << "]" << endl;
+    yylval->str = word;
+    return yy::parser::token::WORD;
+}

src/query/wasastringtoquery.h to src/query/wasaparserdriver.h

--- a/src/query/wasastringtoquery.h
+++ b/src/query/wasaparserdriver.h
@@ -14,99 +14,68 @@
  *   Free Software Foundation, Inc.,
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
-#ifndef _WASASTRINGTOQUERY_H_INCLUDED_
-#define _WASASTRINGTOQUERY_H_INCLUDED_
+#ifndef _WASAPARSERDRIVER_H_INCLUDED_
+#define _WASAPARSERDRIVER_H_INCLUDED_
 
 #include <string>
-#include <vector>
+#include <stack>
 
-using std::string;
-using std::vector;
-/* Note: Xesam used to be named wasabi. We changed the references to wasabi in
-   the comments, but not the code */
+class WasaParserDriver;
+namespace Rcl {
+    class SearchData;
+    class SearchDataClauseSimple;
+}
+namespace yy {
+    class parser;
+}
 
-/** 
- * A simple class to represent a parsed Xesam user language element. 
- * Can hold one leaf element or an array of subqueries to be joined by AND/OR
- *
- * The complete query is represented by a top WasaQuery holding a
- * chain of ANDed subclauses. Some of the subclauses may be themselves
- * OR'ed lists (it doesn't go deeper). Entries in the AND list may be
- * negated (AND NOT).
- *
- * For LEAF elements, the value can hold one or several words. In the
- * latter case, it should be interpreted as a phrase (comes from a
- * user-entered "quoted string"), except if the modifier flags say otherwise.
- * 
- * Some fields only make sense either for compound or LEAF queries. This 
- * is commented for each. We should subclass really.
- *
- * Note that wasaStringToQuery supposedly parses the whole Xesam 
- * User Search Language v 0.95, but that some elements are dropped or
- * ignored during the translation to a native Recoll query in wasaToRcl
- */
-class WasaQuery {
+class RclConfig;
+
+class WasaParserDriver {
 public:
-    /** Type of this element: leaf or AND/OR chain */
-    enum Op {OP_NULL, OP_LEAF, OP_OR, OP_AND};
-    /** Relation to be searched between field and value. Recoll actually only
-	supports "contain" except for a size field */
-    enum Rel {REL_NULL, REL_EQUALS, REL_CONTAINS, REL_LT, REL_LTE, 
-	      REL_GT, REL_GTE};
-    /** Modifiers for terms: case/diacritics handling,
-	stemming control... */
-    enum Modifier {WQM_CASESENS = 1, WQM_DIACSENS = 2, WQM_NOSTEM = 4, 
-		   WQM_BOOST = 8, WQM_PROX = 0x10, WQM_SLOPPY = 0x20, 
-		   WQM_WORDS = 0x40, WQM_PHRASESLACK = 0x80, WQM_REGEX = 0x100,
-		   WQM_FUZZY = 0x200, WQM_QUOTED = 0x400};
+    
+    WasaParserDriver(const RclConfig *c, const std::string sl, 
+                     const std::string& as)
+        : m_stemlang(sl), m_autosuffs(as), m_config(c),
+          m_index(0), m_result(0) {}
 
-    typedef vector<WasaQuery*> subqlist_t;
+    Rcl::SearchData *parse(const std::string&);
+    bool addClause(Rcl::SearchData *sd, Rcl::SearchDataClauseSimple* cl);
 
-    WasaQuery() 
-	: m_op(OP_NULL), m_rel(REL_NULL), m_exclude(false), 
-	  m_modifiers(0), m_slack(0), m_weight(1.0)
-    {}
+    int GETCHAR();
+    void UNGETCHAR(int c);
 
-    ~WasaQuery();
+    std::string& qualifiers() {
+        return m_qualifiers;
+    }
+    void setreason(const std::string& reason) {
+        m_reason = reason;
+    }
+    const std::string& getreason() const {
+        return m_reason;
+    }
+    
+private:
+    friend class yy::parser;
 
-    /** Get string describing the query tree from this point */
-    void describe(string &desc) const;
+    std::string m_stemlang;
+    std::string m_autosuffs;
+    const RclConfig  *m_config;
 
-    /** Op to be performed on either value (may be LEAF or EXCL, or subqs */
-    WasaQuery::Op      m_op;
+    std::string m_input;
+    unsigned int m_index;
+    std::stack<int> m_returns;
+    Rcl::SearchData *m_result;
 
-    /** Field specification if any (ie: title, author ...) Only OPT_LEAF */
-    string             m_fieldspec;
-    /** Relation between field and value: =, :, <,>,<=, >= */
-    WasaQuery::Rel     m_rel;
+    std::string m_reason;
 
-    /* Negating flag */
-    bool             m_exclude;
-
-    /* String value. Valid for op == OP_LEAF or EXCL */
-    string             m_value;
-
-    /** Subqueries. Valid for conjunctions */
-    vector<WasaQuery*> m_subs;
-    
-    unsigned int   m_modifiers;
-    int            m_slack;
-    float          m_weight;
+    // Let the quoted string reader store qualifiers in there, simpler
+    // than handling this in the parser, because their nature is
+    // determined by the absence of white space after the closing
+    // dquote. e.g "some term"abc. We could avoid this by making white
+    // space a token.
+    std::string m_qualifiers;
 };
 
-/**
- * Wasabi query string parser class. Could be a simple function
- * really, but there might be some parser initialization work done in
- * the constructor.
- */
-class StringToWasaQuery {
-public:
-    StringToWasaQuery();
-    ~StringToWasaQuery();
-    WasaQuery *stringToQuery(const string& str, string& reason);
-    class Internal;
-private:
-    Internal *internal;
-};
 
-#endif /* _WASASTRINGTOQUERY_H_INCLUDED_ */
+#endif /* _WASAPARSERDRIVER_H_INCLUDED_ */

src/query/wasatorcl.cpp to src/query/wasaparse.cpp

--- a/src/query/wasatorcl.cpp
+++ b/src/query/wasaparse.cpp
@@ -14,273 +14,222 @@
  *   Free Software Foundation, Inc.,
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
-#include <cstdio>
-#include <string>
-#include <list>
-#include <algorithm>
-using std::string;
-using std::list;
-
-#include "rclconfig.h"
-#include "wasastringtoquery.h"
-#include "rcldb.h"
+#include "autoconfig.h"
+
+#include <iostream>
+
+#include "wasatorcl.h"
+#include "wasaparserdriver.h"
 #include "searchdata.h"
-#include "wasatorcl.h"
 #include "debuglog.h"
-#include "smallut.h"
-#include "rclconfig.h"
-#include "refcntr.h"
-#include "textsplit.h"
-
-static Rcl::SearchData *wasaQueryToRcl(const RclConfig *config, 
-				       const string& stemlang,
-				       WasaQuery *wasa, 
-				       const string& autosuffs, string& reason)
-{
-    if (wasa == 0) {
-	reason = "NULL query";
-	return 0;
-    }
-    if (wasa->m_op != WasaQuery::OP_AND && wasa->m_op != WasaQuery::OP_OR) {
-	reason = "Top query neither AND nor OR ?";
-	LOGERR(("wasaQueryToRcl: top query neither AND nor OR!\n"));
-	return 0;
-    }
-
-    Rcl::SearchData *sdata = new 
-	Rcl::SearchData(wasa->m_op == WasaQuery::OP_AND ? Rcl::SCLT_AND : 
-			Rcl::SCLT_OR, stemlang);
-    LOGDEB2(("wasaQueryToRcl: %s chain\n", wasa->m_op == WasaQuery::OP_AND ? 
-	     "AND" : "OR"));
-
-    WasaQuery::subqlist_t::iterator it;
-    Rcl::SearchDataClause *nclause;
-
-    // Walk the list of clauses. Some pseudo-field types need special
-    // processing, which results in setting data in the top struct
-    // instead of adding a clause. We check for these first
-    for (it = wasa->m_subs.begin(); it != wasa->m_subs.end(); it++) {
-
-	if (!stringicmp("mime", (*it)->m_fieldspec) ||
-	    !stringicmp("format", (*it)->m_fieldspec)) {
-	    if ((*it)->m_op == WasaQuery::OP_LEAF) {
-		if ((*it)->m_exclude) {
-		    sdata->remFiletype((*it)->m_value);
-		} else {
-		    sdata->addFiletype((*it)->m_value);
-		}
-	    } else {
-		reason = "internal error: mime clause not leaf??";
-		return 0;
-	    }
-	    continue;
-	} 
-
-	// Xesam uses "type", we also support "rclcat", for broad
-	// categories like "audio", "presentation", etc.
-	if (!stringicmp("rclcat", (*it)->m_fieldspec) ||
-	    !stringicmp("type", (*it)->m_fieldspec)) {
-	    if ((*it)->m_op != WasaQuery::OP_LEAF) {
-		reason = "internal error: rclcat/type clause not leaf??";
-		return 0;
-	    }
-	    vector<string> mtypes;
-	    if (config && config->getMimeCatTypes((*it)->m_value, mtypes)
-		&& !mtypes.empty()) {
-		for (vector<string>::iterator mit = mtypes.begin();
-		     mit != mtypes.end(); mit++) {
-		    if ((*it)->m_exclude) {
-			sdata->remFiletype(*mit);
-		    } else {
-			sdata->addFiletype(*mit);
-		    }
-		}
-	    } else {
-		reason = "Unknown rclcat/type value: no mime types found";
-		return 0;
-	    }
-	    continue;
-	}
-
-	// Handle "date" spec
-	if (!stringicmp("date", (*it)->m_fieldspec)) {
-	    if ((*it)->m_op != WasaQuery::OP_LEAF) {
-		reason = "Negative date filtering not supported";
-		return 0;
-	    }
-	    DateInterval di;
-	    if (!parsedateinterval((*it)->m_value, &di)) {
-		LOGERR(("wasaQueryToRcl: bad date interval format\n"));
-		reason = "Bad date interval format";
-		return 0;
-	    }
-	    LOGDEB(("wasaQueryToRcl:: date span:  %d-%d-%d/%d-%d-%d\n",
-		    di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
-	    sdata->setDateSpan(&di);
-	    continue;
-	} 
-
-	// Handle "size" spec
-	if (!stringicmp("size", (*it)->m_fieldspec)) {
-	    if ((*it)->m_op != WasaQuery::OP_LEAF) {
-		reason = "Negative size filtering not supported";
-		return 0;
-	    }
-	    char *cp;
-	    size_t size = strtoll((*it)->m_value.c_str(), &cp, 10);
-	    if (*cp != 0) {
-		switch (*cp) {
-		case 'k': case 'K': size *= 1E3;break;
-		case 'm': case 'M': size *= 1E6;break;
-		case 'g': case 'G': size *= 1E9;break;
-		case 't': case 'T': size *= 1E12;break;
-		default: 
-		    reason = string("Bad multiplier suffix: ") + *cp;
-		    return 0;
-		}
-	    }
-
-	    switch ((*it)->m_rel) {
-	    case WasaQuery::REL_EQUALS:
-		sdata->setMaxSize(size);
-		sdata->setMinSize(size);
-		break;
-	    case WasaQuery::REL_LT:
-	    case WasaQuery::REL_LTE:
-		sdata->setMaxSize(size);
-		break;
-	    case WasaQuery::REL_GT: 
-	    case WasaQuery::REL_GTE:
-		sdata->setMinSize(size);
-		break;
-	    default:
-		reason = "Bad relation operator with size query. Use > < or =";
-		return 0;
-	    }
-	    continue;
-	} 
-
-	// "Regular" processing follows:
-	unsigned int mods = (unsigned int)(*it)->m_modifiers;
-	LOGDEB0(("wasaQueryToRcl: clause modifiers 0x%x\n", mods));
-	nclause = 0;
-
-	switch ((*it)->m_op) {
-	case WasaQuery::OP_NULL:
-	case WasaQuery::OP_AND:
-	default:
-	    reason = "Found bad NULL or AND query type in list";
-	    LOGERR(("wasaQueryToRcl: found bad NULL or AND q type in list\n"));
-	    continue;
-
-	case WasaQuery::OP_LEAF: {
-	    LOGDEB0(("wasaQueryToRcl: leaf clause [%s:%s] slack %d excl %d\n", 
-		     (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str(),
-		     (*it)->m_slack, (*it)->m_exclude));
-
-            // Change terms found in the "autosuffs" list into "ext"
-            // field queries
-            if ((*it)->m_fieldspec.empty() && !autosuffs.empty()) {
-                vector<string> asfv;
-                if (stringToStrings(autosuffs, asfv)) {
-                    if (find_if(asfv.begin(), asfv.end(), 
-                                StringIcmpPred((*it)->m_value)) != asfv.end()) {
-                        (*it)->m_fieldspec = "ext";
-                        (*it)->m_modifiers |= WasaQuery::WQM_NOSTEM;
-                    }
+
+#define YYDEBUG 1
+
+#include "wasaparse.tab.h"
+
+using namespace std;
+using namespace Rcl;
+
+
+void
+yy::parser::error (const location_type& l, const std::string& m)
+{
+    d->setreason(m);
+}
+
+
+SearchData *wasaStringToRcl(const RclConfig *config,
+                                 const std::string& stemlang,
+                                 const std::string& query, string &reason,
+                                 const std::string& autosuffs)
+{
+    WasaParserDriver d(config, stemlang, autosuffs);
+    SearchData *sd = d.parse(query);
+    if (!sd) 
+        reason = d.getreason();
+    return sd;
+}
+
+SearchData *WasaParserDriver::parse(const std::string& in)
+{
+    m_input = in;
+    m_index = 0;
+    delete m_result;
+    m_result = 0;
+    m_returns = stack<int>();
+
+    yy::parser parser(this);
+    parser.set_debug_level(0);
+
+    if (parser.parse() != 0) {
+        delete m_result;
+        m_result = 0;
+    }
+
+    return m_result;
+}
+
+int WasaParserDriver::GETCHAR()
+{
+    if (!m_returns.empty()) {
+        int c = m_returns.top();
+        m_returns.pop();
+        return c;
+    }
+    if (m_index < m_input.size())
+        return m_input[m_index++];
+    return 0;
+}
+void WasaParserDriver::UNGETCHAR(int c)
+{
+    m_returns.push(c);
+}
+
+// Add clause to query, handling special pseudo-clauses for size/date
+// etc. (mostly determined on field name).
+bool WasaParserDriver::addClause(SearchData *sd, 
+                                 SearchDataClauseSimple* cl)
+{
+    if (cl->getfield().empty()) {
+        // Simple clause with empty field spec.
+        // Possibly change terms found in the "autosuffs" list into "ext"
+        // field queries
+        if (!m_autosuffs.empty()) {
+            vector<string> asfv;
+            if (stringToStrings(m_autosuffs, asfv)) {
+                if (find_if(asfv.begin(), asfv.end(), 
+                            StringIcmpPred(cl->gettext())) != asfv.end()) {
+                    cl->setfield("ext");
+                    cl->addModifier(SearchDataClause::SDCM_NOSTEMMING);
                 }
             }
-
-	    if (!stringicmp("dir", (*it)->m_fieldspec)) {
-		// dir filtering special case
-		nclause = new Rcl::SearchDataClausePath((*it)->m_value, 
-							(*it)->m_exclude);
-	    } else {
-		if ((*it)->m_exclude && wasa->m_op != WasaQuery::OP_AND) {
-		    LOGERR(("wasaQueryToRcl: excl clause inside OR list!\n"));
-		    continue;
-		}
-
-		if (mods & WasaQuery::WQM_QUOTED) {
-		    Rcl::SClType tp = (mods & WasaQuery::WQM_PROX)  ?
-			Rcl::SCLT_NEAR :
-			Rcl::SCLT_PHRASE;
-		    nclause = new Rcl::SearchDataClauseDist(tp, (*it)->m_value,
-							    (*it)->m_slack,
-							    (*it)->m_fieldspec);
-		} else {
-                    // If term has commas or slashes inside, take it
-                    // as a list, turn the slashes/commas to spaces,
-                    // leave unquoted. Otherwise, this would end up as
-                    // a phrase query. This is a handy way to enter
-                    // multiple terms to be searched inside a
-                    // field. We interpret ',' as AND, and '/' as
-                    // OR. No mixes allowed and ',' wins.
-		    Rcl::SClType tp = (*it)->m_exclude ? Rcl::SCLT_OR:
-			Rcl::SCLT_AND;
-                    string ns = neutchars((*it)->m_value, ",");
-                    if (ns.compare((*it)->m_value)) {
-                        // had ','
-                        tp = Rcl::SCLT_AND;
-                    } else {
-                        ns = neutchars((*it)->m_value, "/");
-                        if (ns.compare((*it)->m_value)) {
-                            tp = Rcl::SCLT_OR;
-                        }
-                    }
-		    nclause = new Rcl::SearchDataClauseSimple(tp, ns,
-                                                            (*it)->m_fieldspec);
-		}
-		nclause->setexclude((*it)->m_exclude);
-	    }
-
-	    if (nclause == 0) {
-		reason = "Out of memory";
-		LOGERR(("wasaQueryToRcl: out of memory\n"));
-		return 0;
-	    }
-	}
-	    break;
-	    
-	case WasaQuery::OP_OR:
-	    LOGDEB2(("wasaQueryToRcl: OR clause [%s]:[%s]\n", 
-		     (*it)->m_fieldspec.c_str(), (*it)->m_value.c_str()));
-	    // Create a subquery.
-	    Rcl::SearchData *sub = 
-		wasaQueryToRcl(config, stemlang, *it, autosuffs, reason);
-	    if (sub == 0) {
-		continue;
-	    }
-	    nclause = 
-		new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sub));
-	    if (nclause == 0) {
-		LOGERR(("wasaQueryToRcl: out of memory\n"));
-		reason = "Out of memory";
-		return 0;
-	    }
-	}
-
-	if (mods & WasaQuery::WQM_NOSTEM)
-	    nclause->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
-	if (mods & WasaQuery::WQM_DIACSENS)
-	    nclause->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
-	if (mods & WasaQuery::WQM_CASESENS)
-	    nclause->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
-	if ((*it)->m_weight != 1.0)
-	    nclause->setWeight((*it)->m_weight);
-	sdata->addClause(nclause);
-    }
-
-    return sdata;
-}
-
-Rcl::SearchData *wasaStringToRcl(const RclConfig *config, const string& stemlang,
-				 const string &qs, string &reason, 
-                                 const string& autosuffs)
-{
-    StringToWasaQuery parser;
-    WasaQuery *wq = parser.stringToQuery(qs, reason);
-    if (wq == 0) 
-	return 0;
-    return wasaQueryToRcl(config, stemlang, wq, autosuffs, reason);
-}
+        }
+        return sd->addClause(cl);
+    }
+
+
+    const string& fld = cl->getfield();
+
+    // MIME types and categories
+    if (!stringicmp("mime", fld) ||!stringicmp("format", fld)) {
+        if (cl->getexclude()) {
+            sd->remFiletype(cl->gettext());
+        } else {
+            sd->addFiletype(cl->gettext());
+        }
+        delete cl;
+        return true;
+    } 
+
+    if (!stringicmp("rclcat", fld) || !stringicmp("type", fld)) {
+        vector<string> mtypes;
+        if (m_config && m_config->getMimeCatTypes(cl->gettext(), mtypes)) {
+            for (vector<string>::iterator mit = mtypes.begin();
+                 mit != mtypes.end(); mit++) {
+                if (cl->getexclude()) {
+                    sd->remFiletype(*mit);
+                } else {
+                    sd->addFiletype(*mit);
+                }
+            }
+        }
+        delete cl;
+        return true;
+    }
+
+    // Handle "date" spec
+    if (!stringicmp("date", fld)) {
+        DateInterval di;
+        if (!parsedateinterval(cl->gettext(), &di)) {
+            LOGERR(("Bad date interval format: %s\n",
+                    cl->gettext().c_str()));
+            m_reason = "Bad date interval format";
+            delete cl;
+            return false;
+        }
+        LOGDEB(("addClause:: date span:  %d-%d-%d/%d-%d-%d\n",
+                di.y1,di.m1,di.d1, di.y2,di.m2,di.d2));
+        sd->setDateSpan(&di);
+        delete cl;
+        return true;
+    } 
+
+    // Handle "size" spec
+    if (!stringicmp("size", fld)) {
+        char *cp;
+        size_t size = strtoll(cl->gettext().c_str(), &cp, 10);
+        if (*cp != 0) {
+            switch (*cp) {
+            case 'k': case 'K': size *= 1E3;break;
+            case 'm': case 'M': size *= 1E6;break;
+            case 'g': case 'G': size *= 1E9;break;
+            case 't': case 'T': size *= 1E12;break;
+            default: 
+                m_reason = string("Bad multiplier suffix: ") + *cp;
+                delete cl;
+                return false;
+            }
+        }
+
+        SearchDataClause::Relation rel = cl->getrel();
+
+        delete cl;
+
+        switch (rel) {
+        case SearchDataClause::REL_EQUALS:
+            sd->setMaxSize(size);
+            sd->setMinSize(size);
+            break;
+        case SearchDataClause::REL_LT:
+        case SearchDataClause::REL_LTE:
+            sd->setMaxSize(size);
+            break;
+        case SearchDataClause::REL_GT: 
+        case SearchDataClause::REL_GTE:
+            sd->setMinSize(size);
+            break;
+        default:
+            m_reason = "Bad relation operator with size query. Use > < or =";
+            return false;
+        }
+        return true;
+    }
+
+    if (!stringicmp("dir", fld)) {
+        // dir filtering special case
+        SearchDataClausePath *nclause = 
+            new SearchDataClausePath(cl->gettext(), cl->getexclude());
+        delete cl;
+        sd->addClause(nclause);
+    }
+
+    if (cl->getTp() == SCLT_OR || cl->getTp() == SCLT_AND) {
+        // If this is a normal clause and the term has commas or
+        // slashes inside, take it as a list, turn the slashes/commas
+        // to spaces, leave unquoted. Otherwise, this would end up as
+        // a phrase query. This is a handy way to enter multiple terms
+        // to be searched inside a field. We interpret ',' as AND, and
+        // '/' as OR. No mixes allowed and ',' wins.
+        SClType tp = SCLT_FILENAME;// impossible value
+        string ns = neutchars(cl->gettext(), ",");
+        if (ns.compare(cl->gettext())) {
+            // had ','
+            tp = SCLT_AND;
+        } else {
+            ns = neutchars(cl->gettext(), "/");
+            if (ns.compare(cl->gettext())) {
+                // had not ',' but has '/'
+                tp = SCLT_OR;
+            }
+        }
+
+        if (tp != SCLT_FILENAME) {
+            SearchDataClauseSimple *ncl = 
+                new SearchDataClauseSimple(tp, ns, fld);
+            delete cl;
+            return sd->addClause(ncl);
+        }
+    }
+    return sd->addClause(cl);
+}
+