|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.84 2006-10-25 10:52:02 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.85 2006-10-30 12:59:44 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
19 |
*/
|
19 |
*/
|
20 |
#include <stdio.h>
|
20 |
#include <stdio.h>
|
21 |
#include <unistd.h>
|
21 |
#include <unistd.h>
|
22 |
#include <sys/stat.h>
|
22 |
#include <sys/stat.h>
|
23 |
#include <fnmatch.h>
|
23 |
#include <fnmatch.h>
|
|
|
24 |
#include <regex.h>
|
24 |
|
25 |
|
25 |
#include <iostream>
|
26 |
#include <iostream>
|
26 |
#include <string>
|
27 |
#include <string>
|
27 |
#include <vector>
|
28 |
#include <vector>
|
28 |
#include <algorithm>
|
29 |
#include <algorithm>
|
|
... |
|
... |
1171 |
sdata.description = sdata.description.substr(strlen("Xapian::Query"));
|
1172 |
sdata.description = sdata.description.substr(strlen("Xapian::Query"));
|
1172 |
LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
|
1173 |
LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
|
1173 |
return true;
|
1174 |
return true;
|
1174 |
}
|
1175 |
}
|
1175 |
|
1176 |
|
1176 |
list<string> Db::completions(const string &root, const string &lang, int max)
|
1177 |
// Characters that can begin a wildcard or regexp expression. We use skipto
|
|
|
1178 |
// to begin the allterms search with terms that begin with the portion of
|
|
|
1179 |
// the input string prior to these chars.
|
|
|
1180 |
const string wildSpecChars = "*?[";
|
|
|
1181 |
const string regSpecChars = "(.[{^";
|
|
|
1182 |
|
|
|
1183 |
// Find all index terms that match a wildcard or regular expression
|
|
|
1184 |
bool Db::termMatch(MatchType typ, const string &root, list<string>& res,
|
|
|
1185 |
const string &lang, int max)
|
1177 |
{
|
1186 |
{
|
1178 |
Xapian::Database db;
|
|
|
1179 |
list<string> res;
|
|
|
1180 |
if (!m_ndb || !m_ndb->m_isopen)
|
1187 |
if (!m_ndb || !m_ndb->m_isopen)
|
1181 |
return res;
|
1188 |
return false;
|
|
|
1189 |
Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
|
|
|
1190 |
res.clear();
|
|
|
1191 |
// Get rid of capitals and accents
|
1182 |
string droot;
|
1192 |
string droot;
|
1183 |
dumb_string(root, droot);
|
1193 |
dumb_string(root, droot);
|
1184 |
db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
|
1194 |
string nochars = typ == ET_WILD ? wildSpecChars : regSpecChars;
|
|
|
1195 |
|
|
|
1196 |
regex_t reg;
|
|
|
1197 |
int errcode;
|
|
|
1198 |
if (typ == ET_REGEXP && (errcode=regcomp(&reg, droot.c_str(), 0))) {
|
|
|
1199 |
char errbuf[200];
|
|
|
1200 |
regerror(errcode, &reg, errbuf, 199);
|
|
|
1201 |
LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
|
|
|
1202 |
res.push_back(errbuf);
|
|
|
1203 |
regfree(&reg);
|
|
|
1204 |
return false;
|
|
|
1205 |
}
|
|
|
1206 |
|
|
|
1207 |
// Find the initial section before any special char
|
|
|
1208 |
string::size_type es = droot.find_first_of(nochars);
|
|
|
1209 |
string is;
|
|
|
1210 |
switch (es) {
|
|
|
1211 |
case string::npos: is = droot;break;
|
|
|
1212 |
case 0: break;
|
|
|
1213 |
default: is = droot.substr(0, es);break;
|
|
|
1214 |
}
|
|
|
1215 |
LOGDEB(("termMatch: initsec: [%s]\n", is.c_str()));
|
|
|
1216 |
|
1185 |
Xapian::TermIterator it = db.allterms_begin();
|
1217 |
Xapian::TermIterator it = db.allterms_begin();
|
|
|
1218 |
if (!is.empty())
|
1186 |
it.skip_to(droot.c_str());
|
1219 |
it.skip_to(is.c_str());
|
1187 |
for (int n = 0;it != db.allterms_end(); it++) {
|
1220 |
for (int n = 0;it != db.allterms_end(); it++) {
|
1188 |
if ((*it).find(droot) != 0)
|
1221 |
// If we're beyond the terms matching the initial string, end
|
|
|
1222 |
if (!is.empty() && (*it).find(is) != 0)
|
1189 |
break;
|
1223 |
break;
|
|
|
1224 |
// Don't match special internal terms beginning with uppercase ascii
|
|
|
1225 |
if ((*it).at(0) >= 'A' && (*it).at(0) <= 'Z')
|
|
|
1226 |
continue;
|
|
|
1227 |
if (typ == ET_WILD) {
|
|
|
1228 |
if (fnmatch(droot.c_str(), (*it).c_str(), 0) == FNM_NOMATCH)
|
|
|
1229 |
continue;
|
|
|
1230 |
} else {
|
|
|
1231 |
if (regexec(&reg, (*it).c_str(), 0, 0, 0))
|
|
|
1232 |
continue;
|
|
|
1233 |
}
|
1190 |
if (lang.empty()) {
|
1234 |
if (lang.empty()) {
|
1191 |
res.push_back(*it);
|
1235 |
res.push_back(*it);
|
1192 |
++n;
|
1236 |
++n;
|
1193 |
} else {
|
1237 |
} else {
|
1194 |
list<string> stemexps = stemExpand(lang, *it);
|
1238 |
list<string> stemexps = stemExpand(lang, *it);
|
|
... |
|
... |
1203 |
if (n >= max)
|
1247 |
if (n >= max)
|
1204 |
break;
|
1248 |
break;
|
1205 |
}
|
1249 |
}
|
1206 |
res.sort();
|
1250 |
res.sort();
|
1207 |
res.unique();
|
1251 |
res.unique();
|
|
|
1252 |
if (typ == ET_REGEXP) {
|
|
|
1253 |
regfree(&reg);
|
|
|
1254 |
}
|
1208 |
return res;
|
1255 |
return true;
|
1209 |
}
|
1256 |
}
|
1210 |
|
1257 |
|
1211 |
/** Term list walking. */
|
1258 |
/** Term list walking. */
|
1212 |
class TermIter {
|
1259 |
class TermIter {
|
1213 |
public:
|
1260 |
public:
|