|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.60 2006-04-05 06:26:56 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.61 2006-04-05 12:50:42 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
65 |
#define MA_ABSTRACT_SIZE 250
|
65 |
#define MA_ABSTRACT_SIZE 250
|
66 |
// This is how many words (context size) we keep around query terms
|
66 |
// This is how many words (context size) we keep around query terms
|
67 |
// when building the abstract
|
67 |
// when building the abstract
|
68 |
#define MA_EXTRACT_WIDTH 4
|
68 |
#define MA_EXTRACT_WIDTH 4
|
69 |
|
69 |
|
|
|
70 |
// Truncate longer path and uniquize with hash . The goal for this is
|
|
|
71 |
// to avoid xapian max term length limitations, not to gain space (we
|
|
|
72 |
// gain very little even with very short maxlens like 30)
|
|
|
73 |
#define PATHHASHLEN 150
|
|
|
74 |
|
|
|
75 |
// Synthetic abstract marker (to discriminate from abstract actually
|
|
|
76 |
// found in doc)
|
|
|
77 |
const static string rclSyntAbs = "?!#@";
|
|
|
78 |
|
70 |
// Data for a xapian database. There could actually be 2 different
|
79 |
// Data for a xapian database. There could actually be 2 different
|
71 |
// ones for indexing or query as there is not much in common.
|
80 |
// ones for indexing or query as there is not much in common.
|
72 |
class Native {
|
81 |
class Native {
|
73 |
public:
|
82 |
public:
|
74 |
bool isopen;
|
83 |
bool m_isopen;
|
75 |
bool iswritable;
|
84 |
bool m_iswritable;
|
|
|
85 |
Db::OpenMode m_mode;
|
76 |
string basedir;
|
86 |
string m_basedir;
|
|
|
87 |
|
|
|
88 |
// List of directories for additional databases to query
|
|
|
89 |
list<string> m_extraDbs;
|
77 |
|
90 |
|
78 |
// Indexing
|
91 |
// Indexing
|
79 |
Xapian::WritableDatabase wdb;
|
92 |
Xapian::WritableDatabase wdb;
|
80 |
vector<bool> updated;
|
93 |
vector<bool> updated;
|
81 |
|
94 |
|
|
... |
|
... |
90 |
bool dbDataToRclDoc(std::string &data, Doc &doc,
|
103 |
bool dbDataToRclDoc(std::string &data, Doc &doc,
|
91 |
int qopts,
|
104 |
int qopts,
|
92 |
Xapian::docid docid,
|
105 |
Xapian::docid docid,
|
93 |
const list<string>& terms);
|
106 |
const list<string>& terms);
|
94 |
|
107 |
|
95 |
Native() : isopen(false), iswritable(false), enquire(0) { }
|
108 |
Native()
|
|
|
109 |
: m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
|
|
|
110 |
{ }
|
96 |
~Native() {
|
111 |
~Native() {
|
97 |
delete enquire;
|
112 |
delete enquire;
|
98 |
}
|
113 |
}
|
99 |
bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
|
114 |
bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
|
100 |
// Parse xapian document's data and populate doc fields
|
115 |
// Parse xapian document's data and populate doc fields
|
|
... |
|
... |
110 |
return false;
|
125 |
return false;
|
111 |
}
|
126 |
}
|
112 |
};
|
127 |
};
|
113 |
|
128 |
|
114 |
Db::Db()
|
129 |
Db::Db()
|
|
|
130 |
: m_qOpts(0)
|
115 |
{
|
131 |
{
|
116 |
ndb = new Native;
|
132 |
m_ndb = new Native;
|
117 |
m_qOpts = 0;
|
|
|
118 |
}
|
133 |
}
|
119 |
|
134 |
|
120 |
Db::~Db()
|
135 |
Db::~Db()
|
121 |
{
|
136 |
{
|
122 |
LOGDEB1(("Db::~Db\n"));
|
137 |
LOGDEB1(("Db::~Db\n"));
|
123 |
if (ndb == 0)
|
138 |
if (m_ndb == 0)
|
124 |
return;
|
139 |
return;
|
125 |
LOGDEB(("Db::~Db: isopen %d iswritable %d\n", ndb->isopen,
|
140 |
LOGDEB(("Db::~Db: isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
126 |
ndb->iswritable));
|
141 |
m_ndb->m_iswritable));
|
127 |
if (ndb->isopen == false)
|
142 |
if (m_ndb->m_isopen == false)
|
128 |
return;
|
143 |
return;
|
129 |
const char *ermsg = "Unknown error";
|
144 |
const char *ermsg = "Unknown error";
|
130 |
try {
|
145 |
try {
|
131 |
LOGDEB(("Db::~Db: closing native database\n"));
|
146 |
LOGDEB(("Db::~Db: closing native database\n"));
|
132 |
if (ndb->iswritable == true) {
|
147 |
if (m_ndb->m_iswritable == true) {
|
133 |
ndb->wdb.flush();
|
148 |
m_ndb->wdb.flush();
|
134 |
}
|
149 |
}
|
135 |
delete ndb;
|
150 |
delete m_ndb;
|
|
|
151 |
m_ndb = 0;
|
136 |
return;
|
152 |
return;
|
137 |
} catch (const Xapian::Error &e) {
|
153 |
} catch (const Xapian::Error &e) {
|
138 |
ermsg = e.get_msg().c_str();
|
154 |
ermsg = e.get_msg().c_str();
|
139 |
} catch (const string &s) {
|
155 |
} catch (const string &s) {
|
140 |
ermsg = s.c_str();
|
156 |
ermsg = s.c_str();
|
|
... |
|
... |
146 |
LOGERR(("Db::~Db: got exception: %s\n", ermsg));
|
162 |
LOGERR(("Db::~Db: got exception: %s\n", ermsg));
|
147 |
}
|
163 |
}
|
148 |
|
164 |
|
149 |
bool Db::open(const string& dir, OpenMode mode, int qops)
|
165 |
bool Db::open(const string& dir, OpenMode mode, int qops)
|
150 |
{
|
166 |
{
|
151 |
if (ndb == 0)
|
167 |
if (m_ndb == 0)
|
152 |
return false;
|
168 |
return false;
|
153 |
LOGDEB(("Db::open: isopen %d iswritable %d\n", ndb->isopen,
|
169 |
LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
154 |
ndb->iswritable));
|
170 |
m_ndb->m_iswritable));
|
155 |
m_qOpts = qops;
|
|
|
156 |
|
171 |
|
157 |
if (ndb->isopen) {
|
172 |
if (m_ndb->m_isopen) {
|
158 |
LOGERR(("Db::open: already open\n"));
|
173 |
// We used to return an error here but I see no reason to
|
|
|
174 |
if (!close())
|
159 |
return false;
|
175 |
return false;
|
160 |
}
|
176 |
}
|
161 |
const char *ermsg = "Unknown";
|
177 |
const char *ermsg = "Unknown";
|
162 |
try {
|
178 |
try {
|
163 |
switch (mode) {
|
179 |
switch (mode) {
|
164 |
case DbUpd:
|
180 |
case DbUpd:
|
165 |
case DbTrunc:
|
181 |
case DbTrunc:
|
166 |
{
|
182 |
{
|
167 |
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
183 |
int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
|
168 |
Xapian::DB_CREATE_OR_OVERWRITE;
|
184 |
Xapian::DB_CREATE_OR_OVERWRITE;
|
169 |
ndb->wdb = Xapian::WritableDatabase(dir, action);
|
185 |
m_ndb->wdb = Xapian::WritableDatabase(dir, action);
|
170 |
LOGDEB(("Db::open: lastdocid: %d\n",
|
186 |
LOGDEB(("Db::open: lastdocid: %d\n",
|
171 |
ndb->wdb.get_lastdocid()));
|
187 |
m_ndb->wdb.get_lastdocid()));
|
172 |
ndb->updated.resize(ndb->wdb.get_lastdocid() + 1);
|
188 |
m_ndb->updated.resize(m_ndb->wdb.get_lastdocid() + 1);
|
173 |
for (unsigned int i = 0; i < ndb->updated.size(); i++)
|
189 |
for (unsigned int i = 0; i < m_ndb->updated.size(); i++)
|
174 |
ndb->updated[i] = false;
|
190 |
m_ndb->updated[i] = false;
|
175 |
ndb->iswritable = true;
|
191 |
m_ndb->m_iswritable = true;
|
176 |
}
|
192 |
}
|
177 |
break;
|
193 |
break;
|
178 |
case DbRO:
|
194 |
case DbRO:
|
179 |
default:
|
195 |
default:
|
180 |
ndb->iswritable = false;
|
196 |
m_ndb->m_iswritable = false;
|
181 |
ndb->db = Xapian::Database(dir);
|
197 |
m_ndb->db = Xapian::Database(dir);
|
|
|
198 |
for (list<string>::iterator it = m_ndb->m_extraDbs.begin();
|
|
|
199 |
it != m_ndb->m_extraDbs.end(); it++) {
|
|
|
200 |
string aerr;
|
|
|
201 |
LOGDEB(("Db::Open: adding query db [%s]\n", it->c_str()));
|
|
|
202 |
aerr.clear();
|
|
|
203 |
try {
|
|
|
204 |
// Make this non-fatal
|
|
|
205 |
m_ndb->db.add_database(Xapian::Database(*it));
|
|
|
206 |
} catch (const Xapian::Error &e) {
|
|
|
207 |
aerr = e.get_msg().c_str();
|
|
|
208 |
} catch (const string &s) {
|
|
|
209 |
aerr = s.c_str();
|
|
|
210 |
} catch (const char *s) {
|
|
|
211 |
aerr = s;
|
|
|
212 |
} catch (...) {
|
|
|
213 |
aerr = "Caught unknown exception";
|
|
|
214 |
}
|
|
|
215 |
if (!aerr.empty())
|
|
|
216 |
LOGERR(("Db::Open: error while trying to add database "
|
|
|
217 |
"from [%s]: %s\n", it->c_str(), aerr.c_str()));
|
|
|
218 |
}
|
182 |
break;
|
219 |
break;
|
183 |
}
|
220 |
}
|
|
|
221 |
m_qOpts = qops;
|
|
|
222 |
m_ndb->m_mode = mode;
|
184 |
ndb->isopen = true;
|
223 |
m_ndb->m_isopen = true;
|
185 |
ndb->basedir = dir;
|
224 |
m_ndb->m_basedir = dir;
|
186 |
return true;
|
225 |
return true;
|
187 |
} catch (const Xapian::Error &e) {
|
226 |
} catch (const Xapian::Error &e) {
|
188 |
ermsg = e.get_msg().c_str();
|
227 |
ermsg = e.get_msg().c_str();
|
189 |
} catch (const string &s) {
|
228 |
} catch (const string &s) {
|
190 |
ermsg = s.c_str();
|
229 |
ermsg = s.c_str();
|
|
... |
|
... |
199 |
}
|
238 |
}
|
200 |
|
239 |
|
201 |
// Note: xapian has no close call, we delete and recreate the db
|
240 |
// Note: xapian has no close call, we delete and recreate the db
|
202 |
bool Db::close()
|
241 |
bool Db::close()
|
203 |
{
|
242 |
{
|
204 |
if (ndb == 0)
|
243 |
if (m_ndb == 0)
|
205 |
return false;
|
244 |
return false;
|
206 |
LOGDEB(("Db::close(): isopen %d iswritable %d\n", ndb->isopen,
|
245 |
LOGDEB(("Db::close(): m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
207 |
ndb->iswritable));
|
246 |
m_ndb->m_iswritable));
|
208 |
if (ndb->isopen == false)
|
247 |
if (m_ndb->m_isopen == false)
|
209 |
return true;
|
248 |
return true;
|
210 |
const char *ermsg = "Unknown";
|
249 |
const char *ermsg = "Unknown";
|
211 |
try {
|
250 |
try {
|
212 |
if (ndb->iswritable == true) {
|
251 |
if (m_ndb->m_iswritable == true) {
|
213 |
ndb->wdb.flush();
|
252 |
m_ndb->wdb.flush();
|
214 |
LOGDEB(("Rcl:Db: Called xapian flush\n"));
|
253 |
LOGDEB(("Rcl:Db: Called xapian flush\n"));
|
215 |
}
|
254 |
}
|
216 |
delete ndb;
|
255 |
delete m_ndb;
|
217 |
ndb = new Native;
|
256 |
m_ndb = new Native;
|
218 |
if (ndb)
|
257 |
if (m_ndb)
|
219 |
return true;
|
258 |
return true;
|
220 |
} catch (const Xapian::Error &e) {
|
259 |
} catch (const Xapian::Error &e) {
|
221 |
ermsg = e.get_msg().c_str();
|
260 |
ermsg = e.get_msg().c_str();
|
222 |
} catch (const string &s) {
|
261 |
} catch (const string &s) {
|
223 |
ermsg = s.c_str();
|
262 |
ermsg = s.c_str();
|
|
... |
|
... |
227 |
ermsg = "Caught unknown exception";
|
266 |
ermsg = "Caught unknown exception";
|
228 |
}
|
267 |
}
|
229 |
LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
|
268 |
LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
|
230 |
return false;
|
269 |
return false;
|
231 |
}
|
270 |
}
|
|
|
271 |
bool Db::reOpen()
|
|
|
272 |
{
|
|
|
273 |
if (m_ndb->m_isopen) {
|
|
|
274 |
if (!close())
|
|
|
275 |
return false;
|
|
|
276 |
if (!open(m_ndb->m_basedir, m_ndb->m_mode, m_qOpts)) {
|
|
|
277 |
return false;
|
|
|
278 |
}
|
|
|
279 |
}
|
|
|
280 |
return true;
|
|
|
281 |
}
|
|
|
282 |
bool Db::addQueryDb(const string &dir)
|
|
|
283 |
{
|
|
|
284 |
LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb,
|
|
|
285 |
(m_ndb)?m_ndb->m_iswritable:0, dir.c_str()));
|
|
|
286 |
if (!m_ndb)
|
|
|
287 |
return false;
|
|
|
288 |
if (m_ndb->m_iswritable)
|
|
|
289 |
return false;
|
|
|
290 |
if (find(m_ndb->m_extraDbs.begin(), m_ndb->m_extraDbs.end(), dir) ==
|
|
|
291 |
m_ndb->m_extraDbs.end()) {
|
|
|
292 |
m_ndb->m_extraDbs.push_back(dir);
|
|
|
293 |
}
|
|
|
294 |
return reOpen();
|
|
|
295 |
}
|
|
|
296 |
|
|
|
297 |
bool Db::rmQueryDb(const string &dir)
|
|
|
298 |
{
|
|
|
299 |
if (!m_ndb)
|
|
|
300 |
return false;
|
|
|
301 |
if (m_ndb->m_iswritable)
|
|
|
302 |
return false;
|
|
|
303 |
if (dir.empty()) {
|
|
|
304 |
m_ndb->m_extraDbs.clear();
|
|
|
305 |
} else {
|
|
|
306 |
list<string>::iterator it = find(m_ndb->m_extraDbs.begin(),
|
|
|
307 |
m_ndb->m_extraDbs.end(), dir);
|
|
|
308 |
if (it != m_ndb->m_extraDbs.end()) {
|
|
|
309 |
m_ndb->m_extraDbs.erase(it);
|
|
|
310 |
}
|
|
|
311 |
}
|
|
|
312 |
return reOpen();
|
|
|
313 |
}
|
|
|
314 |
bool Db::testDbDir(const string &dir)
|
|
|
315 |
{
|
|
|
316 |
string aerr;
|
|
|
317 |
LOGDEB(("Db::testDbDir: [%s]\n", dir.c_str()));
|
|
|
318 |
try {
|
|
|
319 |
Xapian::Database db(dir);
|
|
|
320 |
} catch (const Xapian::Error &e) {
|
|
|
321 |
aerr = e.get_msg().c_str();
|
|
|
322 |
} catch (const string &s) {
|
|
|
323 |
aerr = s.c_str();
|
|
|
324 |
} catch (const char *s) {
|
|
|
325 |
aerr = s;
|
|
|
326 |
} catch (...) {
|
|
|
327 |
aerr = "Caught unknown exception";
|
|
|
328 |
}
|
|
|
329 |
if (!aerr.empty()) {
|
|
|
330 |
LOGERR(("Db::Open: error while trying to open database "
|
|
|
331 |
"from [%s]: %s\n", dir.c_str(), aerr.c_str()));
|
|
|
332 |
return false;
|
|
|
333 |
}
|
|
|
334 |
return true;
|
|
|
335 |
}
|
232 |
|
336 |
|
233 |
bool Db::isopen()
|
337 |
bool Db::isopen()
|
234 |
{
|
338 |
{
|
235 |
if (ndb == 0)
|
339 |
if (m_ndb == 0)
|
236 |
return false;
|
340 |
return false;
|
237 |
return ndb->isopen;
|
341 |
return m_ndb->m_isopen;
|
238 |
}
|
342 |
}
|
239 |
|
343 |
|
240 |
// A small class to hold state while splitting text
|
344 |
// A small class to hold state while splitting text
|
241 |
class mySplitterCB : public TextSplitCB {
|
345 |
class mySplitterCB : public TextSplitCB {
|
242 |
public:
|
346 |
public:
|
|
... |
|
... |
333 |
output += " ...";
|
437 |
output += " ...";
|
334 |
}
|
438 |
}
|
335 |
return output;
|
439 |
return output;
|
336 |
}
|
440 |
}
|
337 |
|
441 |
|
338 |
// remove some chars and replace them with spaces
|
442 |
// Remove some chars and replace them with spaces
|
339 |
static string stripchars(const string &str, string delims)
|
443 |
static string stripchars(const string &str, string delims)
|
340 |
{
|
444 |
{
|
341 |
string out;
|
445 |
string out;
|
342 |
string::size_type startPos, pos;
|
446 |
string::size_type startPos, pos;
|
343 |
|
447 |
|
|
... |
|
... |
355 |
}
|
459 |
}
|
356 |
}
|
460 |
}
|
357 |
return out;
|
461 |
return out;
|
358 |
}
|
462 |
}
|
359 |
|
463 |
|
360 |
// Truncate longer path and uniquize with hash . The goal for this is
|
|
|
361 |
// to avoid xapian max term length limitations, not to gain space (we
|
|
|
362 |
// gain very little even with very short maxlens like 30)
|
|
|
363 |
#define PATHHASHLEN 150
|
|
|
364 |
|
|
|
365 |
const static string rclSyntAbs = "?!#@";
|
|
|
366 |
|
|
|
367 |
// Add document in internal form to the database: index the terms in
|
464 |
// Add document in internal form to the database: index the terms in
|
368 |
// the title abstract and body and add special terms for file name,
|
465 |
// the title abstract and body and add special terms for file name,
|
369 |
// date, mime type ... , create the document data record (more
|
466 |
// date, mime type ... , create the document data record (more
|
370 |
// metadata), and update database
|
467 |
// metadata), and update database
|
371 |
bool Db::add(const string &fn, const Doc &idoc,
|
468 |
bool Db::add(const string &fn, const Doc &idoc,
|
372 |
const struct stat *stp)
|
469 |
const struct stat *stp)
|
373 |
{
|
470 |
{
|
374 |
LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
|
471 |
LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
|
375 |
if (ndb == 0)
|
472 |
if (m_ndb == 0)
|
376 |
return false;
|
473 |
return false;
|
377 |
|
474 |
|
378 |
Doc doc = idoc;
|
475 |
Doc doc = idoc;
|
379 |
|
476 |
|
380 |
// Truncate abstract, title and keywords to reasonable lengths. If
|
477 |
// Truncate abstract, title and keywords to reasonable lengths. If
|
|
... |
|
... |
511 |
|
608 |
|
512 |
const char *fnc = fn.c_str();
|
609 |
const char *fnc = fn.c_str();
|
513 |
// Add db entry or update existing entry:
|
610 |
// Add db entry or update existing entry:
|
514 |
try {
|
611 |
try {
|
515 |
Xapian::docid did =
|
612 |
Xapian::docid did =
|
516 |
ndb->wdb.replace_document(uniterm.empty() ? pathterm : uniterm,
|
613 |
m_ndb->wdb.replace_document(uniterm.empty() ? pathterm : uniterm,
|
517 |
newdocument);
|
614 |
newdocument);
|
518 |
if (did < ndb->updated.size()) {
|
615 |
if (did < m_ndb->updated.size()) {
|
519 |
ndb->updated[did] = true;
|
616 |
m_ndb->updated[did] = true;
|
520 |
LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
617 |
LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
|
521 |
doc.ipath.c_str()));
|
618 |
doc.ipath.c_str()));
|
522 |
} else {
|
619 |
} else {
|
523 |
LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc,
|
620 |
LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc,
|
524 |
doc.ipath.c_str()));
|
621 |
doc.ipath.c_str()));
|
525 |
}
|
622 |
}
|
526 |
} catch (...) {
|
623 |
} catch (...) {
|
527 |
// FIXME: is this ever actually needed?
|
624 |
// FIXME: is this ever actually needed?
|
528 |
try {
|
625 |
try {
|
529 |
ndb->wdb.add_document(newdocument);
|
626 |
m_ndb->wdb.add_document(newdocument);
|
530 |
LOGDEB(("Db::add: %s added (failed re-seek for duplicate)\n",
|
627 |
LOGDEB(("Db::add: %s added (failed re-seek for duplicate)\n",
|
531 |
fnc));
|
628 |
fnc));
|
532 |
} catch (...) {
|
629 |
} catch (...) {
|
533 |
LOGERR(("Db::add: failed again after replace_document\n"));
|
630 |
LOGERR(("Db::add: failed again after replace_document\n"));
|
534 |
return false;
|
631 |
return false;
|
|
... |
|
... |
538 |
}
|
635 |
}
|
539 |
|
636 |
|
540 |
// Test if given filename has changed since last indexed:
|
637 |
// Test if given filename has changed since last indexed:
|
541 |
bool Db::needUpdate(const string &filename, const struct stat *stp)
|
638 |
bool Db::needUpdate(const string &filename, const struct stat *stp)
|
542 |
{
|
639 |
{
|
543 |
if (ndb == 0)
|
640 |
if (m_ndb == 0)
|
544 |
return false;
|
641 |
return false;
|
545 |
|
642 |
|
546 |
// If no document exist with this path, we do need update
|
643 |
// If no document exist with this path, we do need update
|
547 |
string hash;
|
644 |
string hash;
|
548 |
pathHash(filename, hash, PATHHASHLEN);
|
645 |
pathHash(filename, hash, PATHHASHLEN);
|
|
... |
|
... |
554 |
// fmtime field which will be identical for all docs inside a
|
651 |
// fmtime field which will be identical for all docs inside a
|
555 |
// multi-document file (we currently always reindex all if the
|
652 |
// multi-document file (we currently always reindex all if the
|
556 |
// file changed)
|
653 |
// file changed)
|
557 |
Xapian::PostingIterator doc;
|
654 |
Xapian::PostingIterator doc;
|
558 |
try {
|
655 |
try {
|
559 |
if (!ndb->wdb.term_exists(pathterm)) {
|
656 |
if (!m_ndb->wdb.term_exists(pathterm)) {
|
560 |
LOGDEB1(("Db::needUpdate: no such path: %s\n", pathterm.c_str()));
|
657 |
LOGDEB1(("Db::needUpdate: no such path: %s\n", pathterm.c_str()));
|
561 |
return true;
|
658 |
return true;
|
562 |
}
|
659 |
}
|
563 |
|
660 |
|
564 |
Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);
|
661 |
Xapian::PostingIterator docid0 = m_ndb->wdb.postlist_begin(pathterm);
|
565 |
for (Xapian::PostingIterator docid = docid0;
|
662 |
for (Xapian::PostingIterator docid = docid0;
|
566 |
docid != ndb->wdb.postlist_end(pathterm); docid++) {
|
663 |
docid != m_ndb->wdb.postlist_end(pathterm); docid++) {
|
567 |
|
664 |
|
568 |
Xapian::Document doc = ndb->wdb.get_document(*docid);
|
665 |
Xapian::Document doc = m_ndb->wdb.get_document(*docid);
|
569 |
|
666 |
|
570 |
// Check the date once. no need to look at the others if
|
667 |
// Check the date once. no need to look at the others if
|
571 |
// the db needs updating. Note that the fmtime used to be
|
668 |
// the db needs updating. Note that the fmtime used to be
|
572 |
// called mtime, and we're keeping compat
|
669 |
// called mtime, and we're keeping compat
|
573 |
if (docid == docid0) {
|
670 |
if (docid == docid0) {
|
|
... |
|
... |
588 |
return true;
|
685 |
return true;
|
589 |
}
|
686 |
}
|
590 |
}
|
687 |
}
|
591 |
|
688 |
|
592 |
// Db is up to date. Make a note that this document exists.
|
689 |
// Db is up to date. Make a note that this document exists.
|
593 |
if (*docid < ndb->updated.size())
|
690 |
if (*docid < m_ndb->updated.size())
|
594 |
ndb->updated[*docid] = true;
|
691 |
m_ndb->updated[*docid] = true;
|
595 |
}
|
692 |
}
|
596 |
return false;
|
693 |
return false;
|
597 |
} catch (const Xapian::Error &e) {
|
694 |
} catch (const Xapian::Error &e) {
|
598 |
ermsg = e.get_msg().c_str();
|
695 |
ermsg = e.get_msg().c_str();
|
599 |
} catch (...) {
|
696 |
} catch (...) {
|
|
... |
|
... |
625 |
* Delete stem db for given language
|
722 |
* Delete stem db for given language
|
626 |
*/
|
723 |
*/
|
627 |
bool Db::deleteStemDb(const string& lang)
|
724 |
bool Db::deleteStemDb(const string& lang)
|
628 |
{
|
725 |
{
|
629 |
LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
|
726 |
LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
|
630 |
if (ndb == 0)
|
727 |
if (m_ndb == 0)
|
631 |
return false;
|
728 |
return false;
|
632 |
if (ndb->isopen == false)
|
729 |
if (m_ndb->m_isopen == false)
|
633 |
return false;
|
730 |
return false;
|
634 |
|
731 |
|
635 |
string dir = stemdbname(ndb->basedir, lang);
|
732 |
string dir = stemdbname(m_ndb->m_basedir, lang);
|
636 |
if (wipedir(dir) == 0 && rmdir(dir.c_str()) == 0)
|
733 |
if (wipedir(dir) == 0 && rmdir(dir.c_str()) == 0)
|
637 |
return true;
|
734 |
return true;
|
638 |
return false;
|
735 |
return false;
|
639 |
}
|
736 |
}
|
640 |
|
737 |
|
|
... |
|
... |
645 |
* parent terms in the document data.
|
742 |
* parent terms in the document data.
|
646 |
*/
|
743 |
*/
|
647 |
bool Db::createStemDb(const string& lang)
|
744 |
bool Db::createStemDb(const string& lang)
|
648 |
{
|
745 |
{
|
649 |
LOGDEB(("Db::createStemDb(%s)\n", lang.c_str()));
|
746 |
LOGDEB(("Db::createStemDb(%s)\n", lang.c_str()));
|
650 |
if (ndb == 0)
|
747 |
if (m_ndb == 0)
|
651 |
return false;
|
748 |
return false;
|
652 |
if (ndb->isopen == false)
|
749 |
if (m_ndb->m_isopen == false)
|
653 |
return false;
|
750 |
return false;
|
654 |
|
751 |
|
655 |
// First build the in-memory stem database:
|
752 |
// First build the in-memory stem database:
|
656 |
// We walk the list of all terms, and stem each.
|
753 |
// We walk the list of all terms, and stem each.
|
657 |
// If the stem is identical to the term, no need to create an entry
|
754 |
// If the stem is identical to the term, no need to create an entry
|
|
... |
|
... |
665 |
int stemdiff=0; // Count of all different stems
|
762 |
int stemdiff=0; // Count of all different stems
|
666 |
int stemmultiple = 0; // Count of stems with multiple derivatives
|
763 |
int stemmultiple = 0; // Count of stems with multiple derivatives
|
667 |
try {
|
764 |
try {
|
668 |
Xapian::Stem stemmer(lang);
|
765 |
Xapian::Stem stemmer(lang);
|
669 |
Xapian::TermIterator it;
|
766 |
Xapian::TermIterator it;
|
670 |
for (it = ndb->wdb.allterms_begin();
|
767 |
for (it = m_ndb->wdb.allterms_begin();
|
671 |
it != ndb->wdb.allterms_end(); it++) {
|
768 |
it != m_ndb->wdb.allterms_end(); it++) {
|
672 |
// If it has any non-lowercase 7bit char, cant be stemmable
|
769 |
// If it has any non-lowercase 7bit char, cant be stemmable
|
673 |
string::iterator sit = (*it).begin(), eit = sit + (*it).length();
|
770 |
string::iterator sit = (*it).begin(), eit = sit + (*it).length();
|
674 |
if ((sit = find_if(sit, eit, p_notlowerorutf)) != eit) {
|
771 |
if ((sit = find_if(sit, eit, p_notlowerorutf)) != eit) {
|
675 |
++nostem;
|
772 |
++nostem;
|
676 |
// LOGDEB(("stemskipped: [%s], because of 0x%x\n",
|
773 |
// LOGDEB(("stemskipped: [%s], because of 0x%x\n",
|
|
... |
|
... |
705 |
rmdir(dir.c_str());
|
802 |
rmdir(dir.c_str());
|
706 |
}
|
803 |
}
|
707 |
}
|
804 |
}
|
708 |
};
|
805 |
};
|
709 |
// Create xapian database for stem relations
|
806 |
// Create xapian database for stem relations
|
710 |
string stemdbdir = stemdbname(ndb->basedir, lang);
|
807 |
string stemdbdir = stemdbname(m_ndb->m_basedir, lang);
|
711 |
// We want to get rid of the db dir in case of error. This gets disarmed
|
808 |
// We want to get rid of the db dir in case of error. This gets disarmed
|
712 |
// just before success return.
|
809 |
// just before success return.
|
713 |
DirWiper wiper(stemdbdir);
|
810 |
DirWiper wiper(stemdbdir);
|
714 |
const char *ermsg = "NOERROR";
|
811 |
const char *ermsg = "NOERROR";
|
715 |
Xapian::WritableDatabase sdb;
|
812 |
Xapian::WritableDatabase sdb;
|
|
... |
|
... |
779 |
|
876 |
|
780 |
list<string> Db::getStemLangs()
|
877 |
list<string> Db::getStemLangs()
|
781 |
{
|
878 |
{
|
782 |
list<string> dirs;
|
879 |
list<string> dirs;
|
783 |
LOGDEB(("Db::getStemLang\n"));
|
880 |
LOGDEB(("Db::getStemLang\n"));
|
784 |
if (ndb == 0)
|
881 |
if (m_ndb == 0)
|
785 |
return dirs;
|
882 |
return dirs;
|
786 |
string pattern = stemdirstem + "*";
|
883 |
string pattern = stemdirstem + "*";
|
787 |
dirs = path_dirglob(ndb->basedir, pattern);
|
884 |
dirs = path_dirglob(m_ndb->m_basedir, pattern);
|
788 |
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
|
885 |
for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
|
789 |
*it = path_basename(*it);
|
886 |
*it = path_basename(*it);
|
790 |
*it = it->substr(stemdirstem.length(), string::npos);
|
887 |
*it = it->substr(stemdirstem.length(), string::npos);
|
791 |
}
|
888 |
}
|
792 |
return dirs;
|
889 |
return dirs;
|
|
... |
|
... |
799 |
* stem database while we are at it.
|
896 |
* stem database while we are at it.
|
800 |
*/
|
897 |
*/
|
801 |
bool Db::purge()
|
898 |
bool Db::purge()
|
802 |
{
|
899 |
{
|
803 |
LOGDEB(("Db::purge\n"));
|
900 |
LOGDEB(("Db::purge\n"));
|
804 |
if (ndb == 0)
|
901 |
if (m_ndb == 0)
|
805 |
return false;
|
902 |
return false;
|
806 |
LOGDEB(("Db::purge: isopen %d iswritable %d\n", ndb->isopen,
|
903 |
LOGDEB(("Db::purge: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen,
|
807 |
ndb->iswritable));
|
904 |
m_ndb->m_iswritable));
|
808 |
if (ndb->isopen == false || ndb->iswritable == false)
|
905 |
if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false)
|
809 |
return false;
|
906 |
return false;
|
810 |
|
907 |
|
811 |
// There seems to be problems with the document delete code, when
|
908 |
// There seems to be problems with the document delete code, when
|
812 |
// we do this, the database is not actually updated. Especially,
|
909 |
// we do this, the database is not actually updated. Especially,
|
813 |
// if we delete a bunch of docs, so that there is a hole in the
|
910 |
// if we delete a bunch of docs, so that there is a hole in the
|
814 |
// docids at the beginning, we can't add anything (appears to work
|
911 |
// docids at the beginning, we can't add anything (appears to work
|
815 |
// and does nothing). Maybe related to the exceptions below when
|
912 |
// and does nothing). Maybe related to the exceptions below when
|
816 |
// trying to delete an unexistant document ?
|
913 |
// trying to delete an unexistant document ?
|
817 |
// Flushing before trying the deletes seeems to work around the problem
|
914 |
// Flushing before trying the deletes seeems to work around the problem
|
818 |
try {
|
915 |
try {
|
819 |
ndb->wdb.flush();
|
916 |
m_ndb->wdb.flush();
|
820 |
} catch (...) {
|
917 |
} catch (...) {
|
821 |
LOGDEB(("Db::purge: 1st flush failed\n"));
|
918 |
LOGDEB(("Db::purge: 1st flush failed\n"));
|
822 |
}
|
919 |
}
|
823 |
for (Xapian::docid docid = 1; docid < ndb->updated.size(); ++docid) {
|
920 |
for (Xapian::docid docid = 1; docid < m_ndb->updated.size(); ++docid) {
|
824 |
if (!ndb->updated[docid]) {
|
921 |
if (!m_ndb->updated[docid]) {
|
825 |
try {
|
922 |
try {
|
826 |
ndb->wdb.delete_document(docid);
|
923 |
m_ndb->wdb.delete_document(docid);
|
827 |
LOGDEB(("Db::purge: deleted document #%d\n", docid));
|
924 |
LOGDEB(("Db::purge: deleted document #%d\n", docid));
|
828 |
} catch (const Xapian::DocNotFoundError &) {
|
925 |
} catch (const Xapian::DocNotFoundError &) {
|
829 |
LOGDEB(("Db::purge: document #%d not found\n", docid));
|
926 |
LOGDEB(("Db::purge: document #%d not found\n", docid));
|
830 |
}
|
927 |
}
|
831 |
}
|
928 |
}
|
832 |
}
|
929 |
}
|
833 |
try {
|
930 |
try {
|
834 |
ndb->wdb.flush();
|
931 |
m_ndb->wdb.flush();
|
835 |
} catch (...) {
|
932 |
} catch (...) {
|
836 |
LOGDEB(("Db::purge: 2nd flush failed\n"));
|
933 |
LOGDEB(("Db::purge: 2nd flush failed\n"));
|
837 |
}
|
934 |
}
|
838 |
return true;
|
935 |
return true;
|
839 |
}
|
936 |
}
|
840 |
|
937 |
|
841 |
/**
|
938 |
/**
|
842 |
* Expand term to list of all terms which stem to the same term.
|
939 |
* Expand term to list of all terms which stem to the same term.
|
843 |
*/
|
940 |
*/
|
844 |
static list<string> stemexpand(Native *ndb, string term, const string& lang)
|
941 |
static list<string> stemexpand(Native *m_ndb, string term, const string& lang)
|
845 |
{
|
942 |
{
|
846 |
list<string> explist;
|
943 |
list<string> explist;
|
847 |
try {
|
944 |
try {
|
848 |
Xapian::Stem stemmer(lang);
|
945 |
Xapian::Stem stemmer(lang);
|
849 |
string stem = stemmer.stem_word(term);
|
946 |
string stem = stemmer.stem_word(term);
|
850 |
LOGDEB(("stemexpand: [%s] stem-> [%s]\n", term.c_str(), stem.c_str()));
|
947 |
LOGDEB(("stemexpand: [%s] stem-> [%s]\n", term.c_str(), stem.c_str()));
|
851 |
// Try to fetch the doc from the stem db
|
948 |
// Try to fetch the doc from the stem db
|
852 |
string stemdbdir = stemdbname(ndb->basedir, lang);
|
949 |
string stemdbdir = stemdbname(m_ndb->m_basedir, lang);
|
853 |
Xapian::Database sdb(stemdbdir);
|
950 |
Xapian::Database sdb(stemdbdir);
|
854 |
LOGDEB1(("stemexpand: %s lastdocid: %d\n",
|
951 |
LOGDEB1(("stemexpand: %s lastdocid: %d\n",
|
855 |
stemdbdir.c_str(), sdb.get_lastdocid()));
|
952 |
stemdbdir.c_str(), sdb.get_lastdocid()));
|
856 |
if (!sdb.term_exists(stem)) {
|
953 |
if (!sdb.term_exists(stem)) {
|
857 |
LOGDEB1(("Db::stemexpand: no term for %s\n", stem.c_str()));
|
954 |
LOGDEB1(("Db::stemexpand: no term for %s\n", stem.c_str()));
|
|
... |
|
... |
923 |
// composition of the stem-expanded terms (or a single term query).
|
1020 |
// composition of the stem-expanded terms (or a single term query).
|
924 |
// - Elements corresponding to a phrase are an OP_PHRASE composition of the
|
1021 |
// - Elements corresponding to a phrase are an OP_PHRASE composition of the
|
925 |
// phrase terms (no stem expansion in this case)
|
1022 |
// phrase terms (no stem expansion in this case)
|
926 |
static void stringToXapianQueries(const string &iq,
|
1023 |
static void stringToXapianQueries(const string &iq,
|
927 |
const string& stemlang,
|
1024 |
const string& stemlang,
|
928 |
Native *ndb,
|
1025 |
Native *m_ndb,
|
929 |
list<Xapian::Query> &pqueries,
|
1026 |
list<Xapian::Query> &pqueries,
|
930 |
Db::QueryOpts opts = Db::QO_NONE)
|
1027 |
Db::QueryOpts opts = Db::QO_NONE)
|
931 |
{
|
1028 |
{
|
932 |
string qstring = iq;
|
1029 |
string qstring = iq;
|
933 |
|
1030 |
|
|
... |
|
... |
971 |
list<string> exp;
|
1068 |
list<string> exp;
|
972 |
string term1;
|
1069 |
string term1;
|
973 |
dumb_string(term, term1);
|
1070 |
dumb_string(term, term1);
|
974 |
// Possibly perform stem compression/expansion
|
1071 |
// Possibly perform stem compression/expansion
|
975 |
if (!nostemexp && (opts & Db::QO_STEM)) {
|
1072 |
if (!nostemexp && (opts & Db::QO_STEM)) {
|
976 |
exp = stemexpand(ndb, term1, stemlang);
|
1073 |
exp = stemexpand(m_ndb, term1, stemlang);
|
977 |
} else {
|
1074 |
} else {
|
978 |
exp.push_back(term1);
|
1075 |
exp.push_back(term1);
|
979 |
}
|
1076 |
}
|
980 |
|
1077 |
|
981 |
// Push either term or OR of stem-expanded set
|
1078 |
// Push either term or OR of stem-expanded set
|
|
... |
|
... |
999 |
bool Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
1096 |
bool Db::setQuery(const std::string &iqstring, QueryOpts opts,
|
1000 |
const string& stemlang)
|
1097 |
const string& stemlang)
|
1001 |
{
|
1098 |
{
|
1002 |
LOGDEB(("Db::setQuery: q: [%s], opts 0x%x, stemlang %s\n",
|
1099 |
LOGDEB(("Db::setQuery: q: [%s], opts 0x%x, stemlang %s\n",
|
1003 |
iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
|
1100 |
iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
|
1004 |
if (!ndb)
|
1101 |
if (!m_ndb)
|
1005 |
return false;
|
1102 |
return false;
|
1006 |
m_asdata.erase();
|
1103 |
m_asdata.erase();
|
1007 |
dbindices.clear();
|
1104 |
m_dbindices.clear();
|
1008 |
list<Xapian::Query> pqueries;
|
1105 |
list<Xapian::Query> pqueries;
|
1009 |
stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
|
1106 |
stringToXapianQueries(iqstring, stemlang, m_ndb, pqueries, opts);
|
1010 |
ndb->query = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
1107 |
m_ndb->query = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
1011 |
pqueries.end());
|
1108 |
pqueries.end());
|
1012 |
delete ndb->enquire;
|
1109 |
delete m_ndb->enquire;
|
1013 |
ndb->enquire = new Xapian::Enquire(ndb->db);
|
1110 |
m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
|
1014 |
ndb->enquire->set_query(ndb->query);
|
1111 |
m_ndb->enquire->set_query(m_ndb->query);
|
1015 |
ndb->mset = Xapian::MSet();
|
1112 |
m_ndb->mset = Xapian::MSet();
|
1016 |
return true;
|
1113 |
return true;
|
1017 |
}
|
1114 |
}
|
1018 |
|
1115 |
|
1019 |
// Prepare query out of "advanced search" data
|
1116 |
// Prepare query out of "advanced search" data
|
1020 |
bool Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
1117 |
bool Db::setQuery(AdvSearchData &sdata, QueryOpts opts,
|
|
... |
|
... |
1033 |
LOGDEB((" searched file types: %s\n", ft.c_str()));
|
1130 |
LOGDEB((" searched file types: %s\n", ft.c_str()));
|
1034 |
if (!sdata.topdir.empty())
|
1131 |
if (!sdata.topdir.empty())
|
1035 |
LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
|
1132 |
LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
|
1036 |
|
1133 |
|
1037 |
m_asdata = sdata;
|
1134 |
m_asdata = sdata;
|
1038 |
dbindices.clear();
|
1135 |
m_dbindices.clear();
|
1039 |
|
1136 |
|
1040 |
if (!ndb)
|
1137 |
if (!m_ndb)
|
1041 |
return false;
|
1138 |
return false;
|
1042 |
list<Xapian::Query> pqueries;
|
1139 |
list<Xapian::Query> pqueries;
|
1043 |
Xapian::Query xq;
|
1140 |
Xapian::Query xq;
|
1044 |
|
1141 |
|
1045 |
if (!sdata.filename.empty()) {
|
1142 |
if (!sdata.filename.empty()) {
|
|
... |
|
... |
1060 |
} // else let it be
|
1157 |
} // else let it be
|
1061 |
|
1158 |
|
1062 |
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
|
1159 |
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
|
1063 |
|
1160 |
|
1064 |
// Match pattern against all file names in the db
|
1161 |
// Match pattern against all file names in the db
|
1065 |
Xapian::TermIterator it = ndb->db.allterms_begin();
|
1162 |
Xapian::TermIterator it = m_ndb->db.allterms_begin();
|
1066 |
it.skip_to("XSFN");
|
1163 |
it.skip_to("XSFN");
|
1067 |
list<string> names;
|
1164 |
list<string> names;
|
1068 |
for (;it != ndb->db.allterms_end(); it++) {
|
1165 |
for (;it != m_ndb->db.allterms_end(); it++) {
|
1069 |
if ((*it).find("XSFN") != 0)
|
1166 |
if ((*it).find("XSFN") != 0)
|
1070 |
break;
|
1167 |
break;
|
1071 |
string fn = (*it).substr(4);
|
1168 |
string fn = (*it).substr(4);
|
1072 |
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
|
1169 |
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
|
1073 |
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
|
1170 |
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
|
|
... |
|
... |
1087 |
// Build a query out of the matching file name terms.
|
1184 |
// Build a query out of the matching file name terms.
|
1088 |
xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
|
1185 |
xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
|
1089 |
}
|
1186 |
}
|
1090 |
|
1187 |
|
1091 |
if (!sdata.allwords.empty()) {
|
1188 |
if (!sdata.allwords.empty()) {
|
1092 |
stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries, opts);
|
1189 |
stringToXapianQueries(sdata.allwords, stemlang, m_ndb, pqueries, opts);
|
1093 |
if (!pqueries.empty()) {
|
1190 |
if (!pqueries.empty()) {
|
1094 |
Xapian::Query nq =
|
1191 |
Xapian::Query nq =
|
1095 |
Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
1192 |
Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
|
1096 |
pqueries.end());
|
1193 |
pqueries.end());
|
1097 |
xq = xq.empty() ? nq :
|
1194 |
xq = xq.empty() ? nq :
|
|
... |
|
... |
1099 |
pqueries.clear();
|
1196 |
pqueries.clear();
|
1100 |
}
|
1197 |
}
|
1101 |
}
|
1198 |
}
|
1102 |
|
1199 |
|
1103 |
if (!sdata.orwords.empty()) {
|
1200 |
if (!sdata.orwords.empty()) {
|
1104 |
stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries, opts);
|
1201 |
stringToXapianQueries(sdata.orwords, stemlang, m_ndb, pqueries, opts);
|
1105 |
if (!pqueries.empty()) {
|
1202 |
if (!pqueries.empty()) {
|
1106 |
Xapian::Query nq =
|
1203 |
Xapian::Query nq =
|
1107 |
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
1204 |
Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
1108 |
pqueries.end());
|
1205 |
pqueries.end());
|
1109 |
xq = xq.empty() ? nq :
|
1206 |
xq = xq.empty() ? nq :
|
|
... |
|
... |
1112 |
}
|
1209 |
}
|
1113 |
}
|
1210 |
}
|
1114 |
|
1211 |
|
1115 |
// We do no stem expansion on 'No' words. Should we ?
|
1212 |
// We do no stem expansion on 'No' words. Should we ?
|
1116 |
if (!sdata.nowords.empty()) {
|
1213 |
if (!sdata.nowords.empty()) {
|
1117 |
stringToXapianQueries(sdata.nowords, stemlang, ndb, pqueries);
|
1214 |
stringToXapianQueries(sdata.nowords, stemlang, m_ndb, pqueries);
|
1118 |
if (!pqueries.empty()) {
|
1215 |
if (!pqueries.empty()) {
|
1119 |
Xapian::Query nq;
|
1216 |
Xapian::Query nq;
|
1120 |
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
1217 |
nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
|
1121 |
pqueries.end());
|
1218 |
pqueries.end());
|
1122 |
xq = xq.empty() ? nq :
|
1219 |
xq = xq.empty() ? nq :
|
|
... |
|
... |
1126 |
}
|
1223 |
}
|
1127 |
|
1224 |
|
1128 |
if (!sdata.phrase.empty()) {
|
1225 |
if (!sdata.phrase.empty()) {
|
1129 |
Xapian::Query nq;
|
1226 |
Xapian::Query nq;
|
1130 |
string s = string("\"") + sdata.phrase + string("\"");
|
1227 |
string s = string("\"") + sdata.phrase + string("\"");
|
1131 |
stringToXapianQueries(s, stemlang, ndb, pqueries);
|
1228 |
stringToXapianQueries(s, stemlang, m_ndb, pqueries);
|
1132 |
if (!pqueries.empty()) {
|
1229 |
if (!pqueries.empty()) {
|
1133 |
// There should be a single list element phrase query.
|
1230 |
// There should be a single list element phrase query.
|
1134 |
xq = xq.empty() ? *pqueries.begin() :
|
1231 |
xq = xq.empty() ? *pqueries.begin() :
|
1135 |
Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin());
|
1232 |
Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin());
|
1136 |
pqueries.clear();
|
1233 |
pqueries.clear();
|
|
... |
|
... |
1147 |
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
|
1244 |
Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
|
1148 |
}
|
1245 |
}
|
1149 |
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
|
1246 |
xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
|
1150 |
}
|
1247 |
}
|
1151 |
|
1248 |
|
1152 |
ndb->query = xq;
|
1249 |
m_ndb->query = xq;
|
1153 |
delete ndb->enquire;
|
1250 |
delete m_ndb->enquire;
|
1154 |
ndb->enquire = new Xapian::Enquire(ndb->db);
|
1251 |
m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
|
1155 |
ndb->enquire->set_query(ndb->query);
|
1252 |
m_ndb->enquire->set_query(m_ndb->query);
|
1156 |
ndb->mset = Xapian::MSet();
|
1253 |
m_ndb->mset = Xapian::MSet();
|
1157 |
// Get the query description and trim the "Xapian::Query"
|
1254 |
// Get the query description and trim the "Xapian::Query"
|
1158 |
sdata.description = ndb->query.get_description();
|
1255 |
sdata.description = m_ndb->query.get_description();
|
1159 |
if (sdata.description.find("Xapian::Query") == 0)
|
1256 |
if (sdata.description.find("Xapian::Query") == 0)
|
1160 |
sdata.description = sdata.description.substr(strlen("Xapian::Query"));
|
1257 |
sdata.description = sdata.description.substr(strlen("Xapian::Query"));
|
1161 |
LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
|
1258 |
LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
|
1162 |
return true;
|
1259 |
return true;
|
1163 |
}
|
1260 |
}
|
1164 |
|
1261 |
|
1165 |
// Return the list of terms used by the current native query.
//
// 'terms' is emptied, then filled with every term yielded by the
// query's term iterator. Returns false (leaving 'terms' untouched) if
// there is no native db object, else true.
bool Db::getQueryTerms(list<string>& terms)
{
    if (m_ndb == 0)
        return false;

    terms.clear();
    const Xapian::TermIterator last = m_ndb->query.get_terms_end();
    Xapian::TermIterator cur = m_ndb->query.get_terms_begin();
    while (cur != last) {
        terms.push_back(*cur);
        ++cur;
    }
    return true;
}
|
1178 |
|
1275 |
|
|
|
1276 |
// Mset size
|
1179 |
static const int qquantum = 30;
|
1277 |
static const int qquantum = 30;
|
1180 |
|
1278 |
|
1181 |
int Db::getResCnt()
|
1279 |
int Db::getResCnt()
|
1182 |
{
|
1280 |
{
|
1183 |
if (!ndb || !ndb->enquire) {
|
1281 |
if (!m_ndb || !m_ndb->enquire) {
|
1184 |
LOGERR(("Db::getResCnt: no query opened\n"));
|
1282 |
LOGERR(("Db::getResCnt: no query opened\n"));
|
1185 |
return -1;
|
1283 |
return -1;
|
1186 |
}
|
1284 |
}
|
1187 |
if (ndb->mset.size() <= 0) {
|
1285 |
if (m_ndb->mset.size() <= 0) {
|
1188 |
try {
|
1286 |
try {
|
1189 |
ndb->mset = ndb->enquire->get_mset(0, qquantum);
|
1287 |
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
|
1190 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1288 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1191 |
ndb->db.reopen();
|
1289 |
m_ndb->db.reopen();
|
1192 |
ndb->mset = ndb->enquire->get_mset(0, qquantum);
|
1290 |
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
|
1193 |
} catch (const Xapian::Error & error) {
|
1291 |
} catch (const Xapian::Error & error) {
|
1194 |
LOGERR(("enquire->get_mset: exception: %s\n",
|
1292 |
LOGERR(("enquire->get_mset: exception: %s\n",
|
1195 |
error.get_msg().c_str()));
|
1293 |
error.get_msg().c_str()));
|
1196 |
return -1;
|
1294 |
return -1;
|
1197 |
}
|
1295 |
}
|
1198 |
}
|
1296 |
}
|
1199 |
|
1297 |
|
1200 |
return ndb->mset.get_matches_lower_bound();
|
1298 |
return m_ndb->mset.get_matches_lower_bound();
|
1201 |
}
|
|
|
1202 |
|
|
|
1203 |
// This class (friend to RclDb) exists so that we can have functions that
|
|
|
1204 |
// access private RclDb data and have Xapian-specific parameters (so that we
|
|
|
1205 |
// don't want them to appear in the public rcldb.h).
|
|
|
1206 |
class DbPops {
|
|
|
1207 |
public:
|
|
|
1208 |
};
|
1299 |
}
|
1209 |
|
1300 |
|
1210 |
bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
|
1301 |
bool Native::dbDataToRclDoc(std::string &data, Doc &doc,
|
1211 |
int qopts,
|
1302 |
int qopts,
|
1212 |
Xapian::docid docid, const list<string>& terms)
|
1303 |
Xapian::docid docid, const list<string>& terms)
|
1213 |
{
|
1304 |
{
|
|
... |
|
... |
1250 |
// sequence to the internal Xapian hole-y one (the holes being the documents
|
1341 |
// sequence to the internal Xapian hole-y one (the holes being the documents
|
1251 |
// that don't match the filter).
|
1342 |
// that don't match the filter).
|
1252 |
bool Db::getDoc(int exti, Doc &doc, int *percent)
|
1343 |
bool Db::getDoc(int exti, Doc &doc, int *percent)
|
1253 |
{
|
1344 |
{
|
1254 |
LOGDEB1(("Db::getDoc: exti %d\n", exti));
|
1345 |
LOGDEB1(("Db::getDoc: exti %d\n", exti));
|
1255 |
if (!ndb || !ndb->enquire) {
|
1346 |
if (!m_ndb || !m_ndb->enquire) {
|
1256 |
LOGERR(("Db::getDoc: no query opened\n"));
|
1347 |
LOGERR(("Db::getDoc: no query opened\n"));
|
1257 |
return false;
|
1348 |
return false;
|
1258 |
}
|
1349 |
}
|
1259 |
|
1350 |
|
1260 |
// For now the only post-query filter is on dir subtree
|
1351 |
// For now the only post-query filter is on dir subtree
|
|
... |
|
... |
1262 |
LOGDEB1(("Topdir %s postqflt %d\n", m_asdata.topdir.c_str(), postqfilter));
|
1353 |
LOGDEB1(("Topdir %s postqflt %d\n", m_asdata.topdir.c_str(), postqfilter));
|
1263 |
|
1354 |
|
1264 |
int xapi;
|
1355 |
int xapi;
|
1265 |
if (postqfilter) {
|
1356 |
if (postqfilter) {
|
1266 |
// There is a postquery filter, does this fall in already known area ?
|
1357 |
// There is a postquery filter, does this fall in already known area ?
|
1267 |
if (exti >= (int)dbindices.size()) {
|
1358 |
if (exti >= (int)m_dbindices.size()) {
|
1268 |
// Have to fetch xapian docs and filter until we get
|
1359 |
// Have to fetch xapian docs and filter until we get
|
1269 |
// enough or fail
|
1360 |
// enough or fail
|
1270 |
dbindices.reserve(exti+1);
|
1361 |
m_dbindices.reserve(exti+1);
|
1271 |
// First xapian doc we fetch is the one after last stored
|
1362 |
// First xapian doc we fetch is the one after last stored
|
1272 |
int first = dbindices.size() > 0 ? dbindices.back() + 1 : 0;
|
1363 |
int first = m_dbindices.size() > 0 ? m_dbindices.back() + 1 : 0;
|
1273 |
// Loop until we get enough docs
|
1364 |
// Loop until we get enough docs
|
1274 |
while (exti >= (int)dbindices.size()) {
|
1365 |
while (exti >= (int)m_dbindices.size()) {
|
1275 |
LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
|
1366 |
LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
|
1276 |
qquantum, first));
|
1367 |
qquantum, first));
|
1277 |
try {
|
1368 |
try {
|
1278 |
ndb->mset = ndb->enquire->get_mset(first, qquantum);
|
1369 |
m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
|
1279 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1370 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1280 |
ndb->db.reopen();
|
1371 |
m_ndb->db.reopen();
|
1281 |
ndb->mset = ndb->enquire->get_mset(first, qquantum);
|
1372 |
m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
|
1282 |
} catch (const Xapian::Error & error) {
|
1373 |
} catch (const Xapian::Error & error) {
|
1283 |
LOGERR(("enquire->get_mset: exception: %s\n",
|
1374 |
LOGERR(("enquire->get_mset: exception: %s\n",
|
1284 |
error.get_msg().c_str()));
|
1375 |
error.get_msg().c_str()));
|
1285 |
abort();
|
1376 |
abort();
|
1286 |
}
|
1377 |
}
|
1287 |
|
1378 |
|
1288 |
if (ndb->mset.empty()) {
|
1379 |
if (m_ndb->mset.empty()) {
|
1289 |
LOGDEB(("Db::getDoc: got empty mset\n"));
|
1380 |
LOGDEB(("Db::getDoc: got empty mset\n"));
|
1290 |
return false;
|
1381 |
return false;
|
1291 |
}
|
1382 |
}
|
1292 |
first = ndb->mset.get_firstitem();
|
1383 |
first = m_ndb->mset.get_firstitem();
|
1293 |
for (unsigned int i = 0; i < ndb->mset.size() ; i++) {
|
1384 |
for (unsigned int i = 0; i < m_ndb->mset.size() ; i++) {
|
1294 |
LOGDEB(("Db::getDoc: [%d]\n", i));
|
1385 |
LOGDEB(("Db::getDoc: [%d]\n", i));
|
1295 |
Xapian::Document xdoc = ndb->mset[i].get_document();
|
1386 |
Xapian::Document xdoc = m_ndb->mset[i].get_document();
|
1296 |
if (ndb->filterMatch(this, xdoc)) {
|
1387 |
if (m_ndb->filterMatch(this, xdoc)) {
|
1297 |
dbindices.push_back(first + i);
|
1388 |
m_dbindices.push_back(first + i);
|
1298 |
}
|
1389 |
}
|
1299 |
}
|
1390 |
}
|
1300 |
first = first + ndb->mset.size();
|
1391 |
first = first + m_ndb->mset.size();
|
1301 |
}
|
1392 |
}
|
1302 |
}
|
1393 |
}
|
1303 |
xapi = dbindices[exti];
|
1394 |
xapi = m_dbindices[exti];
|
1304 |
} else {
|
1395 |
} else {
|
1305 |
xapi = exti;
|
1396 |
xapi = exti;
|
1306 |
}
|
1397 |
}
|
1307 |
|
1398 |
|
1308 |
|
1399 |
|
1309 |
// From there on, we work with a xapian enquire item number. Fetch it
|
1400 |
// From there on, we work with a xapian enquire item number. Fetch it
|
1310 |
int first = ndb->mset.get_firstitem();
|
1401 |
int first = m_ndb->mset.get_firstitem();
|
1311 |
int last = first + ndb->mset.size() -1;
|
1402 |
int last = first + m_ndb->mset.size() -1;
|
1312 |
|
1403 |
|
1313 |
if (!(xapi >= first && xapi <= last)) {
|
1404 |
if (!(xapi >= first && xapi <= last)) {
|
1314 |
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
1405 |
LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
|
1315 |
try {
|
1406 |
try {
|
1316 |
ndb->mset = ndb->enquire->get_mset(xapi, qquantum);
|
1407 |
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum);
|
1317 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1408 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1318 |
ndb->db.reopen();
|
1409 |
m_ndb->db.reopen();
|
1319 |
ndb->mset = ndb->enquire->get_mset(xapi, qquantum);
|
1410 |
m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum);
|
1320 |
} catch (const Xapian::Error & error) {
|
1411 |
} catch (const Xapian::Error & error) {
|
1321 |
LOGERR(("enquire->get_mset: exception: %s\n",
|
1412 |
LOGERR(("enquire->get_mset: exception: %s\n",
|
1322 |
error.get_msg().c_str()));
|
1413 |
error.get_msg().c_str()));
|
1323 |
abort();
|
1414 |
abort();
|
1324 |
}
|
1415 |
}
|
1325 |
if (ndb->mset.empty())
|
1416 |
if (m_ndb->mset.empty())
|
1326 |
return false;
|
1417 |
return false;
|
1327 |
first = ndb->mset.get_firstitem();
|
1418 |
first = m_ndb->mset.get_firstitem();
|
1328 |
last = first + ndb->mset.size() -1;
|
1419 |
last = first + m_ndb->mset.size() -1;
|
1329 |
}
|
1420 |
}
|
1330 |
|
1421 |
|
1331 |
LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
|
1422 |
LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
|
1332 |
ndb->query.get_description().c_str(),
|
1423 |
m_ndb->query.get_description().c_str(),
|
1333 |
first, last,
|
1424 |
first, last,
|
1334 |
ndb->mset.get_matches_lower_bound()));
|
1425 |
m_ndb->mset.get_matches_lower_bound()));
|
1335 |
|
1426 |
|
1336 |
Xapian::Document xdoc = ndb->mset[xapi-first].get_document();
|
1427 |
Xapian::Document xdoc = m_ndb->mset[xapi-first].get_document();
|
1337 |
Xapian::docid docid = *(ndb->mset[xapi-first]);
|
1428 |
Xapian::docid docid = *(m_ndb->mset[xapi-first]);
|
1338 |
if (percent)
|
1429 |
if (percent)
|
1339 |
*percent = ndb->mset.convert_to_percent(ndb->mset[xapi-first]);
|
1430 |
*percent = m_ndb->mset.convert_to_percent(m_ndb->mset[xapi-first]);
|
1340 |
|
1431 |
|
1341 |
// Parse xapian document's data and populate doc fields
|
1432 |
// Parse xapian document's data and populate doc fields
|
1342 |
string data = xdoc.get_data();
|
1433 |
string data = xdoc.get_data();
|
1343 |
list<string> terms;
|
1434 |
list<string> terms;
|
1344 |
getQueryTerms(terms);
|
1435 |
getQueryTerms(terms);
|
1345 |
return ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
|
1436 |
return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
|
1346 |
}
|
1437 |
}
|
1347 |
|
1438 |
|
1348 |
// Retrieve document defined by file name and internal path. Very inefficient,
|
1439 |
// Retrieve document defined by file name and internal path. Very inefficient,
|
1349 |
// used only for history display. We'd need to enter path+ipath terms in the
|
1440 |
// used only for history display. We'd need to enter path+ipath terms in the
|
1350 |
// db if we wanted to make this more efficient.
|
1441 |
// db if we wanted to make this more efficient.
|
1351 |
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
1442 |
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
|
1352 |
{
|
1443 |
{
|
1353 |
LOGDEB(("Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(),
|
1444 |
LOGDEB(("Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(),
|
1354 |
ipath.c_str()));
|
1445 |
ipath.c_str()));
|
1355 |
if (ndb == 0)
|
1446 |
if (m_ndb == 0)
|
1356 |
return false;
|
1447 |
return false;
|
1357 |
|
1448 |
|
1358 |
// Initialize what we can in any case. If this is history, caller
|
1449 |
// Initialize what we can in any case. If this is history, caller
|
1359 |
// will make partial display in case of error
|
1450 |
// will make partial display in case of error
|
1360 |
doc.ipath = ipath;
|
1451 |
doc.ipath = ipath;
|
|
... |
|
... |
1367 |
string pathterm = "P" + hash;
|
1458 |
string pathterm = "P" + hash;
|
1368 |
// Look for all documents with this path, searching for the one
|
1459 |
// Look for all documents with this path, searching for the one
|
1369 |
// with the appropriate ipath. This is very inefficient.
|
1460 |
// with the appropriate ipath. This is very inefficient.
|
1370 |
const char *ermsg = "";
|
1461 |
const char *ermsg = "";
|
1371 |
try {
|
1462 |
try {
|
1372 |
if (!ndb->db.term_exists(pathterm)) {
|
1463 |
if (!m_ndb->db.term_exists(pathterm)) {
|
1373 |
// Document found in history no longer in the database.
|
1464 |
// Document found in history no longer in the database.
|
1374 |
// We return true (because their might be other ok docs further)
|
1465 |
// We return true (because their might be other ok docs further)
|
1375 |
// but indicate the error with pc = -1
|
1466 |
// but indicate the error with pc = -1
|
1376 |
if (*pc)
|
1467 |
if (*pc)
|
1377 |
*pc = -1;
|
1468 |
*pc = -1;
|
1378 |
LOGINFO(("Db:getDoc: path inexistant: [%s] length %d\n",
|
1469 |
LOGINFO(("Db:getDoc: path inexistant: [%s] length %d\n",
|
1379 |
pathterm.c_str(), pathterm.length()));
|
1470 |
pathterm.c_str(), pathterm.length()));
|
1380 |
return true;
|
1471 |
return true;
|
1381 |
}
|
1472 |
}
|
1382 |
for (Xapian::PostingIterator docid =
|
1473 |
for (Xapian::PostingIterator docid =
|
1383 |
ndb->db.postlist_begin(pathterm);
|
1474 |
m_ndb->db.postlist_begin(pathterm);
|
1384 |
docid != ndb->db.postlist_end(pathterm); docid++) {
|
1475 |
docid != m_ndb->db.postlist_end(pathterm); docid++) {
|
1385 |
|
1476 |
|
1386 |
Xapian::Document xdoc = ndb->db.get_document(*docid);
|
1477 |
Xapian::Document xdoc = m_ndb->db.get_document(*docid);
|
1387 |
string data = xdoc.get_data();
|
1478 |
string data = xdoc.get_data();
|
1388 |
list<string> terms;
|
1479 |
list<string> terms;
|
1389 |
if (ndb->dbDataToRclDoc(data, doc, QO_NONE, *docid, terms)
|
1480 |
if (m_ndb->dbDataToRclDoc(data, doc, QO_NONE, *docid, terms)
|
1390 |
&& doc.ipath == ipath)
|
1481 |
&& doc.ipath == ipath)
|
1391 |
return true;
|
1482 |
return true;
|
1392 |
}
|
1483 |
}
|
1393 |
} catch (const Xapian::Error &e) {
|
1484 |
} catch (const Xapian::Error &e) {
|
1394 |
ermsg = e.get_msg().c_str();
|
1485 |
ermsg = e.get_msg().c_str();
|