|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
152 |
, m_wqueue("DbUpd",
|
152 |
, m_wqueue("DbUpd",
|
153 |
m_rcldb->m_config->getThrConf(RclConfig::ThrDbWrite).first),
|
153 |
m_rcldb->m_config->getThrConf(RclConfig::ThrDbWrite).first),
|
154 |
m_totalworkns(0LL), m_havewriteq(false)
|
154 |
m_totalworkns(0LL), m_havewriteq(false)
|
155 |
#endif // IDX_THREADS
|
155 |
#endif // IDX_THREADS
|
156 |
{
|
156 |
{
|
157 |
LOGDEB1("Native::Native: me " << (this) << "\n" );
|
157 |
LOGDEB1("Native::Native: me " << this << "\n");
|
158 |
}
|
158 |
}
|
159 |
|
159 |
|
160 |
Db::Native::~Native()
|
160 |
Db::Native::~Native()
|
161 |
{
|
161 |
{
|
162 |
LOGDEB1("Native::~Native: me " << (this) << "\n" );
|
162 |
LOGDEB1("Native::~Native: me " << this << "\n");
|
163 |
#ifdef IDX_THREADS
|
163 |
#ifdef IDX_THREADS
|
164 |
if (m_havewriteq) {
|
164 |
if (m_havewriteq) {
|
165 |
void *status = m_wqueue.setTerminateAndWait();
|
165 |
void *status = m_wqueue.setTerminateAndWait();
|
166 |
if (status) {
|
166 |
if (status) {
|
167 |
LOGDEB1("Native::~Native: worker status " << status << "\n");
|
167 |
LOGDEB1("Native::~Native: worker status " << status << "\n");
|
168 |
}
|
168 |
}
|
169 |
}
|
169 |
}
|
170 |
#endif // IDX_THREADS
|
170 |
#endif // IDX_THREADS
|
171 |
}
|
171 |
}
|
172 |
|
172 |
|
|
... |
|
... |
185 |
return (void*)1;
|
185 |
return (void*)1;
|
186 |
}
|
186 |
}
|
187 |
bool status = false;
|
187 |
bool status = false;
|
188 |
switch (tsk->op) {
|
188 |
switch (tsk->op) {
|
189 |
case DbUpdTask::AddOrUpdate:
|
189 |
case DbUpdTask::AddOrUpdate:
|
190 |
LOGDEB("DbUpdWorker: got add/update task, ql " << (int(qsz)) << "\n" );
|
190 |
LOGDEB("DbUpdWorker: got add/update task, ql " << qsz << "\n");
|
191 |
status = ndbp->addOrUpdateWrite(tsk->udi, tsk->uniterm,
|
191 |
status = ndbp->addOrUpdateWrite(tsk->udi, tsk->uniterm,
|
192 |
tsk->doc, tsk->txtlen);
|
192 |
tsk->doc, tsk->txtlen);
|
193 |
break;
|
193 |
break;
|
194 |
case DbUpdTask::Delete:
|
194 |
case DbUpdTask::Delete:
|
195 |
LOGDEB("DbUpdWorker: got delete task, ql " << (int(qsz)) << "\n" );
|
195 |
LOGDEB("DbUpdWorker: got delete task, ql " << qsz << "\n");
|
196 |
status = ndbp->purgeFileWrite(false, tsk->udi, tsk->uniterm);
|
196 |
status = ndbp->purgeFileWrite(false, tsk->udi, tsk->uniterm);
|
197 |
break;
|
197 |
break;
|
198 |
case DbUpdTask::PurgeOrphans:
|
198 |
case DbUpdTask::PurgeOrphans:
|
199 |
LOGDEB("DbUpdWorker: got orphans purge task, ql " << (int(qsz)) << "\n" );
|
199 |
LOGDEB("DbUpdWorker: got orphans purge task, ql " << qsz << "\n");
|
200 |
status = ndbp->purgeFileWrite(true, tsk->udi, tsk->uniterm);
|
200 |
status = ndbp->purgeFileWrite(true, tsk->udi, tsk->uniterm);
|
201 |
break;
|
201 |
break;
|
202 |
default:
|
202 |
default:
|
203 |
LOGERR("DbUpdWorker: unknown op " << (tsk->op) << " !!\n" );
|
203 |
LOGERR("DbUpdWorker: unknown op " << tsk->op << " !!\n");
|
204 |
break;
|
204 |
break;
|
205 |
}
|
205 |
}
|
206 |
if (!status) {
|
206 |
if (!status) {
|
207 |
LOGERR("DbUpdWorker: xxWrite failed\n" );
|
207 |
LOGERR("DbUpdWorker: xxWrite failed\n");
|
208 |
tqp->workerExit();
|
208 |
tqp->workerExit();
|
209 |
delete tsk;
|
209 |
delete tsk;
|
210 |
return (void*)0;
|
210 |
return (void*)0;
|
211 |
}
|
211 |
}
|
212 |
delete tsk;
|
212 |
delete tsk;
|
|
... |
|
... |
218 |
m_havewriteq = false;
|
218 |
m_havewriteq = false;
|
219 |
const RclConfig *cnf = m_rcldb->m_config;
|
219 |
const RclConfig *cnf = m_rcldb->m_config;
|
220 |
int writeqlen = cnf->getThrConf(RclConfig::ThrDbWrite).first;
|
220 |
int writeqlen = cnf->getThrConf(RclConfig::ThrDbWrite).first;
|
221 |
int writethreads = cnf->getThrConf(RclConfig::ThrDbWrite).second;
|
221 |
int writethreads = cnf->getThrConf(RclConfig::ThrDbWrite).second;
|
222 |
if (writethreads > 1) {
|
222 |
if (writethreads > 1) {
|
223 |
LOGINFO("RclDb: write threads count was forced down to 1\n" );
|
223 |
LOGINFO("RclDb: write threads count was forced down to 1\n");
|
224 |
writethreads = 1;
|
224 |
writethreads = 1;
|
225 |
}
|
225 |
}
|
226 |
if (writeqlen >= 0 && writethreads > 0) {
|
226 |
if (writeqlen >= 0 && writethreads > 0) {
|
227 |
if (!m_wqueue.start(writethreads, DbUpdWorker, this)) {
|
227 |
if (!m_wqueue.start(writethreads, DbUpdWorker, this)) {
|
228 |
LOGERR("Db::Db: Worker start failed\n" );
|
228 |
LOGERR("Db::Db: Worker start failed\n");
|
229 |
return;
|
229 |
return;
|
230 |
}
|
230 |
}
|
231 |
m_havewriteq = true;
|
231 |
m_havewriteq = true;
|
232 |
}
|
232 |
}
|
233 |
LOGDEB("RclDb:: threads: haveWriteQ " << (m_havewriteq) << ", wqlen " << (writeqlen) << " wqts " << (writethreads) << "\n" );
|
233 |
LOGDEB("RclDb:: threads: haveWriteQ " << m_havewriteq << ", wqlen " <<
|
|
|
234 |
writeqlen << " wqts " << writethreads << "\n");
|
234 |
}
|
235 |
}
|
235 |
|
236 |
|
236 |
#endif // IDX_THREADS
|
237 |
#endif // IDX_THREADS
|
237 |
|
238 |
|
238 |
/* See comment in class declaration: return all subdocuments of a
|
239 |
/* See comment in class declaration: return all subdocuments of a
|
239 |
* document given by its unique id.
|
240 |
* document given by its unique id.
|
240 |
*/
|
241 |
*/
|
241 |
bool Db::Native::subDocs(const string &udi, int idxi,
|
242 |
bool Db::Native::subDocs(const string &udi, int idxi,
|
242 |
vector<Xapian::docid>& docids)
|
243 |
vector<Xapian::docid>& docids)
|
243 |
{
|
244 |
{
|
244 |
LOGDEB2("subDocs: [" << (uniterm) << "]\n" );
|
245 |
LOGDEB2("subDocs: [" << uniterm << "]\n");
|
245 |
string pterm = make_parentterm(udi);
|
246 |
string pterm = make_parentterm(udi);
|
246 |
vector<Xapian::docid> candidates;
|
247 |
vector<Xapian::docid> candidates;
|
247 |
XAPTRY(docids.clear();
|
248 |
XAPTRY(docids.clear();
|
248 |
candidates.insert(candidates.begin(), xrdb.postlist_begin(pterm),
|
249 |
candidates.insert(candidates.begin(), xrdb.postlist_begin(pterm),
|
249 |
xrdb.postlist_end(pterm)),
|
250 |
xrdb.postlist_end(pterm)),
|
250 |
xrdb, m_rcldb->m_reason);
|
251 |
xrdb, m_rcldb->m_reason);
|
251 |
if (!m_rcldb->m_reason.empty()) {
|
252 |
if (!m_rcldb->m_reason.empty()) {
|
252 |
LOGERR("Rcl::Db::subDocs: " << (m_rcldb->m_reason) << "\n" );
|
253 |
LOGERR("Rcl::Db::subDocs: " << m_rcldb->m_reason << "\n");
|
253 |
return false;
|
254 |
return false;
|
254 |
} else {
|
255 |
} else {
|
255 |
for (unsigned int i = 0; i < candidates.size(); i++) {
|
256 |
for (unsigned int i = 0; i < candidates.size(); i++) {
|
256 |
if (whatDbIdx(candidates[i]) == (size_t)idxi) {
|
257 |
if (whatDbIdx(candidates[i]) == (size_t)idxi) {
|
257 |
docids.push_back(candidates[i]);
|
258 |
docids.push_back(candidates[i]);
|
258 |
}
|
259 |
}
|
259 |
}
|
260 |
}
|
260 |
LOGDEB0("Db::Native::subDocs: returning " << (docids.size()) << " ids\n" );
|
261 |
LOGDEB0("Db::Native::subDocs: returning " << docids.size() << " ids\n");
|
261 |
return true;
|
262 |
return true;
|
262 |
}
|
263 |
}
|
263 |
}
|
264 |
}
|
264 |
|
265 |
|
265 |
bool Db::Native::xdocToUdi(Xapian::Document& xdoc, string &udi)
|
266 |
bool Db::Native::xdocToUdi(Xapian::Document& xdoc, string &udi)
|
|
... |
|
... |
267 |
Xapian::TermIterator xit;
|
268 |
Xapian::TermIterator xit;
|
268 |
XAPTRY(xit = xdoc.termlist_begin();
|
269 |
XAPTRY(xit = xdoc.termlist_begin();
|
269 |
xit.skip_to(wrap_prefix(udi_prefix)),
|
270 |
xit.skip_to(wrap_prefix(udi_prefix)),
|
270 |
xrdb, m_rcldb->m_reason);
|
271 |
xrdb, m_rcldb->m_reason);
|
271 |
if (!m_rcldb->m_reason.empty()) {
|
272 |
if (!m_rcldb->m_reason.empty()) {
|
272 |
LOGERR("xdocToUdi: xapian error: " << (m_rcldb->m_reason) << "\n" );
|
273 |
LOGERR("xdocToUdi: xapian error: " << m_rcldb->m_reason << "\n");
|
273 |
return false;
|
274 |
return false;
|
274 |
}
|
275 |
}
|
275 |
if (xit != xdoc.termlist_end()) {
|
276 |
if (xit != xdoc.termlist_end()) {
|
276 |
udi = *xit;
|
277 |
udi = *xit;
|
277 |
if (!udi.empty()) {
|
278 |
if (!udi.empty()) {
|
|
... |
|
... |
285 |
// Clear term from document if its frequency is 0. This should
|
286 |
// Clear term from document if its frequency is 0. This should
|
286 |
// probably be done by Xapian when the freq goes to 0 when removing a
|
287 |
// probably be done by Xapian when the freq goes to 0 when removing a
|
287 |
// posting, but we have to do it ourselves
|
288 |
// posting, but we have to do it ourselves
|
288 |
bool Db::Native::clearDocTermIfWdf0(Xapian::Document& xdoc, const string& term)
|
289 |
bool Db::Native::clearDocTermIfWdf0(Xapian::Document& xdoc, const string& term)
|
289 |
{
|
290 |
{
|
290 |
LOGDEB1("Db::clearDocTermIfWdf0: [" << (term) << "]\n" );
|
291 |
LOGDEB1("Db::clearDocTermIfWdf0: [" << term << "]\n");
|
291 |
|
292 |
|
292 |
// Find the term
|
293 |
// Find the term
|
293 |
Xapian::TermIterator xit;
|
294 |
Xapian::TermIterator xit;
|
294 |
XAPTRY(xit = xdoc.termlist_begin(); xit.skip_to(term);,
|
295 |
XAPTRY(xit = xdoc.termlist_begin(); xit.skip_to(term);,
|
295 |
xrdb, m_rcldb->m_reason);
|
296 |
xrdb, m_rcldb->m_reason);
|
296 |
if (!m_rcldb->m_reason.empty()) {
|
297 |
if (!m_rcldb->m_reason.empty()) {
|
297 |
LOGERR("Db::clearDocTerm...: [" << (term) << "] skip failed: " << (m_rcldb->m_reason) << "\n" );
|
298 |
LOGERR("Db::clearDocTerm...: [" << term << "] skip failed: " <<
|
|
|
299 |
m_rcldb->m_reason << "\n");
|
298 |
return false;
|
300 |
return false;
|
299 |
}
|
301 |
}
|
300 |
if (xit == xdoc.termlist_end() || term.compare(*xit)) {
|
302 |
if (xit == xdoc.termlist_end() || term.compare(*xit)) {
|
301 |
LOGDEB0("Db::clearDocTermIFWdf0: term [" << (term) << "] not found. xit: [" << (xit == xdoc.termlist_end() ? "EOL":(*xit)) << "]\n" );
|
303 |
LOGDEB0("Db::clearDocTermIFWdf0: term [" << term <<
|
|
|
304 |
"] not found. xit: [" <<
|
|
|
305 |
(xit == xdoc.termlist_end() ? "EOL": *xit) << "]\n");
|
302 |
return false;
|
306 |
return false;
|
303 |
}
|
307 |
}
|
304 |
|
308 |
|
305 |
// Clear the term if its frequency is 0
|
309 |
// Clear the term if its frequency is 0
|
306 |
if (xit.get_wdf() == 0) {
|
310 |
if (xit.get_wdf() == 0) {
|
307 |
LOGDEB1("Db::clearDocTermIfWdf0: clearing [" << (term) << "]\n" );
|
311 |
LOGDEB1("Db::clearDocTermIfWdf0: clearing [" << term << "]\n");
|
308 |
XAPTRY(xdoc.remove_term(term), xwdb, m_rcldb->m_reason);
|
312 |
XAPTRY(xdoc.remove_term(term), xwdb, m_rcldb->m_reason);
|
309 |
if (!m_rcldb->m_reason.empty()) {
|
313 |
if (!m_rcldb->m_reason.empty()) {
|
310 |
LOGDEB0("Db::clearDocTermIfWdf0: failed [" << (term) << "]: " << (m_rcldb->m_reason) << "\n" );
|
314 |
LOGDEB0("Db::clearDocTermIfWdf0: failed [" << term << "]: " <<
|
|
|
315 |
m_rcldb->m_reason << "\n");
|
311 |
}
|
316 |
}
|
312 |
}
|
317 |
}
|
313 |
return true;
|
318 |
return true;
|
314 |
}
|
319 |
}
|
315 |
|
320 |
|
|
... |
|
... |
326 |
// prefix. We also remove the postings for the unprefixed terms (that
|
331 |
// prefix. We also remove the postings for the unprefixed terms (that
|
327 |
// is, we undo what we did when indexing).
|
332 |
// is, we undo what we did when indexing).
|
328 |
bool Db::Native::clearField(Xapian::Document& xdoc, const string& pfx,
|
333 |
bool Db::Native::clearField(Xapian::Document& xdoc, const string& pfx,
|
329 |
Xapian::termcount wdfdec)
|
334 |
Xapian::termcount wdfdec)
|
330 |
{
|
335 |
{
|
331 |
LOGDEB1("Db::clearField: clearing prefix [" << (pfx) << "] for docid " << (unsigned(xdoc.get_docid())) << "\n" );
|
336 |
LOGDEB1("Db::clearField: clearing prefix [" << pfx << "] for docid " <<
|
|
|
337 |
xdoc.get_docid() << "\n");
|
332 |
|
338 |
|
333 |
vector<DocPosting> eraselist;
|
339 |
vector<DocPosting> eraselist;
|
334 |
|
340 |
|
335 |
string wrapd = wrap_prefix(pfx);
|
341 |
string wrapd = wrap_prefix(pfx);
|
336 |
|
342 |
|
|
... |
|
... |
340 |
Xapian::TermIterator xit;
|
346 |
Xapian::TermIterator xit;
|
341 |
xit = xdoc.termlist_begin();
|
347 |
xit = xdoc.termlist_begin();
|
342 |
xit.skip_to(wrapd);
|
348 |
xit.skip_to(wrapd);
|
343 |
while (xit != xdoc.termlist_end() &&
|
349 |
while (xit != xdoc.termlist_end() &&
|
344 |
!(*xit).compare(0, wrapd.size(), wrapd)) {
|
350 |
!(*xit).compare(0, wrapd.size(), wrapd)) {
|
345 |
LOGDEB1("Db::clearfield: erasing for [" << ((*xit)) << "]\n" );
|
351 |
LOGDEB1("Db::clearfield: erasing for [" << *xit << "]\n");
|
346 |
Xapian::PositionIterator posit;
|
352 |
Xapian::PositionIterator posit;
|
347 |
for (posit = xit.positionlist_begin();
|
353 |
for (posit = xit.positionlist_begin();
|
348 |
posit != xit.positionlist_end(); posit++) {
|
354 |
posit != xit.positionlist_end(); posit++) {
|
349 |
eraselist.push_back(DocPosting(*xit, *posit));
|
355 |
eraselist.push_back(DocPosting(*xit, *posit));
|
350 |
eraselist.push_back(DocPosting(strip_prefix(*xit), *posit));
|
356 |
eraselist.push_back(DocPosting(strip_prefix(*xit), *posit));
|
|
... |
|
... |
357 |
continue;
|
363 |
continue;
|
358 |
} XCATCHERROR(m_rcldb->m_reason);
|
364 |
} XCATCHERROR(m_rcldb->m_reason);
|
359 |
break;
|
365 |
break;
|
360 |
}
|
366 |
}
|
361 |
if (!m_rcldb->m_reason.empty()) {
|
367 |
if (!m_rcldb->m_reason.empty()) {
|
362 |
LOGERR("Db::clearField: failed building erase list: " << (m_rcldb->m_reason) << "\n" );
|
368 |
LOGERR("Db::clearField: failed building erase list: " <<
|
|
|
369 |
m_rcldb->m_reason << "\n");
|
363 |
return false;
|
370 |
return false;
|
364 |
}
|
371 |
}
|
365 |
|
372 |
|
366 |
// Now remove the found positions, and the terms if the wdf is 0
|
373 |
// Now remove the found positions, and the terms if the wdf is 0
|
367 |
for (vector<DocPosting>::const_iterator it = eraselist.begin();
|
374 |
for (vector<DocPosting>::const_iterator it = eraselist.begin();
|
368 |
it != eraselist.end(); it++) {
|
375 |
it != eraselist.end(); it++) {
|
369 |
LOGDEB1("Db::clearField: remove posting: [" << (it->term) << "] pos [" << (int(it->pos)) << "]\n" );
|
376 |
LOGDEB1("Db::clearField: remove posting: [" << it->term << "] pos [" <<
|
|
|
377 |
it->pos << "]\n");
|
370 |
XAPTRY(xdoc.remove_posting(it->term, it->pos, wdfdec);,
|
378 |
XAPTRY(xdoc.remove_posting(it->term, it->pos, wdfdec);,
|
371 |
xwdb,m_rcldb->m_reason);
|
379 |
xwdb,m_rcldb->m_reason);
|
372 |
if (!m_rcldb->m_reason.empty()) {
|
380 |
if (!m_rcldb->m_reason.empty()) {
|
373 |
// Not that this normally fails for non-prefixed XXST and
|
381 |
// Not that this normally fails for non-prefixed XXST and
|
374 |
// ND, don't make a fuss
|
382 |
// ND, don't make a fuss
|
375 |
LOGDEB1("Db::clearFiedl: remove_posting failed for [" << (it->term) << "]," << (int(it->pos)) << ": " << (m_rcldb->m_reason) << "\n" );
|
383 |
LOGDEB1("Db::clearFiedl: remove_posting failed for [" << it->term <<
|
|
|
384 |
"]," << it->pos << ": " << m_rcldb->m_reason << "\n");
|
376 |
}
|
385 |
}
|
377 |
clearDocTermIfWdf0(xdoc, it->term);
|
386 |
clearDocTermIfWdf0(xdoc, it->term);
|
378 |
}
|
387 |
}
|
379 |
return true;
|
388 |
return true;
|
380 |
}
|
389 |
}
|
381 |
|
390 |
|
382 |
// Check if doc given by udi is indexed by term
|
391 |
// Check if doc given by udi is indexed by term
|
383 |
bool Db::Native::hasTerm(const string& udi, int idxi, const string& term)
|
392 |
bool Db::Native::hasTerm(const string& udi, int idxi, const string& term)
|
384 |
{
|
393 |
{
|
385 |
LOGDEB2("Native::hasTerm: udi [" << (udi) << "] term [" << (term) << "]\n" );
|
394 |
LOGDEB2("Native::hasTerm: udi [" << udi << "] term [" << term << "]\n");
|
386 |
Xapian::Document xdoc;
|
395 |
Xapian::Document xdoc;
|
387 |
if (getDoc(udi, idxi, xdoc)) {
|
396 |
if (getDoc(udi, idxi, xdoc)) {
|
388 |
Xapian::TermIterator xit;
|
397 |
Xapian::TermIterator xit;
|
389 |
XAPTRY(xit = xdoc.termlist_begin();
|
398 |
XAPTRY(xit = xdoc.termlist_begin();
|
390 |
xit.skip_to(term);,
|
399 |
xit.skip_to(term);,
|
391 |
xrdb, m_rcldb->m_reason);
|
400 |
xrdb, m_rcldb->m_reason);
|
392 |
if (!m_rcldb->m_reason.empty()) {
|
401 |
if (!m_rcldb->m_reason.empty()) {
|
393 |
LOGERR("Rcl::Native::hasTerm: " << (m_rcldb->m_reason) << "\n" );
|
402 |
LOGERR("Rcl::Native::hasTerm: " << m_rcldb->m_reason << "\n");
|
394 |
return false;
|
403 |
return false;
|
395 |
}
|
404 |
}
|
396 |
if (xit != xdoc.termlist_end() && !term.compare(*xit)) {
|
405 |
if (xit != xdoc.termlist_end() && !term.compare(*xit)) {
|
397 |
return true;
|
406 |
return true;
|
398 |
}
|
407 |
}
|
|
... |
|
... |
422 |
xrdb.reopen();
|
431 |
xrdb.reopen();
|
423 |
continue;
|
432 |
continue;
|
424 |
} XCATCHERROR(m_rcldb->m_reason);
|
433 |
} XCATCHERROR(m_rcldb->m_reason);
|
425 |
break;
|
434 |
break;
|
426 |
}
|
435 |
}
|
427 |
LOGERR("Db::Native::getDoc: Xapian error: " << (m_rcldb->m_reason) << "\n" );
|
436 |
LOGERR("Db::Native::getDoc: Xapian error: " << m_rcldb->m_reason << "\n");
|
428 |
return 0;
|
437 |
return 0;
|
429 |
}
|
438 |
}
|
430 |
|
439 |
|
431 |
// Turn data record from db into document fields
|
440 |
// Turn data record from db into document fields
|
432 |
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
441 |
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data,
|
433 |
Doc &doc)
|
442 |
Doc &doc)
|
434 |
{
|
443 |
{
|
435 |
LOGDEB2("Db::dbDataToRclDoc: data:\n" << (data) << "\n" );
|
444 |
LOGDEB2("Db::dbDataToRclDoc: data:\n" << data << "\n");
|
436 |
ConfSimple parms(data);
|
445 |
ConfSimple parms(data);
|
437 |
if (!parms.ok())
|
446 |
if (!parms.ok())
|
438 |
return false;
|
447 |
return false;
|
439 |
|
448 |
|
440 |
doc.xdocid = docid;
|
449 |
doc.xdocid = docid;
|
|
... |
|
... |
501 |
if (pos != xrdb.positionlist_end(docid, page_break_term)) {
|
510 |
if (pos != xrdb.positionlist_end(docid, page_break_term)) {
|
502 |
return true;
|
511 |
return true;
|
503 |
},
|
512 |
},
|
504 |
xrdb, ermsg);
|
513 |
xrdb, ermsg);
|
505 |
if (!ermsg.empty()) {
|
514 |
if (!ermsg.empty()) {
|
506 |
LOGERR("Db::Native::hasPages: xapian error: " << (ermsg) << "\n" );
|
515 |
LOGERR("Db::Native::hasPages: xapian error: " << ermsg << "\n");
|
507 |
}
|
516 |
}
|
508 |
return false;
|
517 |
return false;
|
509 |
}
|
518 |
}
|
510 |
|
519 |
|
511 |
// Return the positions list for the page break term
|
520 |
// Return the positions list for the page break term
|
|
... |
|
... |
538 |
try {
|
547 |
try {
|
539 |
for (pos = xrdb.positionlist_begin(docid, qterm);
|
548 |
for (pos = xrdb.positionlist_begin(docid, qterm);
|
540 |
pos != xrdb.positionlist_end(docid, qterm); pos++) {
|
549 |
pos != xrdb.positionlist_end(docid, qterm); pos++) {
|
541 |
int ipos = *pos;
|
550 |
int ipos = *pos;
|
542 |
if (ipos < int(baseTextPosition)) {
|
551 |
if (ipos < int(baseTextPosition)) {
|
543 |
LOGDEB("getPagePositions: got page position " << (ipos) << " not in body\n" );
|
552 |
LOGDEB("getPagePositions: got page position " << ipos
|
|
|
553 |
<< " not in body\n");
|
544 |
// Not in text body. Strange...
|
554 |
// Not in text body. Strange...
|
545 |
continue;
|
555 |
continue;
|
546 |
}
|
556 |
}
|
547 |
map<int, int>::iterator it = mbreaksmap.find(ipos);
|
557 |
map<int, int>::iterator it = mbreaksmap.find(ipos);
|
548 |
if (it != mbreaksmap.end()) {
|
558 |
if (it != mbreaksmap.end()) {
|
549 |
LOGDEB1("getPagePositions: found multibreak at " << (ipos) << " incr " << (it->second) << "\n" );
|
559 |
LOGDEB1("getPagePositions: found multibreak at " << ipos <<
|
|
|
560 |
" incr " << it->second << "\n");
|
550 |
for (int i = 0 ; i < it->second; i++)
|
561 |
for (int i = 0 ; i < it->second; i++)
|
551 |
vpos.push_back(ipos);
|
562 |
vpos.push_back(ipos);
|
552 |
}
|
563 |
}
|
553 |
vpos.push_back(ipos);
|
564 |
vpos.push_back(ipos);
|
554 |
}
|
565 |
}
|
|
... |
|
... |
584 |
// to do this after having prepared the document, but it needs to be in
|
595 |
// to do this after having prepared the document, but it needs to be in
|
585 |
// the single-threaded section.
|
596 |
// the single-threaded section.
|
586 |
if (m_rcldb->m_maxFsOccupPc > 0 &&
|
597 |
if (m_rcldb->m_maxFsOccupPc > 0 &&
|
587 |
(m_rcldb->m_occFirstCheck ||
|
598 |
(m_rcldb->m_occFirstCheck ||
|
588 |
(m_rcldb->m_curtxtsz - m_rcldb->m_occtxtsz) / MB >= 1)) {
|
599 |
(m_rcldb->m_curtxtsz - m_rcldb->m_occtxtsz) / MB >= 1)) {
|
589 |
LOGDEB("Db::add: checking file system usage\n" );
|
600 |
LOGDEB("Db::add: checking file system usage\n");
|
590 |
int pc;
|
601 |
int pc;
|
591 |
m_rcldb->m_occFirstCheck = 0;
|
602 |
m_rcldb->m_occFirstCheck = 0;
|
592 |
if (fsocc(m_rcldb->m_basedir, &pc) && pc >= m_rcldb->m_maxFsOccupPc) {
|
603 |
if (fsocc(m_rcldb->m_basedir, &pc) && pc >= m_rcldb->m_maxFsOccupPc) {
|
593 |
LOGERR("Db::add: stop indexing: file system " << pc << " %" <<
|
604 |
LOGERR("Db::add: stop indexing: file system " << pc << " %" <<
|
594 |
" full > max " << m_rcldb->m_maxFsOccupPc << " %" << "\n");
|
605 |
" full > max " << m_rcldb->m_maxFsOccupPc << " %" << "\n");
|
595 |
return false;
|
606 |
return false;
|
596 |
}
|
607 |
}
|
597 |
m_rcldb->m_occtxtsz = m_rcldb->m_curtxtsz;
|
608 |
m_rcldb->m_occtxtsz = m_rcldb->m_curtxtsz;
|
598 |
}
|
609 |
}
|
599 |
|
610 |
|
|
... |
|
... |
607 |
if (did < m_rcldb->updated.size()) {
|
618 |
if (did < m_rcldb->updated.size()) {
|
608 |
// This is necessary because only the file-level docs are tested
|
619 |
// This is necessary because only the file-level docs are tested
|
609 |
// by needUpdate(), so the subdocs existence flags are only set
|
620 |
// by needUpdate(), so the subdocs existence flags are only set
|
610 |
// here.
|
621 |
// here.
|
611 |
m_rcldb->updated[did] = true;
|
622 |
m_rcldb->updated[did] = true;
|
612 |
LOGINFO("Db::add: docid " << (did) << " updated [" << (fnc) << "]\n" );
|
623 |
LOGINFO("Db::add: docid " << did << " updated [" << fnc << "]\n");
|
613 |
} else {
|
624 |
} else {
|
614 |
LOGINFO("Db::add: docid " << (did) << " added [" << (fnc) << "]\n" );
|
625 |
LOGINFO("Db::add: docid " << did << " added [" << fnc << "]\n");
|
615 |
}
|
626 |
}
|
616 |
} XCATCHERROR(ermsg);
|
627 |
} XCATCHERROR(ermsg);
|
617 |
|
628 |
|
618 |
if (!ermsg.empty()) {
|
629 |
if (!ermsg.empty()) {
|
619 |
LOGERR("Db::add: replace_document failed: " << (ermsg) << "\n" );
|
630 |
LOGERR("Db::add: replace_document failed: " << ermsg << "\n");
|
620 |
ermsg.erase();
|
631 |
ermsg.erase();
|
621 |
// FIXME: is this ever actually needed?
|
632 |
// FIXME: is this ever actually needed?
|
622 |
try {
|
633 |
try {
|
623 |
xwdb.add_document(*newdocument_ptr);
|
634 |
xwdb.add_document(*newdocument_ptr);
|
624 |
LOGDEB("Db::add: " << (fnc) << " added (failed re-seek for duplicate)\n" );
|
635 |
LOGDEB("Db::add: " << fnc <<
|
|
|
636 |
" added (failed re-seek for duplicate)\n");
|
625 |
} XCATCHERROR(ermsg);
|
637 |
} XCATCHERROR(ermsg);
|
626 |
if (!ermsg.empty()) {
|
638 |
if (!ermsg.empty()) {
|
627 |
LOGERR("Db::add: add_document failed: " << (ermsg) << "\n" );
|
639 |
LOGERR("Db::add: add_document failed: " << ermsg << "\n");
|
628 |
return false;
|
640 |
return false;
|
629 |
}
|
641 |
}
|
630 |
}
|
642 |
}
|
631 |
|
643 |
|
632 |
// Test if we're over the flush threshold (limit memory usage):
|
644 |
// Test if we're over the flush threshold (limit memory usage):
|
|
... |
|
... |
661 |
string sig;
|
673 |
string sig;
|
662 |
if (orphansOnly) {
|
674 |
if (orphansOnly) {
|
663 |
Xapian::Document doc = xwdb.get_document(*docid);
|
675 |
Xapian::Document doc = xwdb.get_document(*docid);
|
664 |
sig = doc.get_value(VALUE_SIG);
|
676 |
sig = doc.get_value(VALUE_SIG);
|
665 |
if (sig.empty()) {
|
677 |
if (sig.empty()) {
|
666 |
LOGINFO("purgeFileWrite: got empty sig\n" );
|
678 |
LOGINFO("purgeFileWrite: got empty sig\n");
|
667 |
return false;
|
679 |
return false;
|
668 |
}
|
680 |
}
|
669 |
} else {
|
681 |
} else {
|
670 |
LOGDEB("purgeFile: delete docid " << (*docid) << "\n" );
|
682 |
LOGDEB("purgeFile: delete docid " << *docid << "\n");
|
671 |
xwdb.delete_document(*docid);
|
683 |
xwdb.delete_document(*docid);
|
672 |
}
|
684 |
}
|
673 |
vector<Xapian::docid> docids;
|
685 |
vector<Xapian::docid> docids;
|
674 |
subDocs(udi, 0, docids);
|
686 |
subDocs(udi, 0, docids);
|
675 |
LOGDEB("purgeFile: subdocs cnt " << (docids.size()) << "\n" );
|
687 |
LOGDEB("purgeFile: subdocs cnt " << docids.size() << "\n");
|
676 |
for (vector<Xapian::docid>::iterator it = docids.begin();
|
688 |
for (vector<Xapian::docid>::iterator it = docids.begin();
|
677 |
it != docids.end(); it++) {
|
689 |
it != docids.end(); it++) {
|
678 |
if (m_rcldb->m_flushMb > 0) {
|
690 |
if (m_rcldb->m_flushMb > 0) {
|
679 |
Xapian::termcount trms = xwdb.get_doclength(*it);
|
691 |
Xapian::termcount trms = xwdb.get_doclength(*it);
|
680 |
m_rcldb->maybeflush(trms * 5);
|
692 |
m_rcldb->maybeflush(trms * 5);
|
|
... |
|
... |
682 |
string subdocsig;
|
694 |
string subdocsig;
|
683 |
if (orphansOnly) {
|
695 |
if (orphansOnly) {
|
684 |
Xapian::Document doc = xwdb.get_document(*it);
|
696 |
Xapian::Document doc = xwdb.get_document(*it);
|
685 |
subdocsig = doc.get_value(VALUE_SIG);
|
697 |
subdocsig = doc.get_value(VALUE_SIG);
|
686 |
if (subdocsig.empty()) {
|
698 |
if (subdocsig.empty()) {
|
687 |
LOGINFO("purgeFileWrite: got empty sig for subdoc??\n" );
|
699 |
LOGINFO("purgeFileWrite: got empty sig for subdoc??\n");
|
688 |
continue;
|
700 |
continue;
|
689 |
}
|
701 |
}
|
690 |
}
|
702 |
}
|
691 |
|
703 |
|
692 |
if (!orphansOnly || sig != subdocsig) {
|
704 |
if (!orphansOnly || sig != subdocsig) {
|
693 |
LOGDEB("Db::purgeFile: delete subdoc " << (*it) << "\n" );
|
705 |
LOGDEB("Db::purgeFile: delete subdoc " << *it << "\n");
|
694 |
xwdb.delete_document(*it);
|
706 |
xwdb.delete_document(*it);
|
695 |
}
|
707 |
}
|
696 |
}
|
708 |
}
|
697 |
return true;
|
709 |
return true;
|
698 |
} XCATCHERROR(ermsg);
|
710 |
} XCATCHERROR(ermsg);
|
699 |
if (!ermsg.empty()) {
|
711 |
if (!ermsg.empty()) {
|
700 |
LOGERR("Db::purgeFileWrite: " << (ermsg) << "\n" );
|
712 |
LOGERR("Db::purgeFileWrite: " << ermsg << "\n");
|
701 |
}
|
713 |
}
|
702 |
return false;
|
714 |
return false;
|
703 |
}
|
715 |
}
|
704 |
|
716 |
|
705 |
|
717 |
|
|
... |
|
... |
758 |
|
770 |
|
759 |
if (m_ndb == 0 || m_config == 0) {
|
771 |
if (m_ndb == 0 || m_config == 0) {
|
760 |
m_reason = "Null configuration or Xapian Db";
|
772 |
m_reason = "Null configuration or Xapian Db";
|
761 |
return false;
|
773 |
return false;
|
762 |
}
|
774 |
}
|
763 |
LOGDEB("Db::open: m_isopen " << (m_ndb->m_isopen) << " m_iswritable " << (m_ndb->m_iswritable) << " mode " << (mode) << "\n" );
|
775 |
LOGDEB("Db::open: m_isopen " << m_ndb->m_isopen << " m_iswritable " <<
|
|
|
776 |
m_ndb->m_iswritable << " mode " << mode << "\n");
|
764 |
|
777 |
|
765 |
if (m_ndb->m_isopen) {
|
778 |
if (m_ndb->m_isopen) {
|
766 |
// We used to return an error here but I see no reason to
|
779 |
// We used to return an error here but I see no reason to
|
767 |
if (!close())
|
780 |
if (!close())
|
768 |
return false;
|
781 |
return false;
|
|
... |
|
... |
796 |
// subDocs(). This issue has been gone for a long time
|
809 |
// subDocs(). This issue has been gone for a long time
|
797 |
// (now: Xapian 1.2) and the separate objects seem to
|
810 |
// (now: Xapian 1.2) and the separate objects seem to
|
798 |
// trigger other Xapian issues, so the query db is now
|
811 |
// trigger other Xapian issues, so the query db is now
|
799 |
// a clone of the update one.
|
812 |
// a clone of the update one.
|
800 |
m_ndb->xrdb = m_ndb->xwdb;
|
813 |
m_ndb->xrdb = m_ndb->xwdb;
|
801 |
LOGDEB("Db::open: lastdocid: " << (m_ndb->xwdb.get_lastdocid()) << "\n" );
|
814 |
LOGDEB("Db::open: lastdocid: " << m_ndb->xwdb.get_lastdocid() <<
|
|
|
815 |
"\n");
|
802 |
LOGDEB2("Db::open: resetting updated\n" );
|
816 |
LOGDEB2("Db::open: resetting updated\n");
|
803 |
updated.resize(m_ndb->xwdb.get_lastdocid() + 1);
|
817 |
updated.resize(m_ndb->xwdb.get_lastdocid() + 1);
|
804 |
for (unsigned int i = 0; i < updated.size(); i++)
|
818 |
for (unsigned int i = 0; i < updated.size(); i++)
|
805 |
updated[i] = false;
|
819 |
updated[i] = false;
|
806 |
}
|
820 |
}
|
807 |
break;
|
821 |
break;
|
|
... |
|
... |
811 |
m_ndb->xrdb = Xapian::Database(dir);
|
825 |
m_ndb->xrdb = Xapian::Database(dir);
|
812 |
for (vector<string>::iterator it = m_extraDbs.begin();
|
826 |
for (vector<string>::iterator it = m_extraDbs.begin();
|
813 |
it != m_extraDbs.end(); it++) {
|
827 |
it != m_extraDbs.end(); it++) {
|
814 |
if (error)
|
828 |
if (error)
|
815 |
*error = DbOpenExtraDb;
|
829 |
*error = DbOpenExtraDb;
|
816 |
LOGDEB("Db::Open: adding query db [" << &(*it) << "]\n" );
|
830 |
LOGDEB("Db::Open: adding query db [" << &(*it) << "]\n");
|
817 |
// An error here used to be non-fatal (1.13 and older)
|
831 |
// An error here used to be non-fatal (1.13 and older)
|
818 |
// but I can't see why
|
832 |
// but I can't see why
|
819 |
m_ndb->xrdb.add_database(Xapian::Database(*it));
|
833 |
m_ndb->xrdb.add_database(Xapian::Database(*it));
|
820 |
}
|
834 |
}
|
821 |
break;
|
835 |
break;
|
|
... |
|
... |
827 |
// truncated db
|
841 |
// truncated db
|
828 |
if (mode != DbTrunc && m_ndb->xrdb.get_doccount() > 0) {
|
842 |
if (mode != DbTrunc && m_ndb->xrdb.get_doccount() > 0) {
|
829 |
string version = m_ndb->xrdb.get_metadata(cstr_RCL_IDX_VERSION_KEY);
|
843 |
string version = m_ndb->xrdb.get_metadata(cstr_RCL_IDX_VERSION_KEY);
|
830 |
if (version.compare(cstr_RCL_IDX_VERSION)) {
|
844 |
if (version.compare(cstr_RCL_IDX_VERSION)) {
|
831 |
m_ndb->m_noversionwrite = true;
|
845 |
m_ndb->m_noversionwrite = true;
|
832 |
LOGERR("Rcl::Db::open: file index [" << (version) << "], software [" << (cstr_RCL_IDX_VERSION) << "]\n" );
|
846 |
LOGERR("Rcl::Db::open: file index [" << version <<
|
|
|
847 |
"], software [" << cstr_RCL_IDX_VERSION << "]\n");
|
833 |
throw Xapian::DatabaseError("Recoll index version mismatch",
|
848 |
throw Xapian::DatabaseError("Recoll index version mismatch",
|
834 |
"", "");
|
849 |
"", "");
|
835 |
}
|
850 |
}
|
836 |
}
|
851 |
}
|
837 |
m_mode = mode;
|
852 |
m_mode = mode;
|
|
... |
|
... |
841 |
*error = DbOpenNoError;
|
856 |
*error = DbOpenNoError;
|
842 |
return true;
|
857 |
return true;
|
843 |
} XCATCHERROR(ermsg);
|
858 |
} XCATCHERROR(ermsg);
|
844 |
|
859 |
|
845 |
m_reason = ermsg;
|
860 |
m_reason = ermsg;
|
846 |
LOGERR("Db::open: exception while opening [" << (dir) << "]: " << (ermsg) << "\n" );
|
861 |
LOGERR("Db::open: exception while opening [" <<dir<< "]: " << ermsg << "\n");
|
847 |
return false;
|
862 |
return false;
|
848 |
}
|
863 |
}
|
849 |
|
864 |
|
850 |
// Note: xapian has no close call, we delete and recreate the db
|
865 |
// Note: xapian has no close call, we delete and recreate the db
|
851 |
bool Db::close()
|
866 |
bool Db::close()
|
852 |
{
|
867 |
{
|
853 |
LOGDEB1("Db::close()\n" );
|
868 |
LOGDEB1("Db::close()\n");
|
854 |
return i_close(false);
|
869 |
return i_close(false);
|
855 |
}
|
870 |
}
|
856 |
bool Db::i_close(bool final)
|
871 |
bool Db::i_close(bool final)
|
857 |
{
|
872 |
{
|
858 |
if (m_ndb == 0)
|
873 |
if (m_ndb == 0)
|
859 |
return false;
|
874 |
return false;
|
860 |
LOGDEB("Db::i_close(" << (final) << "): m_isopen " << (m_ndb->m_isopen) << " m_iswritable " << (m_ndb->m_iswritable) << "\n" );
|
875 |
LOGDEB("Db::i_close(" << final << "): m_isopen " << m_ndb->m_isopen <<
|
|
|
876 |
" m_iswritable " << m_ndb->m_iswritable << "\n");
|
861 |
if (m_ndb->m_isopen == false && !final)
|
877 |
if (m_ndb->m_isopen == false && !final)
|
862 |
return true;
|
878 |
return true;
|
863 |
|
879 |
|
864 |
string ermsg;
|
880 |
string ermsg;
|
865 |
try {
|
881 |
try {
|
|
... |
|
... |
869 |
waitUpdIdle();
|
885 |
waitUpdIdle();
|
870 |
#endif
|
886 |
#endif
|
871 |
if (!m_ndb->m_noversionwrite)
|
887 |
if (!m_ndb->m_noversionwrite)
|
872 |
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,
|
888 |
m_ndb->xwdb.set_metadata(cstr_RCL_IDX_VERSION_KEY,
|
873 |
cstr_RCL_IDX_VERSION);
|
889 |
cstr_RCL_IDX_VERSION);
|
874 |
LOGDEB("Rcl::Db:close: xapian will close. May take some time\n" );
|
890 |
LOGDEB("Rcl::Db:close: xapian will close. May take some time\n");
|
875 |
}
|
891 |
}
|
876 |
deleteZ(m_ndb);
|
892 |
deleteZ(m_ndb);
|
877 |
if (w)
|
893 |
if (w)
|
878 |
LOGDEB("Rcl::Db:close() xapian close done.\n" );
|
894 |
LOGDEB("Rcl::Db:close() xapian close done.\n");
|
879 |
if (final) {
|
895 |
if (final) {
|
880 |
return true;
|
896 |
return true;
|
881 |
}
|
897 |
}
|
882 |
m_ndb = new Native(this);
|
898 |
m_ndb = new Native(this);
|
883 |
if (m_ndb) {
|
899 |
if (m_ndb) {
|
884 |
return true;
|
900 |
return true;
|
885 |
}
|
901 |
}
|
886 |
LOGERR("Rcl::Db::close(): cant recreate db object\n" );
|
902 |
LOGERR("Rcl::Db::close(): cant recreate db object\n");
|
887 |
return false;
|
903 |
return false;
|
888 |
} XCATCHERROR(ermsg);
|
904 |
} XCATCHERROR(ermsg);
|
889 |
LOGERR("Db:close: exception while deleting db: " << (ermsg) << "\n" );
|
905 |
LOGERR("Db:close: exception while deleting db: " << ermsg << "\n");
|
890 |
return false;
|
906 |
return false;
|
891 |
}
|
907 |
}
|
892 |
|
908 |
|
893 |
// Reopen the db with a changed list of additional dbs
|
909 |
// Reopen the db with a changed list of additional dbs
|
894 |
bool Db::adjustdbs()
|
910 |
bool Db::adjustdbs()
|
895 |
{
|
911 |
{
|
896 |
if (m_mode != DbRO) {
|
912 |
if (m_mode != DbRO) {
|
897 |
LOGERR("Db::adjustdbs: mode not RO\n" );
|
913 |
LOGERR("Db::adjustdbs: mode not RO\n");
|
898 |
return false;
|
914 |
return false;
|
899 |
}
|
915 |
}
|
900 |
if (m_ndb && m_ndb->m_isopen) {
|
916 |
if (m_ndb && m_ndb->m_isopen) {
|
901 |
if (!close())
|
917 |
if (!close())
|
902 |
return false;
|
918 |
return false;
|
|
... |
|
... |
914 |
return -1;
|
930 |
return -1;
|
915 |
|
931 |
|
916 |
XAPTRY(res = m_ndb->xrdb.get_doccount(), m_ndb->xrdb, m_reason);
|
932 |
XAPTRY(res = m_ndb->xrdb.get_doccount(), m_ndb->xrdb, m_reason);
|
917 |
|
933 |
|
918 |
if (!m_reason.empty()) {
|
934 |
if (!m_reason.empty()) {
|
919 |
LOGERR("Db::docCnt: got error: " << (m_reason) << "\n" );
|
935 |
LOGERR("Db::docCnt: got error: " << m_reason << "\n");
|
920 |
return -1;
|
936 |
return -1;
|
921 |
}
|
937 |
}
|
922 |
return res;
|
938 |
return res;
|
923 |
}
|
939 |
}
|
924 |
|
940 |
|
|
... |
|
... |
929 |
return -1;
|
945 |
return -1;
|
930 |
|
946 |
|
931 |
string term = _term;
|
947 |
string term = _term;
|
932 |
if (o_index_stripchars)
|
948 |
if (o_index_stripchars)
|
933 |
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
949 |
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
934 |
LOGINFO("Db::termDocCnt: unac failed for [" << (_term) << "]\n" );
|
950 |
LOGINFO("Db::termDocCnt: unac failed for [" << _term << "]\n");
|
935 |
return 0;
|
951 |
return 0;
|
936 |
}
|
952 |
}
|
937 |
|
953 |
|
938 |
if (m_stops.isStop(term)) {
|
954 |
if (m_stops.isStop(term)) {
|
939 |
LOGDEB1("Db::termDocCnt [" << (term) << "] in stop list\n" );
|
955 |
LOGDEB1("Db::termDocCnt [" << term << "] in stop list\n");
|
940 |
return 0;
|
956 |
return 0;
|
941 |
}
|
957 |
}
|
942 |
|
958 |
|
943 |
XAPTRY(res = m_ndb->xrdb.get_termfreq(term), m_ndb->xrdb, m_reason);
|
959 |
XAPTRY(res = m_ndb->xrdb.get_termfreq(term), m_ndb->xrdb, m_reason);
|
944 |
|
960 |
|
945 |
if (!m_reason.empty()) {
|
961 |
if (!m_reason.empty()) {
|
946 |
LOGERR("Db::termDocCnt: got error: " << (m_reason) << "\n" );
|
962 |
LOGERR("Db::termDocCnt: got error: " << m_reason << "\n");
|
947 |
return -1;
|
963 |
return -1;
|
948 |
}
|
964 |
}
|
949 |
return res;
|
965 |
return res;
|
950 |
}
|
966 |
}
|
951 |
|
967 |
|
952 |
bool Db::addQueryDb(const string &_dir)
|
968 |
bool Db::addQueryDb(const string &_dir)
|
953 |
{
|
969 |
{
|
954 |
string dir = _dir;
|
970 |
string dir = _dir;
|
955 |
LOGDEB0("Db::addQueryDb: ndb " << (m_ndb) << " iswritable " << ((m_ndb)?m_ndb->m_iswritable:0) << " db [" << (dir) << "]\n" );
|
971 |
LOGDEB0("Db::addQueryDb: ndb " << m_ndb << " iswritable " <<
|
|
|
972 |
((m_ndb)?m_ndb->m_iswritable:0) << " db [" << dir << "]\n");
|
956 |
if (!m_ndb)
|
973 |
if (!m_ndb)
|
957 |
return false;
|
974 |
return false;
|
958 |
if (m_ndb->m_iswritable)
|
975 |
if (m_ndb->m_iswritable)
|
959 |
return false;
|
976 |
return false;
|
960 |
dir = path_canon(dir);
|
977 |
dir = path_canon(dir);
|
|
... |
|
... |
990 |
return m_ndb->whatDbIdx(doc.xdocid);
|
1007 |
return m_ndb->whatDbIdx(doc.xdocid);
|
991 |
}
|
1008 |
}
|
992 |
|
1009 |
|
993 |
size_t Db::Native::whatDbIdx(Xapian::docid id)
|
1010 |
size_t Db::Native::whatDbIdx(Xapian::docid id)
|
994 |
{
|
1011 |
{
|
995 |
LOGDEB1("Db::whatDbIdx: xdocid " << ((unsigned long)id) << ", " <<
|
1012 |
LOGDEB1("Db::whatDbIdx: xdocid " << id << ", " <<
|
996 |
(m_rcldb->m_extraDbs.size()) << " extraDbs\n" );
|
1013 |
m_rcldb->m_extraDbs.size() << " extraDbs\n");
|
997 |
if (id == 0)
|
1014 |
if (id == 0)
|
998 |
return (size_t)-1;
|
1015 |
return (size_t)-1;
|
999 |
if (m_rcldb->m_extraDbs.size() == 0)
|
1016 |
if (m_rcldb->m_extraDbs.size() == 0)
|
1000 |
return 0;
|
1017 |
return 0;
|
1001 |
return (id - 1) % (m_rcldb->m_extraDbs.size() + 1);
|
1018 |
return (id - 1) % (m_rcldb->m_extraDbs.size() + 1);
|
|
... |
|
... |
1003 |
|
1020 |
|
1004 |
bool Db::testDbDir(const string &dir, bool *stripped_p)
|
1021 |
bool Db::testDbDir(const string &dir, bool *stripped_p)
|
1005 |
{
|
1022 |
{
|
1006 |
string aerr;
|
1023 |
string aerr;
|
1007 |
bool mstripped = true;
|
1024 |
bool mstripped = true;
|
1008 |
LOGDEB("Db::testDbDir: [" << (dir) << "]\n" );
|
1025 |
LOGDEB("Db::testDbDir: [" << dir << "]\n");
|
1009 |
try {
|
1026 |
try {
|
1010 |
Xapian::Database db(dir);
|
1027 |
Xapian::Database db(dir);
|
1011 |
// If we have terms with a leading ':' it's an
|
1028 |
// If we have terms with a leading ':' it's an
|
1012 |
// unstripped index
|
1029 |
// unstripped index
|
1013 |
Xapian::TermIterator term = db.allterms_begin(":");
|
1030 |
Xapian::TermIterator term = db.allterms_begin(":");
|
|
... |
|
... |
1015 |
mstripped = true;
|
1032 |
mstripped = true;
|
1016 |
else
|
1033 |
else
|
1017 |
mstripped = false;
|
1034 |
mstripped = false;
|
1018 |
} XCATCHERROR(aerr);
|
1035 |
} XCATCHERROR(aerr);
|
1019 |
if (!aerr.empty()) {
|
1036 |
if (!aerr.empty()) {
|
1020 |
LOGERR("Db::Open: error while trying to open database from [" << (dir) << "]: " << (aerr) << "\n" );
|
1037 |
LOGERR("Db::Open: error while trying to open database from [" <<
|
|
|
1038 |
dir << "]: " << aerr << "\n");
|
1021 |
return false;
|
1039 |
return false;
|
1022 |
}
|
1040 |
}
|
1023 |
if (stripped_p)
|
1041 |
if (stripped_p)
|
1024 |
*stripped_p = mstripped;
|
1042 |
*stripped_p = mstripped;
|
1025 |
|
1043 |
|
|
... |
|
... |
1076 |
// Index the possibly prefixed start term.
|
1094 |
// Index the possibly prefixed start term.
|
1077 |
doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
|
1095 |
doc.add_posting(ft.pfx + start_of_field_term, basepos, ft.wdfinc);
|
1078 |
++basepos;
|
1096 |
++basepos;
|
1079 |
} XCATCHERROR(ermsg);
|
1097 |
} XCATCHERROR(ermsg);
|
1080 |
if (!ermsg.empty()) {
|
1098 |
if (!ermsg.empty()) {
|
1081 |
LOGERR("Db: xapian add_posting error " << (ermsg) << "\n" );
|
1099 |
LOGERR("Db: xapian add_posting error " << ermsg << "\n");
|
1082 |
goto out;
|
1100 |
goto out;
|
1083 |
}
|
1101 |
}
|
1084 |
|
1102 |
|
1085 |
if (!TextSplitP::text_to_words(in)) {
|
1103 |
if (!TextSplitP::text_to_words(in)) {
|
1086 |
LOGDEB("TextSplitDb: TextSplit::text_to_words failed\n" );
|
1104 |
LOGDEB("TextSplitDb: TextSplit::text_to_words failed\n");
|
1087 |
goto out;
|
1105 |
goto out;
|
1088 |
}
|
1106 |
}
|
1089 |
|
1107 |
|
1090 |
try {
|
1108 |
try {
|
1091 |
// Index the possibly prefixed end term.
|
1109 |
// Index the possibly prefixed end term.
|
1092 |
doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
|
1110 |
doc.add_posting(ft.pfx + end_of_field_term, basepos + curpos + 1,
|
1093 |
ft.wdfinc);
|
1111 |
ft.wdfinc);
|
1094 |
++basepos;
|
1112 |
++basepos;
|
1095 |
} XCATCHERROR(ermsg);
|
1113 |
} XCATCHERROR(ermsg);
|
1096 |
if (!ermsg.empty()) {
|
1114 |
if (!ermsg.empty()) {
|
1097 |
LOGERR("Db: xapian add_posting error " << (ermsg) << "\n" );
|
1115 |
LOGERR("Db: xapian add_posting error " << ermsg << "\n");
|
1098 |
goto out;
|
1116 |
goto out;
|
1099 |
}
|
1117 |
}
|
1100 |
|
1118 |
|
1101 |
out:
|
1119 |
out:
|
1102 |
basepos += curpos + 100;
|
1120 |
basepos += curpos + 100;
|
|
... |
|
... |
1132 |
if (term.empty())
|
1150 |
if (term.empty())
|
1133 |
return true;
|
1151 |
return true;
|
1134 |
string ermsg;
|
1152 |
string ermsg;
|
1135 |
try {
|
1153 |
try {
|
1136 |
// Index without prefix, using the field-specific weighting
|
1154 |
// Index without prefix, using the field-specific weighting
|
1137 |
LOGDEB1("Emitting term at " << pos << " : [" << term << "]\n" );
|
1155 |
LOGDEB1("Emitting term at " << pos << " : [" << term << "]\n");
|
1138 |
if (!m_ts->ft.pfxonly)
|
1156 |
if (!m_ts->ft.pfxonly)
|
1139 |
m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
|
1157 |
m_ts->doc.add_posting(term, pos, m_ts->ft.wdfinc);
|
1140 |
|
1158 |
|
1141 |
#ifdef TESTING_XAPIAN_SPELL
|
1159 |
#ifdef TESTING_XAPIAN_SPELL
|
1142 |
if (Db::isSpellingCandidate(term, false)) {
|
1160 |
if (Db::isSpellingCandidate(term, false)) {
|
|
... |
|
... |
1148 |
m_ts->doc.add_posting(m_ts->ft.pfx + term, pos,
|
1166 |
m_ts->doc.add_posting(m_ts->ft.pfx + term, pos,
|
1149 |
m_ts->ft.wdfinc);
|
1167 |
m_ts->ft.wdfinc);
|
1150 |
}
|
1168 |
}
|
1151 |
return true;
|
1169 |
return true;
|
1152 |
} XCATCHERROR(ermsg);
|
1170 |
} XCATCHERROR(ermsg);
|
1153 |
LOGERR("Db: xapian add_posting error " << (ermsg) << "\n" );
|
1171 |
LOGERR("Db: xapian add_posting error " << ermsg << "\n");
|
1154 |
return false;
|
1172 |
return false;
|
1155 |
}
|
1173 |
}
|
1156 |
void newpage(int pos)
|
1174 |
void newpage(int pos)
|
1157 |
{
|
1175 |
{
|
1158 |
pos += m_ts->basepos;
|
1176 |
pos += m_ts->basepos;
|
1159 |
if (pos < int(baseTextPosition)) {
|
1177 |
if (pos < int(baseTextPosition)) {
|
1160 |
LOGDEB("newpage: not in body: " << (pos) << "\n" );
|
1178 |
LOGDEB("newpage: not in body: " << pos << "\n");
|
1161 |
return;
|
1179 |
return;
|
1162 |
}
|
1180 |
}
|
1163 |
|
1181 |
|
1164 |
m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
|
1182 |
m_ts->doc.add_posting(m_ts->ft.pfx + page_break_term, pos);
|
1165 |
if (pos == m_lastpagepos) {
|
1183 |
if (pos == m_lastpagepos) {
|
1166 |
m_pageincr++;
|
1184 |
m_pageincr++;
|
1167 |
LOGDEB2("newpage: same pos, pageincr " << (m_pageincr) << " lastpagepos " << (m_lastpagepos) << "\n" );
|
1185 |
LOGDEB2("newpage: same pos, pageincr " << m_pageincr <<
|
|
|
1186 |
" lastpagepos " << m_lastpagepos << "\n");
|
1168 |
} else {
|
1187 |
} else {
|
1169 |
LOGDEB2("newpage: pos change, pageincr " << (m_pageincr) << " lastpagepos " << (m_lastpagepos) << "\n" );
|
1188 |
LOGDEB2("newpage: pos change, pageincr " << m_pageincr <<
|
|
|
1189 |
" lastpagepos " << m_lastpagepos << "\n");
|
1170 |
if (m_pageincr > 0) {
|
1190 |
if (m_pageincr > 0) {
|
1171 |
// Remember the multiple page break at this position
|
1191 |
// Remember the multiple page break at this position
|
1172 |
unsigned int relpos = m_lastpagepos - baseTextPosition;
|
1192 |
unsigned int relpos = m_lastpagepos - baseTextPosition;
|
1173 |
LOGDEB2("Remembering multiple page break. Relpos " << (relpos) << " cnt " << (m_pageincr) << "\n" );
|
1193 |
LOGDEB2("Remembering multiple page break. Relpos " << relpos <<
|
|
|
1194 |
" cnt " << m_pageincr << "\n");
|
1174 |
m_pageincrvec.push_back(pair<int, int>(relpos, m_pageincr));
|
1195 |
m_pageincrvec.push_back(pair<int, int>(relpos, m_pageincr));
|
1175 |
}
|
1196 |
}
|
1176 |
m_pageincr = 0;
|
1197 |
m_pageincr = 0;
|
1177 |
}
|
1198 |
}
|
1178 |
m_lastpagepos = pos;
|
1199 |
m_lastpagepos = pos;
|
|
... |
|
... |
1180 |
|
1201 |
|
1181 |
virtual bool flush()
|
1202 |
virtual bool flush()
|
1182 |
{
|
1203 |
{
|
1183 |
if (m_pageincr > 0) {
|
1204 |
if (m_pageincr > 0) {
|
1184 |
unsigned int relpos = m_lastpagepos - baseTextPosition;
|
1205 |
unsigned int relpos = m_lastpagepos - baseTextPosition;
|
1185 |
LOGDEB2("Remembering multiple page break. Position " << (relpos) << " cnt " << (m_pageincr) << "\n" );
|
1206 |
LOGDEB2("Remembering multiple page break. Position " << relpos <<
|
|
|
1207 |
" cnt " << m_pageincr << "\n");
|
1186 |
m_pageincrvec.push_back(pair<int, int>(relpos, m_pageincr));
|
1208 |
m_pageincrvec.push_back(pair<int, int>(relpos, m_pageincr));
|
1187 |
m_pageincr = 0;
|
1209 |
m_pageincr = 0;
|
1188 |
}
|
1210 |
}
|
1189 |
return TermProc::flush();
|
1211 |
return TermProc::flush();
|
1190 |
}
|
1212 |
}
|
|
... |
|
... |
1201 |
|
1223 |
|
1202 |
// At the moment, we normally use the Xapian speller for Katakana and
|
1224 |
// At the moment, we normally use the Xapian speller for Katakana and
|
1203 |
// aspell for everything else
|
1225 |
// aspell for everything else
|
1204 |
bool Db::getSpellingSuggestions(const string& word, vector<string>& suggs)
|
1226 |
bool Db::getSpellingSuggestions(const string& word, vector<string>& suggs)
|
1205 |
{
|
1227 |
{
|
1206 |
LOGDEB("Db::getSpellingSuggestions:[" << word << "]\n" );
|
1228 |
LOGDEB("Db::getSpellingSuggestions:[" << word << "]\n");
|
1207 |
suggs.clear();
|
1229 |
suggs.clear();
|
1208 |
if (nullptr == m_ndb) {
|
1230 |
if (nullptr == m_ndb) {
|
1209 |
return false;
|
1231 |
return false;
|
1210 |
}
|
1232 |
}
|
1211 |
|
1233 |
|
|
... |
|
... |
1223 |
m_aspell = new Aspell(m_config);
|
1245 |
m_aspell = new Aspell(m_config);
|
1224 |
if (m_aspell) {
|
1246 |
if (m_aspell) {
|
1225 |
string reason;
|
1247 |
string reason;
|
1226 |
m_aspell->init(reason);
|
1248 |
m_aspell->init(reason);
|
1227 |
if (!m_aspell->ok()) {
|
1249 |
if (!m_aspell->ok()) {
|
1228 |
LOGDEB(("Aspell speller init failed %s\n", reason.c_str()));
|
1250 |
LOGDEB("Aspell speller init failed: " << reason << endl);
|
1229 |
delete m_aspell;
|
1251 |
delete m_aspell;
|
1230 |
m_aspell = 0;
|
1252 |
m_aspell = 0;
|
1231 |
}
|
1253 |
}
|
1232 |
}
|
1254 |
}
|
1233 |
}
|
1255 |
}
|
|
... |
|
... |
1393 |
if (!meta_it->second.empty()) {
|
1415 |
if (!meta_it->second.empty()) {
|
1394 |
const FieldTraits *ftp;
|
1416 |
const FieldTraits *ftp;
|
1395 |
// We don't test for an empty prefix here. Some fields are part
|
1417 |
// We don't test for an empty prefix here. Some fields are part
|
1396 |
// of the internal conf with an empty prefix (ie: abstract).
|
1418 |
// of the internal conf with an empty prefix (ie: abstract).
|
1397 |
if (!fieldToTraits(meta_it->first, &ftp)) {
|
1419 |
if (!fieldToTraits(meta_it->first, &ftp)) {
|
1398 |
LOGDEB0("Db::add: no prefix for field [" << (meta_it->first) << "], no indexing\n" );
|
1420 |
LOGDEB0("Db::add: no prefix for field [" <<
|
|
|
1421 |
meta_it->first << "], no indexing\n");
|
1399 |
continue;
|
1422 |
continue;
|
1400 |
}
|
1423 |
}
|
1401 |
LOGDEB0("Db::add: field [" << (meta_it->first) << "] pfx [" << (ftp->pfx) << "] inc " << (ftp->wdfinc) << ": [" << (meta_it->second) << "]\n" );
|
1424 |
LOGDEB0("Db::add: field [" << meta_it->first << "] pfx [" <<
|
|
|
1425 |
ftp->pfx << "] inc " << ftp->wdfinc << ": [" <<
|
|
|
1426 |
meta_it->second << "]\n");
|
1402 |
splitter.setTraits(*ftp);
|
1427 |
splitter.setTraits(*ftp);
|
1403 |
if (!splitter.text_to_words(meta_it->second))
|
1428 |
if (!splitter.text_to_words(meta_it->second)) {
|
1404 |
LOGDEB("Db::addOrUpdate: split failed for " << (meta_it->first) << "\n" );
|
1429 |
LOGDEB("Db::addOrUpdate: split failed for " <<
|
|
|
1430 |
meta_it->first << "\n");
|
|
|
1431 |
}
|
1405 |
}
|
1432 |
}
|
1406 |
}
|
1433 |
}
|
1407 |
|
1434 |
|
1408 |
// Reset to no prefix and default params
|
1435 |
// Reset to no prefix and default params
|
1409 |
splitter.setTraits(FieldTraits());
|
1436 |
splitter.setTraits(FieldTraits());
|
1410 |
|
1437 |
|
1411 |
if (splitter.curpos < baseTextPosition)
|
1438 |
if (splitter.curpos < baseTextPosition)
|
1412 |
splitter.basepos = baseTextPosition;
|
1439 |
splitter.basepos = baseTextPosition;
|
1413 |
|
1440 |
|
1414 |
// Split and index body text
|
1441 |
// Split and index body text
|
1415 |
LOGDEB2("Db::add: split body: [" << (doc.text) << "]\n" );
|
1442 |
LOGDEB2("Db::add: split body: [" << doc.text << "]\n");
|
1416 |
|
1443 |
|
1417 |
#ifdef TEXTSPLIT_STATS
|
1444 |
#ifdef TEXTSPLIT_STATS
|
1418 |
splitter.resetStats();
|
1445 |
splitter.resetStats();
|
1419 |
#endif
|
1446 |
#endif
|
1420 |
if (!splitter.text_to_words(doc.text))
|
1447 |
if (!splitter.text_to_words(doc.text))
|
1421 |
LOGDEB("Db::addOrUpdate: split failed for main text\n" );
|
1448 |
LOGDEB("Db::addOrUpdate: split failed for main text\n");
|
1422 |
|
1449 |
|
1423 |
#ifdef TEXTSPLIT_STATS
|
1450 |
#ifdef TEXTSPLIT_STATS
|
1424 |
// Reject bad data. unrecognized base64 text is characterized by
|
1451 |
// Reject bad data. unrecognized base64 text is characterized by
|
1425 |
// high avg word length and high variation (because there are
|
1452 |
// high avg word length and high variation (because there are
|
1426 |
// word-splitters like +/ inside the data).
|
1453 |
// word-splitters like +/ inside the data).
|
1427 |
TextSplit::Stats::Values v = splitter.getStats();
|
1454 |
TextSplit::Stats::Values v = splitter.getStats();
|
1428 |
// v.avglen > 15 && v.sigma > 12
|
1455 |
// v.avglen > 15 && v.sigma > 12
|
1429 |
if (v.count > 200 && (v.avglen > 10 && v.sigma / v.avglen > 0.8)) {
|
1456 |
if (v.count > 200 && (v.avglen > 10 && v.sigma / v.avglen > 0.8)) {
|
1430 |
LOGINFO("RclDb::addOrUpdate: rejecting doc for bad stats count " << (v.count) << " avglen " << (v.avglen) << " sigma " << (v.sigma) << " url [" << (doc.url) << "] ipath [" << (doc.ipath) << "] text " << (doc.text) << "\n" );
|
1457 |
LOGINFO("RclDb::addOrUpdate: rejecting doc for bad stats count " <<
|
|
|
1458 |
v.count << " avglen " << v.avglen << " sigma " << v.sigma <<
|
|
|
1459 |
" url [" << doc.url << "] ipath [" << doc.ipath <<
|
|
|
1460 |
"] text " << doc.text << "\n");
|
1431 |
delete newdocument_ptr;
|
1461 |
delete newdocument_ptr;
|
1432 |
return true;
|
1462 |
return true;
|
1433 |
}
|
1463 |
}
|
1434 |
#endif
|
1464 |
#endif
|
1435 |
|
1465 |
|
|
... |
|
... |
1624 |
MD5HexScan(*md5, digest);
|
1654 |
MD5HexScan(*md5, digest);
|
1625 |
newdocument.add_value(VALUE_MD5, digest);
|
1655 |
newdocument.add_value(VALUE_MD5, digest);
|
1626 |
newdocument.add_boolean_term(wrap_prefix("XM") + *md5);
|
1656 |
newdocument.add_boolean_term(wrap_prefix("XM") + *md5);
|
1627 |
}
|
1657 |
}
|
1628 |
|
1658 |
|
1629 |
LOGDEB0("Rcl::Db::add: new doc record:\n" << (record) << "\n" );
|
1659 |
LOGDEB0("Rcl::Db::add: new doc record:\n" << record << "\n");
|
1630 |
newdocument.set_data(record);
|
1660 |
newdocument.set_data(record);
|
1631 |
}
|
1661 |
}
|
1632 |
#ifdef IDX_THREADS
|
1662 |
#ifdef IDX_THREADS
|
1633 |
if (m_ndb->m_havewriteq) {
|
1663 |
if (m_ndb->m_havewriteq) {
|
1634 |
DbUpdTask *tp = new DbUpdTask(DbUpdTask::AddOrUpdate, udi, uniterm,
|
1664 |
DbUpdTask *tp = new DbUpdTask(DbUpdTask::AddOrUpdate, udi, uniterm,
|
1635 |
newdocument_ptr, doc.text.length());
|
1665 |
newdocument_ptr, doc.text.length());
|
1636 |
if (!m_ndb->m_wqueue.put(tp)) {
|
1666 |
if (!m_ndb->m_wqueue.put(tp)) {
|
1637 |
LOGERR("Db::addOrUpdate:Cant queue task\n" );
|
1667 |
LOGERR("Db::addOrUpdate:Cant queue task\n");
|
1638 |
delete newdocument_ptr;
|
1668 |
delete newdocument_ptr;
|
1639 |
return false;
|
1669 |
return false;
|
1640 |
} else {
|
1670 |
} else {
|
1641 |
return true;
|
1671 |
return true;
|
1642 |
}
|
1672 |
}
|
|
... |
|
... |
1648 |
}
|
1678 |
}
|
1649 |
|
1679 |
|
1650 |
bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi,
|
1680 |
bool Db::Native::docToXdocXattrOnly(TextSplitDb *splitter, const string &udi,
|
1651 |
Doc &doc, Xapian::Document& xdoc)
|
1681 |
Doc &doc, Xapian::Document& xdoc)
|
1652 |
{
|
1682 |
{
|
1653 |
LOGDEB0("Db::docToXdocXattrOnly\n" );
|
1683 |
LOGDEB0("Db::docToXdocXattrOnly\n");
|
1654 |
#ifdef IDX_THREADS
|
1684 |
#ifdef IDX_THREADS
|
1655 |
std::unique_lock<std::mutex> lock(m_mutex);
|
1685 |
std::unique_lock<std::mutex> lock(m_mutex);
|
1656 |
#endif
|
1686 |
#endif
|
1657 |
|
1687 |
|
1658 |
// Read existing document and its data record
|
1688 |
// Read existing document and its data record
|
1659 |
if (getDoc(udi, 0, xdoc) == 0) {
|
1689 |
if (getDoc(udi, 0, xdoc) == 0) {
|
1660 |
LOGERR("docToXdocXattrOnly: existing doc not found\n" );
|
1690 |
LOGERR("docToXdocXattrOnly: existing doc not found\n");
|
1661 |
return false;
|
1691 |
return false;
|
1662 |
}
|
1692 |
}
|
1663 |
string data;
|
1693 |
string data;
|
1664 |
XAPTRY(data = xdoc.get_data(), xrdb, m_rcldb->m_reason);
|
1694 |
XAPTRY(data = xdoc.get_data(), xrdb, m_rcldb->m_reason);
|
1665 |
if (!m_rcldb->m_reason.empty()) {
|
1695 |
if (!m_rcldb->m_reason.empty()) {
|
1666 |
LOGERR("Db::xattrOnly: got error: " << (m_rcldb->m_reason) << "\n" );
|
1696 |
LOGERR("Db::xattrOnly: got error: " << m_rcldb->m_reason << "\n");
|
1667 |
return false;
|
1697 |
return false;
|
1668 |
}
|
1698 |
}
|
1669 |
|
1699 |
|
1670 |
// Clear the term lists for the incoming fields and index the new values
|
1700 |
// Clear the term lists for the incoming fields and index the new values
|
1671 |
map<string, string>::iterator meta_it;
|
1701 |
map<string, string>::iterator meta_it;
|
1672 |
for (meta_it = doc.meta.begin(); meta_it != doc.meta.end(); meta_it++) {
|
1702 |
for (meta_it = doc.meta.begin(); meta_it != doc.meta.end(); meta_it++) {
|
1673 |
const FieldTraits *ftp;
|
1703 |
const FieldTraits *ftp;
|
1674 |
if (!m_rcldb->fieldToTraits(meta_it->first, &ftp) || ftp->pfx.empty()) {
|
1704 |
if (!m_rcldb->fieldToTraits(meta_it->first, &ftp) || ftp->pfx.empty()) {
|
1675 |
LOGDEB0("Db::xattrOnly: no prefix for field [" << (meta_it->first) << "], skipped\n" );
|
1705 |
LOGDEB0("Db::xattrOnly: no prefix for field [" <<
|
|
|
1706 |
meta_it->first << "], skipped\n");
|
1676 |
continue;
|
1707 |
continue;
|
1677 |
}
|
1708 |
}
|
1678 |
// Clear the previous terms for the field
|
1709 |
// Clear the previous terms for the field
|
1679 |
clearField(xdoc, ftp->pfx, ftp->wdfinc);
|
1710 |
clearField(xdoc, ftp->pfx, ftp->wdfinc);
|
1680 |
LOGDEB0("Db::xattrOnly: field [" << (meta_it->first) << "] pfx [" << (ftp->pfx) << "] inc " << (ftp->wdfinc) << ": [" << (meta_it->second) << "]\n" );
|
1711 |
LOGDEB0("Db::xattrOnly: field [" << meta_it->first << "] pfx [" <<
|
|
|
1712 |
ftp->pfx << "] inc " << ftp->wdfinc << ": [" <<
|
|
|
1713 |
meta_it->second << "]\n");
|
1681 |
splitter->setTraits(*ftp);
|
1714 |
splitter->setTraits(*ftp);
|
1682 |
if (!splitter->text_to_words(meta_it->second))
|
1715 |
if (!splitter->text_to_words(meta_it->second)) {
|
1683 |
LOGDEB("Db::xattrOnly: split failed for " << (meta_it->first) << "\n" );
|
1716 |
LOGDEB("Db::xattrOnly: split failed for " << meta_it->first << "\n");
|
|
|
1717 |
}
|
1684 |
}
|
1718 |
}
|
1685 |
xdoc.add_value(VALUE_SIG, doc.sig);
|
1719 |
xdoc.add_value(VALUE_SIG, doc.sig);
|
1686 |
|
1720 |
|
1687 |
// Parse current data record into a dict for ease of processing
|
1721 |
// Parse current data record into a dict for ease of processing
|
1688 |
ConfSimple datadic(data);
|
1722 |
ConfSimple datadic(data);
|
1689 |
if (!datadic.ok()) {
|
1723 |
if (!datadic.ok()) {
|
1690 |
LOGERR("db::docToXdocXattrOnly: failed turning data rec to dict\n" );
|
1724 |
LOGERR("db::docToXdocXattrOnly: failed turning data rec to dict\n");
|
1691 |
return false;
|
1725 |
return false;
|
1692 |
}
|
1726 |
}
|
1693 |
|
1727 |
|
1694 |
// For each "stored" field, check if set in doc metadata and
|
1728 |
// For each "stored" field, check if set in doc metadata and
|
1695 |
// update the value if it is
|
1729 |
// update the value if it is
|
|
... |
|
... |
1730 |
string ermsg;
|
1764 |
string ermsg;
|
1731 |
try {
|
1765 |
try {
|
1732 |
m_ndb->xwdb.commit();
|
1766 |
m_ndb->xwdb.commit();
|
1733 |
} XCATCHERROR(ermsg);
|
1767 |
} XCATCHERROR(ermsg);
|
1734 |
if (!ermsg.empty()) {
|
1768 |
if (!ermsg.empty()) {
|
1735 |
LOGERR("Db::waitUpdIdle: flush() failed: " << (ermsg) << "\n" );
|
1769 |
LOGERR("Db::waitUpdIdle: flush() failed: " << ermsg << "\n");
|
1736 |
}
|
1770 |
}
|
1737 |
m_ndb->m_totalworkns += chron.nanos();
|
1771 |
m_ndb->m_totalworkns += chron.nanos();
|
1738 |
LOGINFO("Db::waitUpdIdle: total xapian work " << (lltodecstr(m_ndb->m_totalworkns/1000000)) << " mS\n" );
|
1772 |
LOGINFO("Db::waitUpdIdle: total xapian work " <<
|
|
|
1773 |
lltodecstr(m_ndb->m_totalworkns/1000000) << " mS\n");
|
1739 |
}
|
1774 |
}
|
1740 |
}
|
1775 |
}
|
1741 |
#endif
|
1776 |
#endif
|
1742 |
|
1777 |
|
1743 |
// Flush when idxflushmbs is reached
|
1778 |
// Flush when idxflushmbs is reached
|
1744 |
bool Db::maybeflush(int64_t moretext)
|
1779 |
bool Db::maybeflush(int64_t moretext)
|
1745 |
{
|
1780 |
{
|
1746 |
if (m_flushMb > 0) {
|
1781 |
if (m_flushMb > 0) {
|
1747 |
m_curtxtsz += moretext;
|
1782 |
m_curtxtsz += moretext;
|
1748 |
if ((m_curtxtsz - m_flushtxtsz) / MB >= m_flushMb) {
|
1783 |
if ((m_curtxtsz - m_flushtxtsz) / MB >= m_flushMb) {
|
1749 |
LOGDEB("Db::add/delete: txt size >= " << (m_flushMb) << " Mb, flushing\n" );
|
1784 |
LOGDEB("Db::add/delete: txt size >= " << m_flushMb <<
|
|
|
1785 |
" Mb, flushing\n");
|
1750 |
return doFlush();
|
1786 |
return doFlush();
|
1751 |
}
|
1787 |
}
|
1752 |
}
|
1788 |
}
|
1753 |
return true;
|
1789 |
return true;
|
1754 |
}
|
1790 |
}
|
1755 |
|
1791 |
|
1756 |
bool Db::doFlush()
|
1792 |
bool Db::doFlush()
|
1757 |
{
|
1793 |
{
|
1758 |
if (!m_ndb) {
|
1794 |
if (!m_ndb) {
|
1759 |
LOGERR("Db::doFLush: no ndb??\n" );
|
1795 |
LOGERR("Db::doFLush: no ndb??\n");
|
1760 |
return false;
|
1796 |
return false;
|
1761 |
}
|
1797 |
}
|
1762 |
string ermsg;
|
1798 |
string ermsg;
|
1763 |
try {
|
1799 |
try {
|
1764 |
m_ndb->xwdb.commit();
|
1800 |
m_ndb->xwdb.commit();
|
1765 |
} XCATCHERROR(ermsg);
|
1801 |
} XCATCHERROR(ermsg);
|
1766 |
if (!ermsg.empty()) {
|
1802 |
if (!ermsg.empty()) {
|
1767 |
LOGERR("Db::doFlush: flush() failed: " << (ermsg) << "\n" );
|
1803 |
LOGERR("Db::doFlush: flush() failed: " << ermsg << "\n");
|
1768 |
return false;
|
1804 |
return false;
|
1769 |
}
|
1805 |
}
|
1770 |
m_flushtxtsz = m_curtxtsz;
|
1806 |
m_flushtxtsz = m_curtxtsz;
|
1771 |
return true;
|
1807 |
return true;
|
1772 |
}
|
1808 |
}
|
|
... |
|
... |
1774 |
void Db::setExistingFlags(const string& udi, unsigned int docid)
|
1810 |
void Db::setExistingFlags(const string& udi, unsigned int docid)
|
1775 |
{
|
1811 |
{
|
1776 |
if (m_mode == DbRO)
|
1812 |
if (m_mode == DbRO)
|
1777 |
return;
|
1813 |
return;
|
1778 |
if (docid == (unsigned int)-1) {
|
1814 |
if (docid == (unsigned int)-1) {
|
1779 |
LOGERR("Db::setExistingFlags: called with bogus docid !!\n" );
|
1815 |
LOGERR("Db::setExistingFlags: called with bogus docid !!\n");
|
1780 |
return;
|
1816 |
return;
|
1781 |
}
|
1817 |
}
|
1782 |
#ifdef IDX_THREADS
|
1818 |
#ifdef IDX_THREADS
|
1783 |
std::unique_lock<std::mutex> lock(m_ndb->m_mutex);
|
1819 |
std::unique_lock<std::mutex> lock(m_ndb->m_mutex);
|
1784 |
#endif
|
1820 |
#endif
|
|
... |
|
... |
1787 |
|
1823 |
|
1788 |
void Db::i_setExistingFlags(const string& udi, unsigned int docid)
|
1824 |
void Db::i_setExistingFlags(const string& udi, unsigned int docid)
|
1789 |
{
|
1825 |
{
|
1790 |
// Set the up to date flag for the document and its subdocs
|
1826 |
// Set the up to date flag for the document and its subdocs
|
1791 |
if (docid >= updated.size()) {
|
1827 |
if (docid >= updated.size()) {
|
1792 |
LOGERR("needUpdate: existing docid beyond updated.size(). Udi [" << (udi) << "], docid " << (unsigned(docid)) << ", updated.size() " << ((unsigned)updated.size()) << "\n" );
|
1828 |
LOGERR("needUpdate: existing docid beyond updated.size(). Udi [" <<
|
|
|
1829 |
udi << "], docid " << docid << ", updated.size() " <<
|
|
|
1830 |
updated.size() << "\n");
|
1793 |
return;
|
1831 |
return;
|
1794 |
} else {
|
1832 |
} else {
|
1795 |
updated[docid] = true;
|
1833 |
updated[docid] = true;
|
1796 |
}
|
1834 |
}
|
1797 |
|
1835 |
|
1798 |
// Set the existence flag for all the subdocs (if any)
|
1836 |
// Set the existence flag for all the subdocs (if any)
|
1799 |
vector<Xapian::docid> docids;
|
1837 |
vector<Xapian::docid> docids;
|
1800 |
if (!m_ndb->subDocs(udi, 0, docids)) {
|
1838 |
if (!m_ndb->subDocs(udi, 0, docids)) {
|
1801 |
LOGERR("Rcl::Db::needUpdate: can't get subdocs\n" );
|
1839 |
LOGERR("Rcl::Db::needUpdate: can't get subdocs\n");
|
1802 |
return;
|
1840 |
return;
|
1803 |
}
|
1841 |
}
|
1804 |
for (vector<Xapian::docid>::iterator it = docids.begin();
|
1842 |
for (vector<Xapian::docid>::iterator it = docids.begin();
|
1805 |
it != docids.end(); it++) {
|
1843 |
it != docids.end(); it++) {
|
1806 |
if (*it < updated.size()) {
|
1844 |
if (*it < updated.size()) {
|
1807 |
LOGDEB2("Db::needUpdate: docid " << (*it) << " set\n" );
|
1845 |
LOGDEB2("Db::needUpdate: docid " << (*it) << " set\n");
|
1808 |
updated[*it] = true;
|
1846 |
updated[*it] = true;
|
1809 |
}
|
1847 |
}
|
1810 |
}
|
1848 |
}
|
1811 |
}
|
1849 |
}
|
1812 |
|
1850 |
|
|
... |
|
... |
1845 |
|
1883 |
|
1846 |
// Try to find the document indexed by the uniterm.
|
1884 |
// Try to find the document indexed by the uniterm.
|
1847 |
Xapian::PostingIterator docid;
|
1885 |
Xapian::PostingIterator docid;
|
1848 |
XAPTRY(docid = m_ndb->xrdb.postlist_begin(uniterm), m_ndb->xrdb, m_reason);
|
1886 |
XAPTRY(docid = m_ndb->xrdb.postlist_begin(uniterm), m_ndb->xrdb, m_reason);
|
1849 |
if (!m_reason.empty()) {
|
1887 |
if (!m_reason.empty()) {
|
1850 |
LOGERR("Db::needUpdate: xapian::postlist_begin failed: " << (m_reason) << "\n" );
|
1888 |
LOGERR("Db::needUpdate: xapian::postlist_begin failed: " <<
|
|
|
1889 |
m_reason << "\n");
|
1851 |
return false;
|
1890 |
return false;
|
1852 |
}
|
1891 |
}
|
1853 |
if (docid == m_ndb->xrdb.postlist_end(uniterm)) {
|
1892 |
if (docid == m_ndb->xrdb.postlist_end(uniterm)) {
|
1854 |
// No document exists with this path: we do need update
|
1893 |
// No document exists with this path: we do need update
|
1855 |
LOGDEB("Db::needUpdate:yes (new): [" << (uniterm) << "]\n" );
|
1894 |
LOGDEB("Db::needUpdate:yes (new): [" << uniterm << "]\n");
|
1856 |
return true;
|
1895 |
return true;
|
1857 |
}
|
1896 |
}
|
1858 |
Xapian::Document xdoc;
|
1897 |
Xapian::Document xdoc;
|
1859 |
XAPTRY(xdoc = m_ndb->xrdb.get_document(*docid), m_ndb->xrdb, m_reason);
|
1898 |
XAPTRY(xdoc = m_ndb->xrdb.get_document(*docid), m_ndb->xrdb, m_reason);
|
1860 |
if (!m_reason.empty()) {
|
1899 |
if (!m_reason.empty()) {
|
1861 |
LOGERR("Db::needUpdate: get_document error: " << (m_reason) << "\n" );
|
1900 |
LOGERR("Db::needUpdate: get_document error: " << m_reason << "\n");
|
1862 |
return true;
|
1901 |
return true;
|
1863 |
}
|
1902 |
}
|
1864 |
|
1903 |
|
1865 |
if (docidp) {
|
1904 |
if (docidp) {
|
1866 |
*docidp = *docid;
|
1905 |
*docidp = *docid;
|
|
... |
|
... |
1868 |
|
1907 |
|
1869 |
// Retrieve old file/doc signature from value
|
1908 |
// Retrieve old file/doc signature from value
|
1870 |
string osig;
|
1909 |
string osig;
|
1871 |
XAPTRY(osig = xdoc.get_value(VALUE_SIG), m_ndb->xrdb, m_reason);
|
1910 |
XAPTRY(osig = xdoc.get_value(VALUE_SIG), m_ndb->xrdb, m_reason);
|
1872 |
if (!m_reason.empty()) {
|
1911 |
if (!m_reason.empty()) {
|
1873 |
LOGERR("Db::needUpdate: get_value error: " << (m_reason) << "\n" );
|
1912 |
LOGERR("Db::needUpdate: get_value error: " << m_reason << "\n");
|
1874 |
return true;
|
1913 |
return true;
|
1875 |
}
|
1914 |
}
|
1876 |
LOGDEB2("Db::needUpdate: oldsig [" << (osig) << "] new [" << (sig) << "]\n" );
|
1915 |
LOGDEB2("Db::needUpdate: oldsig [" << osig << "] new [" << sig << "]\n");
|
1877 |
|
1916 |
|
1878 |
if (osigp) {
|
1917 |
if (osigp) {
|
1879 |
*osigp = osig;
|
1918 |
*osigp = osig;
|
1880 |
}
|
1919 |
}
|
1881 |
|
1920 |
|
1882 |
// Compare new/old sig
|
1921 |
// Compare new/old sig
|
1883 |
if (sig != osig) {
|
1922 |
if (sig != osig) {
|
1884 |
LOGDEB("Db::needUpdate:yes: olsig [" << (osig) << "] new [" << (sig) << "] [" << (uniterm) << "]\n" );
|
1923 |
LOGDEB("Db::needUpdate:yes: olsig [" << osig << "] new [" << sig <<
|
|
|
1924 |
"] [" << uniterm << "]\n");
|
1885 |
// Db is not up to date. Let's index the file
|
1925 |
// Db is not up to date. Let's index the file
|
1886 |
return true;
|
1926 |
return true;
|
1887 |
}
|
1927 |
}
|
1888 |
|
1928 |
|
1889 |
// Up to date. Set the existence flags in the map for the doc and
|
1929 |
// Up to date. Set the existence flags in the map for the doc and
|
1890 |
// its subdocs.
|
1930 |
// its subdocs.
|
1891 |
LOGDEB("Db::needUpdate:no: [" << (uniterm) << "]\n" );
|
1931 |
LOGDEB("Db::needUpdate:no: [" << uniterm << "]\n");
|
1892 |
i_setExistingFlags(udi, *docid);
|
1932 |
i_setExistingFlags(udi, *docid);
|
1893 |
return false;
|
1933 |
return false;
|
1894 |
}
|
1934 |
}
|
1895 |
|
1935 |
|
1896 |
// Return existing stem db languages
|
1936 |
// Return existing stem db languages
|
1897 |
vector<string> Db::getStemLangs()
|
1937 |
vector<string> Db::getStemLangs()
|
1898 |
{
|
1938 |
{
|
1899 |
LOGDEB("Db::getStemLang\n" );
|
1939 |
LOGDEB("Db::getStemLang\n");
|
1900 |
vector<string> langs;
|
1940 |
vector<string> langs;
|
1901 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1941 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1902 |
return langs;
|
1942 |
return langs;
|
1903 |
StemDb db(m_ndb->xrdb);
|
1943 |
StemDb db(m_ndb->xrdb);
|
1904 |
db.getMembers(langs);
|
1944 |
db.getMembers(langs);
|
|
... |
|
... |
1908 |
/**
|
1948 |
/**
|
1909 |
* Delete stem db for given language
|
1949 |
* Delete stem db for given language
|
1910 |
*/
|
1950 |
*/
|
1911 |
bool Db::deleteStemDb(const string& lang)
|
1951 |
bool Db::deleteStemDb(const string& lang)
|
1912 |
{
|
1952 |
{
|
1913 |
LOGDEB("Db::deleteStemDb(" << (lang) << ")\n" );
|
1953 |
LOGDEB("Db::deleteStemDb(" << lang << ")\n");
|
1914 |
if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable)
|
1954 |
if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable)
|
1915 |
return false;
|
1955 |
return false;
|
1916 |
XapWritableSynFamily db(m_ndb->xwdb, synFamStem);
|
1956 |
XapWritableSynFamily db(m_ndb->xwdb, synFamStem);
|
1917 |
return db.deleteMember(lang);
|
1957 |
return db.deleteMember(lang);
|
1918 |
}
|
1958 |
}
|
|
... |
|
... |
1923 |
* with documents indexed by a single term (the stem), and with the list of
|
1963 |
* with documents indexed by a single term (the stem), and with the list of
|
1924 |
* parent terms in the document data.
|
1964 |
* parent terms in the document data.
|
1925 |
*/
|
1965 |
*/
|
1926 |
bool Db::createStemDbs(const vector<string>& langs)
|
1966 |
bool Db::createStemDbs(const vector<string>& langs)
|
1927 |
{
|
1967 |
{
|
1928 |
LOGDEB("Db::createStemDbs\n" );
|
1968 |
LOGDEB("Db::createStemDbs\n");
|
1929 |
if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable) {
|
1969 |
if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable) {
|
1930 |
LOGERR("createStemDb: db not open or not writable\n" );
|
1970 |
LOGERR("createStemDb: db not open or not writable\n");
|
1931 |
return false;
|
1971 |
return false;
|
1932 |
}
|
1972 |
}
|
1933 |
|
1973 |
|
1934 |
return createExpansionDbs(m_ndb->xwdb, langs);
|
1974 |
return createExpansionDbs(m_ndb->xwdb, langs);
|
1935 |
}
|
1975 |
}
|
|
... |
|
... |
1940 |
* after a full file-system tree walk, else the file existence flags will
|
1980 |
* after a full file-system tree walk, else the file existence flags will
|
1941 |
* be wrong.
|
1981 |
* be wrong.
|
1942 |
*/
|
1982 |
*/
|
1943 |
bool Db::purge()
|
1983 |
bool Db::purge()
|
1944 |
{
|
1984 |
{
|
1945 |
LOGDEB("Db::purge\n" );
|
1985 |
LOGDEB("Db::purge\n");
|
1946 |
if (m_ndb == 0)
|
1986 |
if (m_ndb == 0)
|
1947 |
return false;
|
1987 |
return false;
|
1948 |
LOGDEB("Db::purge: m_isopen " << (m_ndb->m_isopen) << " m_iswritable " << (m_ndb->m_iswritable) << "\n" );
|
1988 |
LOGDEB("Db::purge: m_isopen " << m_ndb->m_isopen << " m_iswritable " <<
|
|
|
1989 |
m_ndb->m_iswritable << "\n");
|
1949 |
if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false)
|
1990 |
if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false)
|
1950 |
return false;
|
1991 |
return false;
|
1951 |
|
1992 |
|
1952 |
#ifdef IDX_THREADS
|
1993 |
#ifdef IDX_THREADS
|
1953 |
// If we manage our own write queue, make sure it's drained and closed
|
1994 |
// If we manage our own write queue, make sure it's drained and closed
|
|
... |
|
... |
1966 |
// that any added document would go to the index. Kept here
|
2007 |
// that any added document would go to the index. Kept here
|
1967 |
// because it doesn't really hurt.
|
2008 |
// because it doesn't really hurt.
|
1968 |
try {
|
2009 |
try {
|
1969 |
m_ndb->xwdb.commit();
|
2010 |
m_ndb->xwdb.commit();
|
1970 |
} catch (...) {
|
2011 |
} catch (...) {
|
1971 |
LOGERR("Db::purge: 1st flush failed\n" );
|
2012 |
LOGERR("Db::purge: 1st flush failed\n");
|
1972 |
|
2013 |
|
1973 |
}
|
2014 |
}
|
1974 |
|
2015 |
|
1975 |
// Walk the document array and delete any xapian document whose
|
2016 |
// Walk the document array and delete any xapian document whose
|
1976 |
// flag is not set (we did not see its source during indexing).
|
2017 |
// flag is not set (we did not see its source during indexing).
|
|
... |
|
... |
1979 |
if (!updated[docid]) {
|
2020 |
if (!updated[docid]) {
|
1980 |
if ((purgecount+1) % 100 == 0) {
|
2021 |
if ((purgecount+1) % 100 == 0) {
|
1981 |
try {
|
2022 |
try {
|
1982 |
CancelCheck::instance().checkCancel();
|
2023 |
CancelCheck::instance().checkCancel();
|
1983 |
} catch(CancelExcept) {
|
2024 |
} catch(CancelExcept) {
|
1984 |
LOGINFO("Db::purge: partially cancelled\n" );
|
2025 |
LOGINFO("Db::purge: partially cancelled\n");
|
1985 |
break;
|
2026 |
break;
|
1986 |
}
|
2027 |
}
|
1987 |
}
|
2028 |
}
|
1988 |
|
2029 |
|
1989 |
try {
|
2030 |
try {
|
|
... |
|
... |
1996 |
// bad for performance.
|
2037 |
// bad for performance.
|
1997 |
Xapian::termcount trms = m_ndb->xwdb.get_doclength(docid);
|
2038 |
Xapian::termcount trms = m_ndb->xwdb.get_doclength(docid);
|
1998 |
maybeflush(trms * 5);
|
2039 |
maybeflush(trms * 5);
|
1999 |
}
|
2040 |
}
|
2000 |
m_ndb->xwdb.delete_document(docid);
|
2041 |
m_ndb->xwdb.delete_document(docid);
|
2001 |
LOGDEB("Db::purge: deleted document #" << (docid) << "\n" );
|
2042 |
LOGDEB("Db::purge: deleted document #" << docid << "\n");
|
2002 |
} catch (const Xapian::DocNotFoundError &) {
|
2043 |
} catch (const Xapian::DocNotFoundError &) {
|
2003 |
LOGDEB0("Db::purge: document #" << (docid) << " not found\n" );
|
2044 |
LOGDEB0("Db::purge: document #" << docid << " not found\n");
|
2004 |
} catch (const Xapian::Error &e) {
|
2045 |
} catch (const Xapian::Error &e) {
|
2005 |
LOGERR("Db::purge: document #" << (docid) << ": " << (e.get_msg()) << "\n" );
|
2046 |
LOGERR("Db::purge: document #" << docid << ": " <<
|
|
|
2047 |
e.get_msg() << "\n");
|
2006 |
} catch (...) {
|
2048 |
} catch (...) {
|
2007 |
LOGERR("Db::purge: document #" << (docid) << ": unknown error\n" );
|
2049 |
LOGERR("Db::purge: document #" << docid << ": unknown error\n");
|
2008 |
}
|
2050 |
}
|
2009 |
purgecount++;
|
2051 |
purgecount++;
|
2010 |
}
|
2052 |
}
|
2011 |
}
|
2053 |
}
|
2012 |
|
2054 |
|
2013 |
try {
|
2055 |
try {
|
2014 |
m_ndb->xwdb.commit();
|
2056 |
m_ndb->xwdb.commit();
|
2015 |
} catch (...) {
|
2057 |
} catch (...) {
|
2016 |
LOGERR("Db::purge: 2nd flush failed\n" );
|
2058 |
LOGERR("Db::purge: 2nd flush failed\n");
|
2017 |
}
|
2059 |
}
|
2018 |
return true;
|
2060 |
return true;
|
2019 |
}
|
2061 |
}
|
2020 |
|
2062 |
|
2021 |
// Test for doc existence.
|
2063 |
// Test for doc existence.
|
|
... |
|
... |
2034 |
} else {
|
2076 |
} else {
|
2035 |
return true;
|
2077 |
return true;
|
2036 |
}
|
2078 |
}
|
2037 |
} XCATCHERROR(ermsg);
|
2079 |
} XCATCHERROR(ermsg);
|
2038 |
if (!ermsg.empty()) {
|
2080 |
if (!ermsg.empty()) {
|
2039 |
LOGERR("Db::docExists(" << (uniterm) << ") " << (ermsg) << "\n" );
|
2081 |
LOGERR("Db::docExists(" << uniterm << ") " << ermsg << "\n");
|
2040 |
}
|
2082 |
}
|
2041 |
return false;
|
2083 |
return false;
|
2042 |
}
|
2084 |
}
|
2043 |
|
2085 |
|
2044 |
/* Delete document(s) for given unique identifier (doc and descendents) */
|
2086 |
/* Delete document(s) for given unique identifier (doc and descendents) */
|
2045 |
bool Db::purgeFile(const string &udi, bool *existed)
|
2087 |
bool Db::purgeFile(const string &udi, bool *existed)
|
2046 |
{
|
2088 |
{
|
2047 |
LOGDEB("Db:purgeFile: [" << (udi) << "]\n" );
|
2089 |
LOGDEB("Db:purgeFile: [" << udi << "]\n");
|
2048 |
if (m_ndb == 0 || !m_ndb->m_iswritable)
|
2090 |
if (m_ndb == 0 || !m_ndb->m_iswritable)
|
2049 |
return false;
|
2091 |
return false;
|
2050 |
|
2092 |
|
2051 |
string uniterm = make_uniterm(udi);
|
2093 |
string uniterm = make_uniterm(udi);
|
2052 |
bool exists = docExists(uniterm);
|
2094 |
bool exists = docExists(uniterm);
|
|
... |
|
... |
2058 |
#ifdef IDX_THREADS
|
2100 |
#ifdef IDX_THREADS
|
2059 |
if (m_ndb->m_havewriteq) {
|
2101 |
if (m_ndb->m_havewriteq) {
|
2060 |
DbUpdTask *tp = new DbUpdTask(DbUpdTask::Delete, udi, uniterm,
|
2102 |
DbUpdTask *tp = new DbUpdTask(DbUpdTask::Delete, udi, uniterm,
|
2061 |
0, (size_t)-1);
|
2103 |
0, (size_t)-1);
|
2062 |
if (!m_ndb->m_wqueue.put(tp)) {
|
2104 |
if (!m_ndb->m_wqueue.put(tp)) {
|
2063 |
LOGERR("Db::purgeFile:Cant queue task\n" );
|
2105 |
LOGERR("Db::purgeFile:Cant queue task\n");
|
2064 |
return false;
|
2106 |
return false;
|
2065 |
} else {
|
2107 |
} else {
|
2066 |
return true;
|
2108 |
return true;
|
2067 |
}
|
2109 |
}
|
2068 |
}
|
2110 |
}
|
|
... |
|
... |
2074 |
/* Delete subdocs with an out of date sig. We do this to purge
|
2116 |
/* Delete subdocs with an out of date sig. We do this to purge
|
2075 |
obsolete subdocs during a partial update where no general purge
|
2117 |
obsolete subdocs during a partial update where no general purge
|
2076 |
will be done */
|
2118 |
will be done */
|
2077 |
bool Db::purgeOrphans(const string &udi)
|
2119 |
bool Db::purgeOrphans(const string &udi)
|
2078 |
{
|
2120 |
{
|
2079 |
LOGDEB("Db:purgeOrphans: [" << (udi) << "]\n" );
|
2121 |
LOGDEB("Db:purgeOrphans: [" << udi << "]\n");
|
2080 |
if (m_ndb == 0 || !m_ndb->m_iswritable)
|
2122 |
if (m_ndb == 0 || !m_ndb->m_iswritable)
|
2081 |
return false;
|
2123 |
return false;
|
2082 |
|
2124 |
|
2083 |
string uniterm = make_uniterm(udi);
|
2125 |
string uniterm = make_uniterm(udi);
|
2084 |
|
2126 |
|
2085 |
#ifdef IDX_THREADS
|
2127 |
#ifdef IDX_THREADS
|
2086 |
if (m_ndb->m_havewriteq) {
|
2128 |
if (m_ndb->m_havewriteq) {
|
2087 |
DbUpdTask *tp = new DbUpdTask(DbUpdTask::PurgeOrphans, udi, uniterm,
|
2129 |
DbUpdTask *tp = new DbUpdTask(DbUpdTask::PurgeOrphans, udi, uniterm,
|
2088 |
0, (size_t)-1);
|
2130 |
0, (size_t)-1);
|
2089 |
if (!m_ndb->m_wqueue.put(tp)) {
|
2131 |
if (!m_ndb->m_wqueue.put(tp)) {
|
2090 |
LOGERR("Db::purgeFile:Cant queue task\n" );
|
2132 |
LOGERR("Db::purgeFile:Cant queue task\n");
|
2091 |
return false;
|
2133 |
return false;
|
2092 |
} else {
|
2134 |
} else {
|
2093 |
return true;
|
2135 |
return true;
|
2094 |
}
|
2136 |
}
|
2095 |
}
|
2137 |
}
|
|
... |
|
... |
2118 |
// by the GUI history feature and by open parent/getenclosing
|
2160 |
// by the GUI history feature and by open parent/getenclosing
|
2119 |
// ! The return value is always true except for fatal errors. Document
|
2161 |
// ! The return value is always true except for fatal errors. Document
|
2120 |
// existence should be tested by looking at doc.pc
|
2162 |
// existence should be tested by looking at doc.pc
|
2121 |
bool Db::getDoc(const string &udi, const Doc& idxdoc, Doc &doc)
|
2163 |
bool Db::getDoc(const string &udi, const Doc& idxdoc, Doc &doc)
|
2122 |
{
|
2164 |
{
|
2123 |
LOGDEB("Db:getDoc: [" << (udi) << "]\n" );
|
2165 |
LOGDEB("Db:getDoc: [" << udi << "]\n");
|
2124 |
if (m_ndb == 0)
|
2166 |
if (m_ndb == 0)
|
2125 |
return false;
|
2167 |
return false;
|
2126 |
|
2168 |
|
2127 |
// Initialize what we can in any case. If this is history, caller
|
2169 |
// Initialize what we can in any case. If this is history, caller
|
2128 |
// will make partial display in case of error
|
2170 |
// will make partial display in case of error
|
|
... |
|
... |
2139 |
// Document found in history no longer in the
|
2181 |
// Document found in history no longer in the
|
2140 |
// database. We return true (because there might be
|
2182 |
// database. We return true (because there might be
|
2141 |
// other ok docs further) but indicate the error with
|
2183 |
// other ok docs further) but indicate the error with
|
2142 |
// pc = -1
|
2184 |
// pc = -1
|
2143 |
doc.pc = -1;
|
2185 |
doc.pc = -1;
|
2144 |
LOGINFO("Db:getDoc: no such doc in index: [" << (udi) << "]\n" );
|
2186 |
LOGINFO("Db:getDoc: no such doc in index: [" << udi << "]\n");
|
2145 |
return true;
|
2187 |
return true;
|
2146 |
}
|
2188 |
}
|
2147 |
}
|
2189 |
}
|
2148 |
|
2190 |
|
2149 |
bool Db::hasSubDocs(const Doc &idoc)
|
2191 |
bool Db::hasSubDocs(const Doc &idoc)
|
2150 |
{
|
2192 |
{
|
2151 |
if (m_ndb == 0)
|
2193 |
if (m_ndb == 0)
|
2152 |
return false;
|
2194 |
return false;
|
2153 |
string inudi;
|
2195 |
string inudi;
|
2154 |
if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) {
|
2196 |
if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) {
|
2155 |
LOGERR("Db::hasSubDocs: no input udi or empty\n" );
|
2197 |
LOGERR("Db::hasSubDocs: no input udi or empty\n");
|
2156 |
return false;
|
2198 |
return false;
|
2157 |
}
|
2199 |
}
|
2158 |
LOGDEB1("Db::hasSubDocs: idxi " << (idoc.idxi) << " inudi [" << (inudi) << "]\n" );
|
2200 |
LOGDEB1("Db::hasSubDocs: idxi " << idoc.idxi << " inudi [" <<inudi << "]\n");
|
2159 |
|
2201 |
|
2160 |
// Not sure why we perform both the subDocs() call and the test on
|
2202 |
// Not sure why we perform both the subDocs() call and the test on
|
2161 |
// has_children. The former will return docs if the input is a
|
2203 |
// has_children. The former will return docs if the input is a
|
2162 |
// file-level document, but the latter should be true both in this
|
2204 |
// file-level document, but the latter should be true both in this
|
2163 |
// case and if the input is already a subdoc, so the first test
|
2205 |
// case and if the input is already a subdoc, so the first test
|
2164 |
// should be redundant. Does not hurt much in any case, to be
|
2206 |
// should be redundant. Does not hurt much in any case, to be
|
2165 |
// checked one day.
|
2207 |
// checked one day.
|
2166 |
vector<Xapian::docid> docids;
|
2208 |
vector<Xapian::docid> docids;
|
2167 |
if (!m_ndb->subDocs(inudi, idoc.idxi, docids)) {
|
2209 |
if (!m_ndb->subDocs(inudi, idoc.idxi, docids)) {
|
2168 |
LOGDEB("Db::hasSubDocs: lower level subdocs failed\n" );
|
2210 |
LOGDEB("Db::hasSubDocs: lower level subdocs failed\n");
|
2169 |
return false;
|
2211 |
return false;
|
2170 |
}
|
2212 |
}
|
2171 |
if (!docids.empty())
|
2213 |
if (!docids.empty())
|
2172 |
return true;
|
2214 |
return true;
|
2173 |
|
2215 |
|
|
... |
|
... |
2184 |
if (m_ndb == 0)
|
2226 |
if (m_ndb == 0)
|
2185 |
return false;
|
2227 |
return false;
|
2186 |
|
2228 |
|
2187 |
string inudi;
|
2229 |
string inudi;
|
2188 |
if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) {
|
2230 |
if (!idoc.getmeta(Doc::keyudi, &inudi) || inudi.empty()) {
|
2189 |
LOGERR("Db::getSubDocs: no input udi or empty\n" );
|
2231 |
LOGERR("Db::getSubDocs: no input udi or empty\n");
|
2190 |
return false;
|
2232 |
return false;
|
2191 |
}
|
2233 |
}
|
2192 |
|
2234 |
|
2193 |
string rootudi;
|
2235 |
string rootudi;
|
2194 |
string ipath = idoc.ipath;
|
2236 |
string ipath = idoc.ipath;
|
2195 |
LOGDEB0("Db::getSubDocs: idxi " << (idoc.idxi) << " inudi [" << (inudi) << "] ipath [" << (ipath) << "]\n" );
|
2237 |
LOGDEB0("Db::getSubDocs: idxi " << idoc.idxi << " inudi [" << inudi <<
|
|
|
2238 |
"] ipath [" << ipath << "]\n");
|
2196 |
if (ipath.empty()) {
|
2239 |
if (ipath.empty()) {
|
2197 |
// File-level doc. Use it as root
|
2240 |
// File-level doc. Use it as root
|
2198 |
rootudi = inudi;
|
2241 |
rootudi = inudi;
|
2199 |
} else {
|
2242 |
} else {
|
2200 |
// See if we have a parent term
|
2243 |
// See if we have a parent term
|
2201 |
Xapian::Document xdoc;
|
2244 |
Xapian::Document xdoc;
|
2202 |
if (!m_ndb->getDoc(inudi, idoc.idxi, xdoc)) {
|
2245 |
if (!m_ndb->getDoc(inudi, idoc.idxi, xdoc)) {
|
2203 |
LOGERR("Db::getSubDocs: can't get Xapian document\n" );
|
2246 |
LOGERR("Db::getSubDocs: can't get Xapian document\n");
|
2204 |
return false;
|
2247 |
return false;
|
2205 |
}
|
2248 |
}
|
2206 |
Xapian::TermIterator xit;
|
2249 |
Xapian::TermIterator xit;
|
2207 |
XAPTRY(xit = xdoc.termlist_begin();
|
2250 |
XAPTRY(xit = xdoc.termlist_begin();
|
2208 |
xit.skip_to(wrap_prefix(parent_prefix)),
|
2251 |
xit.skip_to(wrap_prefix(parent_prefix)),
|
2209 |
m_ndb->xrdb, m_reason);
|
2252 |
m_ndb->xrdb, m_reason);
|
2210 |
if (!m_reason.empty()) {
|
2253 |
if (!m_reason.empty()) {
|
2211 |
LOGERR("Db::getSubDocs: xapian error: " << (m_reason) << "\n" );
|
2254 |
LOGERR("Db::getSubDocs: xapian error: " << m_reason << "\n");
|
2212 |
return false;
|
2255 |
return false;
|
2213 |
}
|
2256 |
}
|
2214 |
if (xit == xdoc.termlist_end()) {
|
2257 |
if (xit == xdoc.termlist_end()) {
|
2215 |
LOGERR("Db::getSubDocs: parent term not found\n" );
|
2258 |
LOGERR("Db::getSubDocs: parent term not found\n");
|
2216 |
return false;
|
2259 |
return false;
|
2217 |
}
|
2260 |
}
|
2218 |
rootudi = strip_prefix(*xit);
|
2261 |
rootudi = strip_prefix(*xit);
|
2219 |
}
|
2262 |
}
|
2220 |
|
2263 |
|
2221 |
LOGDEB("Db::getSubDocs: root: [" << (rootudi) << "]\n" );
|
2264 |
LOGDEB("Db::getSubDocs: root: [" << rootudi << "]\n");
|
2222 |
|
2265 |
|
2223 |
// Retrieve all subdoc xapian ids for the root
|
2266 |
// Retrieve all subdoc xapian ids for the root
|
2224 |
vector<Xapian::docid> docids;
|
2267 |
vector<Xapian::docid> docids;
|
2225 |
if (!m_ndb->subDocs(rootudi, idoc.idxi, docids)) {
|
2268 |
if (!m_ndb->subDocs(rootudi, idoc.idxi, docids)) {
|
2226 |
LOGDEB("Db::getSubDocs: lower level subdocs failed\n" );
|
2269 |
LOGDEB("Db::getSubDocs: lower level subdocs failed\n");
|
2227 |
return false;
|
2270 |
return false;
|
2228 |
}
|
2271 |
}
|
2229 |
|
2272 |
|
2230 |
// Retrieve doc, filter, and build output list
|
2273 |
// Retrieve doc, filter, and build output list
|
2231 |
for (int tries = 0; tries < 2; tries++) {
|
2274 |
for (int tries = 0; tries < 2; tries++) {
|
|
... |
|
... |
2239 |
Doc doc;
|
2282 |
Doc doc;
|
2240 |
doc.meta[Doc::keyudi] = docudi;
|
2283 |
doc.meta[Doc::keyudi] = docudi;
|
2241 |
doc.meta[Doc::keyrr] = "100%";
|
2284 |
doc.meta[Doc::keyrr] = "100%";
|
2242 |
doc.pc = 100;
|
2285 |
doc.pc = 100;
|
2243 |
if (!m_ndb->dbDataToRclDoc(*it, data, doc)) {
|
2286 |
if (!m_ndb->dbDataToRclDoc(*it, data, doc)) {
|
2244 |
LOGERR("Db::getSubDocs: doc conversion error\n" );
|
2287 |
LOGERR("Db::getSubDocs: doc conversion error\n");
|
2245 |
return false;
|
2288 |
return false;
|
2246 |
}
|
2289 |
}
|
2247 |
if (ipath.empty() ||
|
2290 |
if (ipath.empty() ||
|
2248 |
FileInterner::ipathContains(ipath, doc.ipath)) {
|
2291 |
FileInterner::ipathContains(ipath, doc.ipath)) {
|
2249 |
subdocs.push_back(doc);
|
2292 |
subdocs.push_back(doc);
|
|
... |
|
... |
2256 |
continue;
|
2299 |
continue;
|
2257 |
} XCATCHERROR(m_reason);
|
2300 |
} XCATCHERROR(m_reason);
|
2258 |
break;
|
2301 |
break;
|
2259 |
}
|
2302 |
}
|
2260 |
|
2303 |
|
2261 |
LOGERR("Db::getSubDocs: Xapian error: " << (m_reason) << "\n" );
|
2304 |
LOGERR("Db::getSubDocs: Xapian error: " << m_reason << "\n");
|
2262 |
return false;
|
2305 |
return false;
|
2263 |
}
|
2306 |
}
|
2264 |
|
2307 |
|
2265 |
} // End namespace Rcl
|
2308 |
} // End namespace Rcl
|
2266 |
|
2309 |
|