|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.120 2007-07-10 09:23:28 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.121 2007-07-12 08:34:51 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
523 |
list<string> res;
|
523 |
list<string> res;
|
524 |
stringToStrings(Xapian::Stem::get_available_languages(), res);
|
524 |
stringToStrings(Xapian::Stem::get_available_languages(), res);
|
525 |
return res;
|
525 |
return res;
|
526 |
}
|
526 |
}
|
527 |
|
527 |
|
|
|
528 |
// Generic Xapian exception catching code. The same chain of catch
// clauses is needed after most calls into Xapian, and a macro is the
// only practical way to share catch clauses between call sites.
//
// Usage:
//     string ermsg;
//     try { ...xapian calls... } XCATCHERROR(ermsg);
//     if (!ermsg.empty()) { ...handle the error... }
//
// MSG must be a string object (it is assigned to and tested with
// empty()). It is left untouched when nothing is thrown, and is always
// set to a non-empty text when something is caught, so that testing
// MSG.empty() after the statement tells if an error occurred.
//
// The const char * handler guards against a null pointer: assigning a
// null char pointer to a string is undefined behaviour.
#define XCATCHERROR(MSG) \
 catch (const Xapian::Error &e) {                       \
    MSG = e.get_msg();                                  \
    if (MSG.empty()) MSG = "Empty error message";       \
 } catch (const string &s) {                            \
    MSG = s;                                            \
    if (MSG.empty()) MSG = "Empty error message";       \
 } catch (const char *s) {                              \
    MSG = s ? s : "";                                   \
    if (MSG.empty()) MSG = "Empty error message";       \
 } catch (...) {                                        \
    MSG = "Caught unknown xapian exception";            \
 }
|
|
|
543 |
|
528 |
|
544 |
|
529 |
bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops)
|
545 |
bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops)
|
530 |
{
|
546 |
{
|
531 |
bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;
|
547 |
bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;
|
532 |
qops &= ~QO_KEEP_UPDATED;
|
548 |
qops &= ~QO_KEEP_UPDATED;
|
|
... |
|
... |
542 |
return false;
|
558 |
return false;
|
543 |
}
|
559 |
}
|
544 |
if (!stops.empty())
|
560 |
if (!stops.empty())
|
545 |
m_stops.setFile(stops);
|
561 |
m_stops.setFile(stops);
|
546 |
|
562 |
|
547 |
const char *ermsg = "Unknown";
|
563 |
string ermsg;
|
548 |
try {
|
564 |
try {
|
549 |
switch (mode) {
|
565 |
switch (mode) {
|
550 |
case DbUpd:
|
566 |
case DbUpd:
|
551 |
case DbTrunc:
|
567 |
case DbTrunc:
|
552 |
{
|
568 |
{
|
|
... |
|
... |
579 |
LOGDEB(("Db::Open: adding query db [%s]\n", it->c_str()));
|
595 |
LOGDEB(("Db::Open: adding query db [%s]\n", it->c_str()));
|
580 |
aerr.erase();
|
596 |
aerr.erase();
|
581 |
try {
|
597 |
try {
|
582 |
// Make this non-fatal
|
598 |
// Make this non-fatal
|
583 |
m_ndb->db.add_database(Xapian::Database(*it));
|
599 |
m_ndb->db.add_database(Xapian::Database(*it));
|
584 |
} catch (const Xapian::Error &e) {
|
600 |
} XCATCHERROR(aerr);
|
585 |
aerr = e.get_msg().c_str();
|
|
|
586 |
} catch (const string &s) {
|
|
|
587 |
aerr = s.c_str();
|
|
|
588 |
} catch (const char *s) {
|
|
|
589 |
aerr = s;
|
|
|
590 |
} catch (...) {
|
|
|
591 |
aerr = "Caught unknown exception";
|
|
|
592 |
}
|
|
|
593 |
if (!aerr.empty())
|
601 |
if (!aerr.empty())
|
594 |
LOGERR(("Db::Open: error while trying to add database "
|
602 |
LOGERR(("Db::Open: error while trying to add database "
|
595 |
"from [%s]: %s\n", it->c_str(), aerr.c_str()));
|
603 |
"from [%s]: %s\n", it->c_str(), aerr.c_str()));
|
596 |
}
|
604 |
}
|
597 |
break;
|
605 |
break;
|
598 |
}
|
606 |
}
|
599 |
m_mode = mode;
|
607 |
m_mode = mode;
|
600 |
m_ndb->m_isopen = true;
|
608 |
m_ndb->m_isopen = true;
|
601 |
m_basedir = dir;
|
609 |
m_basedir = dir;
|
602 |
return true;
|
610 |
return true;
|
603 |
} catch (const Xapian::Error &e) {
|
611 |
} XCATCHERROR(ermsg);
|
604 |
ermsg = e.get_msg().c_str();
|
|
|
605 |
} catch (const string &s) {
|
|
|
606 |
ermsg = s.c_str();
|
|
|
607 |
} catch (const char *s) {
|
|
|
608 |
ermsg = s;
|
|
|
609 |
} catch (...) {
|
|
|
610 |
ermsg = "Caught unknown exception";
|
|
|
611 |
}
|
|
|
612 |
LOGERR(("Db::open: exception while opening [%s]: %s\n",
|
612 |
LOGERR(("Db::open: exception while opening [%s]: %s\n",
|
613 |
dir.c_str(), ermsg));
|
613 |
dir.c_str(), ermsg.c_str()));
|
614 |
return false;
|
614 |
return false;
|
615 |
}
|
615 |
}
|
616 |
|
616 |
|
617 |
string Db::getDbDir()
|
617 |
string Db::getDbDir()
|
618 |
{
|
618 |
{
|
|
... |
|
... |
632 |
LOGDEB(("Db::i_close(%d): m_isopen %d m_iswritable %d\n", final,
|
632 |
LOGDEB(("Db::i_close(%d): m_isopen %d m_iswritable %d\n", final,
|
633 |
m_ndb->m_isopen, m_ndb->m_iswritable));
|
633 |
m_ndb->m_isopen, m_ndb->m_iswritable));
|
634 |
if (m_ndb->m_isopen == false && !final)
|
634 |
if (m_ndb->m_isopen == false && !final)
|
635 |
return true;
|
635 |
return true;
|
636 |
|
636 |
|
637 |
const char *ermsg = "Unknown";
|
637 |
string ermsg;
|
638 |
try {
|
638 |
try {
|
639 |
bool w = m_ndb->m_iswritable;
|
639 |
bool w = m_ndb->m_iswritable;
|
640 |
if (w)
|
640 |
if (w)
|
641 |
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
641 |
LOGDEB(("Rcl::Db:close: xapian will close. May take some time\n"));
|
642 |
// Used to do a flush here. Cant see why it should be necessary.
|
642 |
// Used to do a flush here. Cant see why it should be necessary.
|
|
... |
|
... |
651 |
if (m_ndb) {
|
651 |
if (m_ndb) {
|
652 |
return true;
|
652 |
return true;
|
653 |
}
|
653 |
}
|
654 |
LOGERR(("Rcl::Db::close(): cant recreate db object\n"));
|
654 |
LOGERR(("Rcl::Db::close(): cant recreate db object\n"));
|
655 |
return false;
|
655 |
return false;
|
656 |
} catch (const Xapian::Error &e) {
|
656 |
} XCATCHERROR(ermsg);
|
657 |
ermsg = e.get_msg().c_str();
|
|
|
658 |
} catch (const string &s) {
|
|
|
659 |
ermsg = s.c_str();
|
|
|
660 |
} catch (const char *s) {
|
|
|
661 |
ermsg = s;
|
|
|
662 |
} catch (...) {
|
|
|
663 |
ermsg = "Caught unknown exception";
|
|
|
664 |
}
|
|
|
665 |
LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
|
657 |
LOGERR(("Db:close: exception while deleting db: %s\n", ermsg.c_str()));
|
666 |
return false;
|
658 |
return false;
|
667 |
}
|
659 |
}
|
668 |
|
660 |
|
669 |
bool Db::reOpen()
|
661 |
bool Db::reOpen()
|
670 |
{
|
662 |
{
|
|
... |
|
... |
678 |
return true;
|
670 |
return true;
|
679 |
}
|
671 |
}
|
680 |
|
672 |
|
681 |
int Db::docCnt()
|
673 |
int Db::docCnt()
|
682 |
{
|
674 |
{
|
|
|
675 |
int res = -1;
|
|
|
676 |
string ermsg;
|
683 |
if (m_ndb && m_ndb->m_isopen) {
|
677 |
if (m_ndb && m_ndb->m_isopen) {
|
|
|
678 |
try {
|
684 |
return m_ndb->m_iswritable ? m_ndb->wdb.get_doccount() :
|
679 |
res = m_ndb->m_iswritable ? m_ndb->wdb.get_doccount() :
|
685 |
m_ndb->db.get_doccount();
|
680 |
m_ndb->db.get_doccount();
|
|
|
681 |
} catch (const Xapian::DatabaseModifiedError &e) {
|
|
|
682 |
LOGDEB(("Db::docCnt: got modified error. reopen/retry\n"));
|
|
|
683 |
reOpen();
|
|
|
684 |
res = m_ndb->m_iswritable ? m_ndb->wdb.get_doccount() :
|
|
|
685 |
m_ndb->db.get_doccount();
|
|
|
686 |
} XCATCHERROR(ermsg);
|
|
|
687 |
if (!ermsg.empty())
|
|
|
688 |
LOGERR(("Db::docCnt: got error: %s\n", ermsg.c_str()));
|
686 |
}
|
689 |
}
|
687 |
return -1;
|
690 |
return res;
|
688 |
}
|
691 |
}
|
689 |
|
692 |
|
690 |
bool Db::addQueryDb(const string &dir)
|
693 |
bool Db::addQueryDb(const string &dir)
|
691 |
{
|
694 |
{
|
692 |
LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb,
|
695 |
LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb,
|
|
... |
|
... |
716 |
m_extraDbs.erase(it);
|
719 |
m_extraDbs.erase(it);
|
717 |
}
|
720 |
}
|
718 |
}
|
721 |
}
|
719 |
return reOpen();
|
722 |
return reOpen();
|
720 |
}
|
723 |
}
|
|
|
724 |
|
721 |
bool Db::testDbDir(const string &dir)
|
725 |
bool Db::testDbDir(const string &dir)
|
722 |
{
|
726 |
{
|
723 |
string aerr;
|
727 |
string aerr;
|
724 |
LOGDEB(("Db::testDbDir: [%s]\n", dir.c_str()));
|
728 |
LOGDEB(("Db::testDbDir: [%s]\n", dir.c_str()));
|
725 |
try {
|
729 |
try {
|
726 |
Xapian::Database db(dir);
|
730 |
Xapian::Database db(dir);
|
727 |
} catch (const Xapian::Error &e) {
|
731 |
} XCATCHERROR(aerr);
|
728 |
aerr = e.get_msg().c_str();
|
|
|
729 |
} catch (const string &s) {
|
|
|
730 |
aerr = s.c_str();
|
|
|
731 |
} catch (const char *s) {
|
|
|
732 |
aerr = s;
|
|
|
733 |
} catch (...) {
|
|
|
734 |
aerr = "Caught unknown exception";
|
|
|
735 |
}
|
|
|
736 |
if (!aerr.empty()) {
|
732 |
if (!aerr.empty()) {
|
737 |
LOGERR(("Db::Open: error while trying to open database "
|
733 |
LOGERR(("Db::Open: error while trying to open database "
|
738 |
"from [%s]: %s\n", dir.c_str(), aerr.c_str()));
|
734 |
"from [%s]: %s\n", dir.c_str(), aerr.c_str()));
|
739 |
return false;
|
735 |
return false;
|
740 |
}
|
736 |
}
|
|
... |
|
... |
821 |
if (transcode(term, printable, "UTF-8", "ISO-8859-1")) {
|
817 |
if (transcode(term, printable, "UTF-8", "ISO-8859-1")) {
|
822 |
LOGDEB((" [%s]\n", printable.c_str()));
|
818 |
LOGDEB((" [%s]\n", printable.c_str()));
|
823 |
}
|
819 |
}
|
824 |
#endif
|
820 |
#endif
|
825 |
|
821 |
|
826 |
const char *ermsg;
|
822 |
string ermsg;
|
827 |
try {
|
823 |
try {
|
828 |
if (stops.hasStops() && stops.isStop(term)) {
|
824 |
if (stops.hasStops() && stops.isStop(term)) {
|
829 |
LOGDEB1(("Db: takeword [%s] in stop list\n", term.c_str()));
|
825 |
LOGDEB1(("Db: takeword [%s] in stop list\n", term.c_str()));
|
830 |
return true;
|
826 |
return true;
|
831 |
}
|
827 |
}
|
|
... |
|
... |
837 |
doc.add_posting(term, pos, 1);
|
833 |
doc.add_posting(term, pos, 1);
|
838 |
if (!prefix.empty()) {
|
834 |
if (!prefix.empty()) {
|
839 |
doc.add_posting(prefix + term, pos, 1);
|
835 |
doc.add_posting(prefix + term, pos, 1);
|
840 |
}
|
836 |
}
|
841 |
return true;
|
837 |
return true;
|
842 |
} catch (const Xapian::Error &e) {
|
838 |
} XCATCHERROR(ermsg);
|
843 |
ermsg = e.get_msg().c_str();
|
|
|
844 |
} catch (...) {
|
|
|
845 |
ermsg= "Unknown error";
|
|
|
846 |
}
|
|
|
847 |
LOGERR(("Db: xapian add_posting error %s\n", ermsg));
|
839 |
LOGERR(("Db: xapian add_posting error %s\n", ermsg.c_str()));
|
848 |
return false;
|
840 |
return false;
|
849 |
}
|
841 |
}
|
850 |
|
842 |
|
851 |
// Unaccent and lowercase data, replace \n\r with spaces
|
843 |
// Unaccent and lowercase data, replace \n\r with spaces
|
852 |
// Removing crlfs is so that we can use the text in the document data fields.
|
844 |
// Removing crlfs is so that we can use the text in the document data fields.
|
|
... |
|
... |
1100 |
doc.ipath.c_str()));
|
1092 |
doc.ipath.c_str()));
|
1101 |
} else {
|
1093 |
} else {
|
1102 |
LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc,
|
1094 |
LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc,
|
1103 |
doc.ipath.c_str()));
|
1095 |
doc.ipath.c_str()));
|
1104 |
}
|
1096 |
}
|
1105 |
} catch (const Xapian::Error &e) {
|
1097 |
} XCATCHERROR(ermsg);
|
1106 |
ermsg = e.get_msg();
|
|
|
1107 |
if (ermsg.empty())
|
|
|
1108 |
ermsg = "Empty error message";
|
|
|
1109 |
} catch (...) {
|
|
|
1110 |
ermsg= "Unknown error";
|
|
|
1111 |
}
|
|
|
1112 |
|
1098 |
|
1113 |
if (!ermsg.empty()) {
|
1099 |
if (!ermsg.empty()) {
|
1114 |
LOGERR(("Db::add: replace_document failed: %s\n", ermsg.c_str()));
|
1100 |
LOGERR(("Db::add: replace_document failed: %s\n", ermsg.c_str()));
|
1115 |
ermsg.erase();
|
1101 |
ermsg.erase();
|
1116 |
// FIXME: is this ever actually needed?
|
1102 |
// FIXME: is this ever actually needed?
|
1117 |
try {
|
1103 |
try {
|
1118 |
m_ndb->wdb.add_document(newdocument);
|
1104 |
m_ndb->wdb.add_document(newdocument);
|
1119 |
LOGDEB(("Db::add: %s added (failed re-seek for duplicate)\n",
|
1105 |
LOGDEB(("Db::add: %s added (failed re-seek for duplicate)\n",
|
1120 |
fnc));
|
1106 |
fnc));
|
1121 |
} catch (const Xapian::Error &e) {
|
1107 |
} XCATCHERROR(ermsg);
|
1122 |
ermsg = e.get_msg();
|
|
|
1123 |
if (ermsg.empty())
|
|
|
1124 |
ermsg = "Empty error message";
|
|
|
1125 |
} catch (...) {
|
|
|
1126 |
ermsg= "Unknown error";
|
|
|
1127 |
}
|
|
|
1128 |
if (!ermsg.empty()) {
|
1108 |
if (!ermsg.empty()) {
|
1129 |
LOGERR(("Db::add: add_document failed: %s\n", ermsg.c_str()));
|
1109 |
LOGERR(("Db::add: add_document failed: %s\n", ermsg.c_str()));
|
1130 |
return false;
|
1110 |
return false;
|
1131 |
}
|
1111 |
}
|
1132 |
}
|
1112 |
}
|
|
... |
|
... |
1137 |
if ((m_curtxtsz - m_flushtxtsz) / MB >= m_flushMb) {
|
1117 |
if ((m_curtxtsz - m_flushtxtsz) / MB >= m_flushMb) {
|
1138 |
ermsg.erase();
|
1118 |
ermsg.erase();
|
1139 |
LOGDEB(("Db::add: text size >= %d Mb, flushing\n", m_flushMb));
|
1119 |
LOGDEB(("Db::add: text size >= %d Mb, flushing\n", m_flushMb));
|
1140 |
try {
|
1120 |
try {
|
1141 |
m_ndb->wdb.flush();
|
1121 |
m_ndb->wdb.flush();
|
1142 |
} catch (const Xapian::Error &e) {
|
1122 |
} XCATCHERROR(ermsg);
|
1143 |
ermsg = e.get_msg();
|
|
|
1144 |
if (ermsg.empty())
|
|
|
1145 |
ermsg = "Empty error message";
|
|
|
1146 |
} catch (...) {
|
|
|
1147 |
ermsg= "Unknown error";
|
|
|
1148 |
}
|
|
|
1149 |
if (!ermsg.empty()) {
|
1123 |
if (!ermsg.empty()) {
|
1150 |
LOGERR(("Db::add: flush() failed: %s\n", ermsg.c_str()));
|
1124 |
LOGERR(("Db::add: flush() failed: %s\n", ermsg.c_str()));
|
1151 |
return false;
|
1125 |
return false;
|
1152 |
}
|
1126 |
}
|
1153 |
m_flushtxtsz = m_curtxtsz;
|
1127 |
m_flushtxtsz = m_curtxtsz;
|
|
... |
|
... |
1243 |
// LOGDEB(("Db::needUpdate: used %d mS\n", chron.millis()));
|
1217 |
// LOGDEB(("Db::needUpdate: used %d mS\n", chron.millis()));
|
1244 |
return false;
|
1218 |
return false;
|
1245 |
} catch (const Xapian::DatabaseModifiedError &e) {
|
1219 |
} catch (const Xapian::DatabaseModifiedError &e) {
|
1246 |
LOGDEB(("Db::needUpdate: got modified error. reopen/retry\n"));
|
1220 |
LOGDEB(("Db::needUpdate: got modified error. reopen/retry\n"));
|
1247 |
reOpen();
|
1221 |
reOpen();
|
1248 |
} catch (const Xapian::Error &e) {
|
1222 |
} XCATCHERROR(ermsg);
|
1249 |
ermsg = e.get_msg();
|
|
|
1250 |
break;
|
|
|
1251 |
} catch (...) {
|
|
|
1252 |
ermsg= "Unknown error";
|
|
|
1253 |
break;
|
|
|
1254 |
}
|
|
|
1255 |
}
|
1223 |
}
|
1256 |
LOGERR(("Db::needUpdate: error while checking existence: %s\n",
|
1224 |
LOGERR(("Db::needUpdate: error while checking existence: %s\n",
|
1257 |
ermsg.c_str()));
|
1225 |
ermsg.c_str()));
|
1258 |
return true;
|
1226 |
return true;
|
1259 |
}
|
1227 |
}
|
|
... |
|
... |
1291 |
{
|
1259 |
{
|
1292 |
LOGDEB(("Db::createStemDb(%s)\n", lang.c_str()));
|
1260 |
LOGDEB(("Db::createStemDb(%s)\n", lang.c_str()));
|
1293 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1261 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1294 |
return false;
|
1262 |
return false;
|
1295 |
|
1263 |
|
1296 |
return StemDb:: createDb(m_ndb->m_iswritable ? m_ndb->wdb : m_ndb->db,
|
1264 |
return StemDb::createDb(m_ndb->m_iswritable ? m_ndb->wdb : m_ndb->db,
|
1297 |
m_basedir, lang);
|
1265 |
m_basedir, lang);
|
1298 |
}
|
1266 |
}
|
1299 |
|
1267 |
|
1300 |
/**
|
1268 |
/**
|
1301 |
* This is called at the end of an indexing session, to delete the
|
1269 |
* This is called at the end of an indexing session, to delete the
|
|
... |
|
... |
1359 |
return false;
|
1327 |
return false;
|
1360 |
Xapian::WritableDatabase db = m_ndb->wdb;
|
1328 |
Xapian::WritableDatabase db = m_ndb->wdb;
|
1361 |
string hash;
|
1329 |
string hash;
|
1362 |
pathHash(fn, hash, PATHHASHLEN);
|
1330 |
pathHash(fn, hash, PATHHASHLEN);
|
1363 |
string pterm = "P" + hash;
|
1331 |
string pterm = "P" + hash;
|
1364 |
const char *ermsg = "";
|
1332 |
string ermsg;
|
1365 |
try {
|
1333 |
try {
|
1366 |
Xapian::PostingIterator docid = db.postlist_begin(pterm);
|
1334 |
Xapian::PostingIterator docid = db.postlist_begin(pterm);
|
1367 |
if (docid == db.postlist_end(pterm))
|
1335 |
if (docid == db.postlist_end(pterm))
|
1368 |
return true;
|
1336 |
return true;
|
1369 |
LOGDEB(("purgeFile: delete docid %d\n", *docid));
|
1337 |
LOGDEB(("purgeFile: delete docid %d\n", *docid));
|
|
... |
|
... |
1375 |
it != docids.end(); it++) {
|
1343 |
it != docids.end(); it++) {
|
1376 |
LOGDEB2(("Db::purgeFile: delete subdoc %d\n", *it));
|
1344 |
LOGDEB2(("Db::purgeFile: delete subdoc %d\n", *it));
|
1377 |
db.delete_document(*it);
|
1345 |
db.delete_document(*it);
|
1378 |
}
|
1346 |
}
|
1379 |
return true;
|
1347 |
return true;
|
1380 |
} catch (const Xapian::Error &e) {
|
1348 |
} XCATCHERROR(ermsg);
|
1381 |
ermsg = e.get_msg().c_str();
|
|
|
1382 |
} catch (const string &s) {
|
|
|
1383 |
ermsg = s.c_str();
|
|
|
1384 |
} catch (const char *s) {
|
|
|
1385 |
ermsg = s;
|
|
|
1386 |
} catch (...) {
|
|
|
1387 |
ermsg = "Caught unknown exception";
|
|
|
1388 |
}
|
|
|
1389 |
if (*ermsg) {
|
1349 |
if (!ermsg.empty()) {
|
1390 |
LOGERR(("Db::purgeFile: %s\n", ermsg));
|
1350 |
LOGERR(("Db::purgeFile: %s\n", ermsg.c_str()));
|
1391 |
}
|
1351 |
}
|
1392 |
return false;
|
1352 |
return false;
|
1393 |
}
|
1353 |
}
|
1394 |
|
1354 |
|
1395 |
bool Db::filenameWildExp(const string& fnexp, list<string>& names)
|
1355 |
bool Db::filenameWildExp(const string& fnexp, list<string>& names)
|
|
... |
|
... |
1410 |
} // else let it be
|
1370 |
} // else let it be
|
1411 |
|
1371 |
|
1412 |
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
|
1372 |
LOGDEB((" pattern: [%s]\n", pattern.c_str()));
|
1413 |
|
1373 |
|
1414 |
// Match pattern against all file names in the db
|
1374 |
// Match pattern against all file names in the db
|
|
|
1375 |
string ermsg;
|
|
|
1376 |
try {
|
1415 |
Xapian::TermIterator it = m_ndb->db.allterms_begin();
|
1377 |
Xapian::TermIterator it = m_ndb->db.allterms_begin();
|
1416 |
it.skip_to("XSFN");
|
1378 |
it.skip_to("XSFN");
|
1417 |
for (;it != m_ndb->db.allterms_end(); it++) {
|
1379 |
for (;it != m_ndb->db.allterms_end(); it++) {
|
1418 |
if ((*it).find("XSFN") != 0)
|
1380 |
if ((*it).find("XSFN") != 0)
|
1419 |
break;
|
1381 |
break;
|
1420 |
string fn = (*it).substr(4);
|
1382 |
string fn = (*it).substr(4);
|
1421 |
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
|
1383 |
LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
|
1422 |
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
|
1384 |
if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
|
1423 |
names.push_back((*it).c_str());
|
1385 |
names.push_back((*it).c_str());
|
1424 |
}
|
1386 |
}
|
1425 |
// Limit the match count
|
1387 |
// Limit the match count
|
1426 |
if (names.size() > 1000) {
|
1388 |
if (names.size() > 1000) {
|
1427 |
LOGERR(("Db::filenameWildExp: too many matched file names\n"));
|
1389 |
LOGERR(("Db::filenameWildExp: too many matched file names\n"));
|
1428 |
break;
|
1390 |
break;
|
1429 |
}
|
|
|
1430 |
}
|
1391 |
}
|
|
|
1392 |
}
|
|
|
1393 |
} XCATCHERROR(ermsg);
|
|
|
1394 |
if (!ermsg.empty()) {
|
|
|
1395 |
LOGERR(("filenameWildExp: xapian error: %s\n", ermsg.c_str()));
|
|
|
1396 |
return false;
|
|
|
1397 |
}
|
|
|
1398 |
|
1431 |
if (names.empty()) {
|
1399 |
if (names.empty()) {
|
1432 |
// Build an impossible query: we know its impossible because we
|
1400 |
// Build an impossible query: we know its impossible because we
|
1433 |
// control the prefixes!
|
1401 |
// control the prefixes!
|
1434 |
names.push_back("XIMPOSSIBLE");
|
1402 |
names.push_back("XIMPOSSIBLE");
|
1435 |
}
|
1403 |
}
|
|
... |
|
... |
1457 |
(opts & Db::QO_STEM) ? stemlang : "")) {
|
1425 |
(opts & Db::QO_STEM) ? stemlang : "")) {
|
1458 |
m_reason += sdata->getReason();
|
1426 |
m_reason += sdata->getReason();
|
1459 |
return false;
|
1427 |
return false;
|
1460 |
}
|
1428 |
}
|
1461 |
m_ndb->query = xq;
|
1429 |
m_ndb->query = xq;
|
|
|
1430 |
string ermsg;
|
|
|
1431 |
string d;
|
|
|
1432 |
try {
|
1462 |
delete m_ndb->enquire;
|
1433 |
delete m_ndb->enquire;
|
1463 |
m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
|
1434 |
m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
|
1464 |
m_ndb->enquire->set_query(m_ndb->query);
|
1435 |
m_ndb->enquire->set_query(m_ndb->query);
|
1465 |
m_ndb->mset = Xapian::MSet();
|
1436 |
m_ndb->mset = Xapian::MSet();
|
1466 |
// Get the query description and trim the "Xapian::Query"
|
1437 |
// Get the query description and trim the "Xapian::Query"
|
1467 |
string d = m_ndb->query.get_description();
|
1438 |
d = m_ndb->query.get_description();
|
|
|
1439 |
} XCATCHERROR(ermsg);
|
|
|
1440 |
if (!ermsg.empty()) {
|
|
|
1441 |
LOGDEB(("Db::SetQuery: xapian error %s\n", ermsg.c_str()));
|
|
|
1442 |
return false;
|
|
|
1443 |
}
|
|
|
1444 |
|
1468 |
if (d.find("Xapian::Query") == 0)
|
1445 |
if (d.find("Xapian::Query") == 0)
|
1469 |
d.erase(0, strlen("Xapian::Query"));
|
1446 |
d.erase(0, strlen("Xapian::Query"));
|
1470 |
sdata->setDescription(d);
|
1447 |
sdata->setDescription(d);
|
1471 |
LOGDEB(("Db::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
|
1448 |
LOGDEB(("Db::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
|
1472 |
return true;
|
1449 |
return true;
|
|
... |
|
... |
1618 |
if (!m_ndb || !m_ndb->m_isopen)
|
1595 |
if (!m_ndb || !m_ndb->m_isopen)
|
1619 |
return 0;
|
1596 |
return 0;
|
1620 |
TermIter *tit = new TermIter;
|
1597 |
TermIter *tit = new TermIter;
|
1621 |
if (tit) {
|
1598 |
if (tit) {
|
1622 |
tit->db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
|
1599 |
tit->db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
|
|
|
1600 |
string ermsg;
|
|
|
1601 |
try {
|
1623 |
tit->it = tit->db.allterms_begin();
|
1602 |
tit->it = tit->db.allterms_begin();
|
|
|
1603 |
} XCATCHERROR(ermsg);
|
|
|
1604 |
if (!ermsg.empty()) {
|
|
|
1605 |
LOGERR(("Db::termWalkOpen: xapian error: %s\n", ermsg.c_str()));
|
|
|
1606 |
return 0;
|
|
|
1607 |
}
|
1624 |
}
|
1608 |
}
|
1625 |
return tit;
|
1609 |
return tit;
|
1626 |
}
|
1610 |
}
|
1627 |
/**
 * Fetch the next term of a term walk.
 *
 * @param tit term walk state; a null pointer is accepted and is
 *   handled like an exhausted walk.
 * @param term receives the current term. It is only modified when the
 *   call returns true.
 * @return true if a term was stored in 'term' (the iterator is then
 *   advanced), false if the walk is finished, 'tit' is null, or an
 *   exception was caught (the error is logged in that case).
 */
bool Db::termWalkNext(TermIter *tit, string &term)
{
    string ermsg;
    try {
        if (tit && tit->it != tit->db.allterms_end()) {
            // Read the current term, then step the iterator
            term = *(tit->it)++;
            return true;
        }
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
        // The message used to name termWalkOpen (copy/paste slip)
        LOGERR(("Db::termWalkNext: xapian error: %s\n", ermsg.c_str()));
    }
    return false;
}
|
1636 |
/**
 * Terminate a term walk and delete its state object.
 *
 * @param tit term walk state to delete. Deleting a null pointer is a
 *   no-op, so a null value is accepted.
 *
 * This never lets an exception out: a failure while destroying the
 * walk state is logged instead of being silently discarded.
 */
void Db::termWalkClose(TermIter *tit)
{
    string ermsg;
    try {
        delete tit;
    } XCATCHERROR(ermsg);
    if (!ermsg.empty()) {
        LOGERR(("Db::termWalkClose: xapian error: %s\n", ermsg.c_str()));
    }
}
|
1640 |
|
|
|
1641 |
|
1631 |
|
1642 |
bool Db::termExists(const string& word)
|
1632 |
bool Db::termExists(const string& word)
|
1643 |
{
|
1633 |
{
|
1644 |
if (!m_ndb || !m_ndb->m_isopen)
|
1634 |
if (!m_ndb || !m_ndb->m_isopen)
|
1645 |
return 0;
|
1635 |
return 0;
|
1646 |
Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
|
1636 |
Xapian::Database db = m_ndb->m_iswritable ? m_ndb->wdb: m_ndb->db;
|
|
|
1637 |
string ermsg;
|
|
|
1638 |
try {
|
1647 |
if (!db.term_exists(word))
|
1639 |
if (!db.term_exists(word))
|
1648 |
return false;
|
1640 |
return false;
|
|
|
1641 |
} XCATCHERROR(ermsg);
|
|
|
1642 |
if (!ermsg.empty()) {
|
|
|
1643 |
LOGERR(("Db::termWalkOpen: xapian error: %s\n", ermsg.c_str()));
|
|
|
1644 |
return false;
|
|
|
1645 |
}
|
1649 |
return true;
|
1646 |
return true;
|
1650 |
}
|
1647 |
}
|
1651 |
|
1648 |
|
1652 |
|
1649 |
|
1653 |
bool Db::stemDiffers(const string& lang, const string& word,
|
1650 |
bool Db::stemDiffers(const string& lang, const string& word,
|
|
... |
|
... |
1667 |
if (!m_ndb)
|
1664 |
if (!m_ndb)
|
1668 |
return false;
|
1665 |
return false;
|
1669 |
|
1666 |
|
1670 |
terms.clear();
|
1667 |
terms.clear();
|
1671 |
Xapian::TermIterator it;
|
1668 |
Xapian::TermIterator it;
|
|
|
1669 |
string ermsg;
|
1672 |
try {
|
1670 |
try {
|
1673 |
for (it = m_ndb->query.get_terms_begin();
|
1671 |
for (it = m_ndb->query.get_terms_begin();
|
1674 |
it != m_ndb->query.get_terms_end(); it++) {
|
1672 |
it != m_ndb->query.get_terms_end(); it++) {
|
1675 |
terms.push_back(*it);
|
1673 |
terms.push_back(*it);
|
1676 |
}
|
1674 |
}
|
1677 |
} catch (...) {
|
1675 |
} XCATCHERROR(ermsg);
|
|
|
1676 |
if (!ermsg.empty()) {
|
|
|
1677 |
LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
|
1678 |
return false;
|
1678 |
return false;
|
1679 |
}
|
1679 |
}
|
1680 |
return true;
|
1680 |
return true;
|
1681 |
}
|
1681 |
}
|
1682 |
|
1682 |
|
|
... |
|
... |
1688 |
}
|
1688 |
}
|
1689 |
|
1689 |
|
1690 |
terms.clear();
|
1690 |
terms.clear();
|
1691 |
Xapian::TermIterator it;
|
1691 |
Xapian::TermIterator it;
|
1692 |
Xapian::docid id = Xapian::docid(doc.xdocid);
|
1692 |
Xapian::docid id = Xapian::docid(doc.xdocid);
|
|
|
1693 |
string ermsg;
|
1693 |
try {
|
1694 |
try {
|
1694 |
for (it=m_ndb->enquire->get_matching_terms_begin(id);
|
1695 |
for (it=m_ndb->enquire->get_matching_terms_begin(id);
|
1695 |
it != m_ndb->enquire->get_matching_terms_end(id); it++) {
|
1696 |
it != m_ndb->enquire->get_matching_terms_end(id); it++) {
|
1696 |
terms.push_back(*it);
|
1697 |
terms.push_back(*it);
|
1697 |
}
|
1698 |
}
|
1698 |
} catch (...) {
|
1699 |
} XCATCHERROR(ermsg);
|
|
|
1700 |
if (!ermsg.empty()) {
|
|
|
1701 |
LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
|
1699 |
return false;
|
1702 |
return false;
|
1700 |
}
|
1703 |
}
|
|
|
1704 |
|
1701 |
return true;
|
1705 |
return true;
|
1702 |
}
|
1706 |
}
|
1703 |
|
1707 |
|
1704 |
// Mset size
|
1708 |
// Mset size
|
1705 |
static const int qquantum = 30;
|
1709 |
static const int qquantum = 30;
|
|
... |
|
... |
1708 |
{
|
1712 |
{
|
1709 |
if (!m_ndb || !m_ndb->enquire) {
|
1713 |
if (!m_ndb || !m_ndb->enquire) {
|
1710 |
LOGERR(("Db::getResCnt: no query opened\n"));
|
1714 |
LOGERR(("Db::getResCnt: no query opened\n"));
|
1711 |
return -1;
|
1715 |
return -1;
|
1712 |
}
|
1716 |
}
|
|
|
1717 |
string ermsg;
|
1713 |
if (m_ndb->mset.size() <= 0) {
|
1718 |
if (m_ndb->mset.size() <= 0) {
|
1714 |
try {
|
1719 |
try {
|
1715 |
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
|
1720 |
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
|
1716 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1721 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1717 |
m_ndb->db.reopen();
|
1722 |
m_ndb->db.reopen();
|
1718 |
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
|
1723 |
m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
|
1719 |
} catch (const Xapian::Error & error) {
|
1724 |
} XCATCHERROR(ermsg);
|
|
|
1725 |
if (!ermsg.empty()) {
|
1720 |
LOGERR(("enquire->get_mset: exception: %s\n",
|
1726 |
LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
|
1721 |
error.get_msg().c_str()));
|
|
|
1722 |
return -1;
|
1727 |
return -1;
|
1723 |
}
|
1728 |
}
|
1724 |
}
|
1729 |
}
|
1725 |
|
1730 |
int ret = -1;
|
|
|
1731 |
try {
|
1726 |
return m_ndb->mset.get_matches_lower_bound();
|
1732 |
ret = m_ndb->mset.get_matches_lower_bound();
|
|
|
1733 |
} catch (...) {}
|
|
|
1734 |
return ret;
|
1727 |
}
|
1735 |
}
|
1728 |
|
1736 |
|
1729 |
|
1737 |
|
1730 |
// Get document at rank i in query (i is the index in the whole result
|
1738 |
// Get document at rank i in query (i is the index in the whole result
|
1731 |
// set, as in the enquire class. We check if the current mset has the
|
1739 |
// set, as in the enquire class. We check if the current mset has the
|
|
... |
|
... |
1859 |
*pc = 100;
|
1867 |
*pc = 100;
|
1860 |
|
1868 |
|
1861 |
string hash;
|
1869 |
string hash;
|
1862 |
pathHash(fn, hash, PATHHASHLEN);
|
1870 |
pathHash(fn, hash, PATHHASHLEN);
|
1863 |
string pqterm = ipath.empty() ? "P" + hash : "Q" + hash + "|" + ipath;
|
1871 |
string pqterm = ipath.empty() ? "P" + hash : "Q" + hash + "|" + ipath;
|
1864 |
const char *ermsg = "";
|
1872 |
string ermsg;
|
1865 |
try {
|
1873 |
try {
|
1866 |
if (!m_ndb->db.term_exists(pqterm)) {
|
1874 |
if (!m_ndb->db.term_exists(pqterm)) {
|
1867 |
// Document found in history no longer in the database.
|
1875 |
// Document found in history no longer in the database.
|
1868 |
// We return true (because their might be other ok docs further)
|
1876 |
// We return true (because their might be other ok docs further)
|
1869 |
// but indicate the error with pc = -1
|
1877 |
// but indicate the error with pc = -1
|
|
... |
|
... |
1876 |
Xapian::PostingIterator docid = m_ndb->db.postlist_begin(pqterm);
|
1884 |
Xapian::PostingIterator docid = m_ndb->db.postlist_begin(pqterm);
|
1877 |
Xapian::Document xdoc = m_ndb->db.get_document(*docid);
|
1885 |
Xapian::Document xdoc = m_ndb->db.get_document(*docid);
|
1878 |
string data = xdoc.get_data();
|
1886 |
string data = xdoc.get_data();
|
1879 |
list<string> terms;
|
1887 |
list<string> terms;
|
1880 |
return m_ndb->dbDataToRclDoc(*docid, data, doc);
|
1888 |
return m_ndb->dbDataToRclDoc(*docid, data, doc);
|
1881 |
} catch (const Xapian::Error &e) {
|
1889 |
} XCATCHERROR(ermsg);
|
1882 |
ermsg = e.get_msg().c_str();
|
|
|
1883 |
} catch (const string &s) {
|
|
|
1884 |
ermsg = s.c_str();
|
|
|
1885 |
} catch (const char *s) {
|
|
|
1886 |
ermsg = s;
|
|
|
1887 |
} catch (...) {
|
|
|
1888 |
ermsg = "Caught unknown exception";
|
|
|
1889 |
}
|
|
|
1890 |
if (*ermsg) {
|
1890 |
if (!ermsg.empty()) {
|
1891 |
LOGERR(("Db::getDoc: %s\n", ermsg));
|
1891 |
LOGERR(("Db::getDoc: %s\n", ermsg.c_str()));
|
1892 |
}
|
1892 |
}
|
1893 |
return false;
|
1893 |
return false;
|
1894 |
}
|
1894 |
}
|
1895 |
|
1895 |
|
1896 |
list<string> Db::expand(const Doc &doc)
|
1896 |
list<string> Db::expand(const Doc &doc)
|
|
... |
|
... |
1898 |
list<string> res;
|
1898 |
list<string> res;
|
1899 |
if (!m_ndb || !m_ndb->enquire) {
|
1899 |
if (!m_ndb || !m_ndb->enquire) {
|
1900 |
LOGERR(("Db::expand: no query opened\n"));
|
1900 |
LOGERR(("Db::expand: no query opened\n"));
|
1901 |
return res;
|
1901 |
return res;
|
1902 |
}
|
1902 |
}
|
|
|
1903 |
string ermsg;
|
|
|
1904 |
for (int tries = 0; tries < 2; tries++) {
|
|
|
1905 |
try {
|
1903 |
Xapian::RSet rset;
|
1906 |
Xapian::RSet rset;
|
1904 |
rset.add_document(Xapian::docid(doc.xdocid));
|
1907 |
rset.add_document(Xapian::docid(doc.xdocid));
|
1905 |
// We don't exclude the original query terms.
|
1908 |
// We don't exclude the original query terms.
|
1906 |
Xapian::ESet eset = m_ndb->enquire->get_eset(20, rset, false);
|
1909 |
Xapian::ESet eset = m_ndb->enquire->get_eset(20, rset, false);
|
1907 |
LOGDEB(("ESet terms:\n"));
|
1910 |
LOGDEB(("ESet terms:\n"));
|
1908 |
// We filter out the special terms
|
1911 |
// We filter out the special terms
|
1909 |
for (Xapian::ESetIterator it = eset.begin(); it != eset.end(); it++) {
|
1912 |
for (Xapian::ESetIterator it = eset.begin();
|
|
|
1913 |
it != eset.end(); it++) {
|
1910 |
LOGDEB((" [%s]\n", (*it).c_str()));
|
1914 |
LOGDEB((" [%s]\n", (*it).c_str()));
|
1911 |
if ((*it).empty() || ((*it).at(0)>='A' && (*it).at(0)<='Z'))
|
1915 |
if ((*it).empty() || ((*it).at(0)>='A' && (*it).at(0)<='Z'))
|
|
|
1916 |
continue;
|
|
|
1917 |
res.push_back(*it);
|
|
|
1918 |
if (res.size() >= 10)
|
|
|
1919 |
break;
|
|
|
1920 |
}
|
|
|
1921 |
} catch (const Xapian::DatabaseModifiedError &error) {
|
1912 |
continue;
|
1922 |
continue;
|
1913 |
res.push_back(*it);
|
1923 |
} XCATCHERROR(ermsg);
|
1914 |
if (res.size() >= 10)
|
1924 |
if (!ermsg.empty()) {
|
|
|
1925 |
LOGERR(("Db::expand: xapian error %s\n", ermsg.c_str()));
|
|
|
1926 |
res.clear();
|
|
|
1927 |
}
|
1915 |
break;
|
1928 |
break;
|
1916 |
}
|
1929 |
}
|
|
|
1930 |
|
1917 |
return res;
|
1931 |
return res;
|
1918 |
}
|
1932 |
}
|
1919 |
|
1933 |
|
1920 |
|
1934 |
|
1921 |
#ifndef NO_NAMESPACES
|
1935 |
#ifndef NO_NAMESPACES
|