|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.133 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.134 2008-07-01 11:51:51 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
120 |
} catch (const string &s) {
|
120 |
} catch (const string &s) {
|
121 |
ermsg = s;
|
121 |
ermsg = s;
|
122 |
if (ermsg.empty())
|
122 |
if (ermsg.empty())
|
123 |
ermsg = "Empty error message";
|
123 |
ermsg = "Empty error message";
|
124 |
} catch (const char *s) {
|
124 |
} catch (const char *s) {
|
125 |
ermsg = s ? s : "";
|
125 |
ermsg = s ? s : string();
|
126 |
if (ermsg.empty())
|
126 |
if (ermsg.empty())
|
127 |
ermsg = "Empty error message";
|
127 |
ermsg = "Empty error message";
|
128 |
} catch (...) {
|
128 |
} catch (...) {
|
129 |
ermsg= "Unknown xapian error (not Xapian::Error or string)";
|
129 |
ermsg= "Unknown xapian error (not Xapian::Error or string)";
|
130 |
break;
|
130 |
break;
|
|
... |
|
... |
200 |
list<string> iterms;
|
200 |
list<string> iterms;
|
201 |
query->getQueryTerms(iterms);
|
201 |
query->getQueryTerms(iterms);
|
202 |
|
202 |
|
203 |
list<string> terms = noPrefixList(iterms);
|
203 |
list<string> terms = noPrefixList(iterms);
|
204 |
if (terms.empty()) {
|
204 |
if (terms.empty()) {
|
205 |
return "";
|
205 |
return string();
|
206 |
}
|
206 |
}
|
207 |
|
207 |
|
208 |
// Retrieve db-wide frequencies for the query terms
|
208 |
// Retrieve db-wide frequencies for the query terms
|
209 |
if (query->m_nq->termfreqs.empty()) {
|
209 |
if (query->m_nq->termfreqs.empty()) {
|
210 |
double doccnt = db.get_doccount();
|
210 |
double doccnt = db.get_doccount();
|
|
... |
|
... |
287 |
m_db->m_synthAbsLen /(7 * (m_db->m_synthAbsWordCtxLen+1));
|
287 |
m_db->m_synthAbsLen /(7 * (m_db->m_synthAbsWordCtxLen+1));
|
288 |
LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
|
288 |
LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
|
289 |
// This can't happen, but would crash us
|
289 |
// This can't happen, but would crash us
|
290 |
if (totalweight == 0.0) {
|
290 |
if (totalweight == 0.0) {
|
291 |
LOGERR(("makeAbstract: 0 totalweight!\n"));
|
291 |
LOGERR(("makeAbstract: 0 totalweight!\n"));
|
292 |
return "";
|
292 |
return string();
|
293 |
}
|
293 |
}
|
294 |
|
294 |
|
295 |
// Let's go populate
|
295 |
// Let's go populate
|
296 |
for (multimap<double, string>::reverse_iterator qit = byQ.rbegin();
|
296 |
for (multimap<double, string>::reverse_iterator qit = byQ.rbegin();
|
297 |
qit != byQ.rend(); qit++) {
|
297 |
qit != byQ.rend(); qit++) {
|
|
... |
|
... |
346 |
chron.millis(), qtermposs.size()));
|
346 |
chron.millis(), qtermposs.size()));
|
347 |
|
347 |
|
348 |
// This can happen if there are term occurrences in the keywords
|
348 |
// This can happen if there are term occurrences in the keywords
|
349 |
// etc. but not elsewhere ?
|
349 |
// etc. but not elsewhere ?
|
350 |
if (qtermposs.size() == 0)
|
350 |
if (qtermposs.size() == 0)
|
351 |
return "";
|
351 |
return string();
|
352 |
|
352 |
|
353 |
// Walk all document's terms position lists and populate slots
|
353 |
// Walk all document's terms position lists and populate slots
|
354 |
// around the query terms. We arbitrarily truncate the list to
|
354 |
// around the query terms. We arbitrarily truncate the list to
|
355 |
// avoid taking forever. If we do cutoff, the abstract may be
|
355 |
// avoid taking forever. If we do cutoff, the abstract may be
|
356 |
// inconsistent (missing words, potentially altering meaning),
|
356 |
// inconsistent (missing words, potentially altering meaning),
|
|
... |
|
... |
591 |
bool Db::reOpen()
|
591 |
bool Db::reOpen()
|
592 |
{
|
592 |
{
|
593 |
if (m_ndb && m_ndb->m_isopen) {
|
593 |
if (m_ndb && m_ndb->m_isopen) {
|
594 |
if (!close())
|
594 |
if (!close())
|
595 |
return false;
|
595 |
return false;
|
596 |
if (!open(m_basedir, "", m_mode, true)) {
|
596 |
if (!open(m_basedir, string(), m_mode, true)) {
|
597 |
return false;
|
597 |
return false;
|
598 |
}
|
598 |
}
|
599 |
}
|
599 |
}
|
600 |
return true;
|
600 |
return true;
|
601 |
}
|
601 |
}
|
|
... |
|
... |
682 |
bool Db::fieldToPrefix(const string& fldname, string &pfx)
|
682 |
bool Db::fieldToPrefix(const string& fldname, string &pfx)
|
683 |
{
|
683 |
{
|
684 |
// This is the default table
|
684 |
// This is the default table
|
685 |
static map<string, string> fldToPrefs;
|
685 |
static map<string, string> fldToPrefs;
|
686 |
if (fldToPrefs.empty()) {
|
686 |
if (fldToPrefs.empty()) {
|
687 |
fldToPrefs["abstract"] = "";
|
687 |
fldToPrefs["abstract"] = string();
|
688 |
fldToPrefs["ext"] = "XE";
|
688 |
fldToPrefs["ext"] = "XE";
|
689 |
|
689 |
|
690 |
fldToPrefs["title"] = "S";
|
690 |
fldToPrefs["title"] = "S";
|
691 |
fldToPrefs["caption"] = "S";
|
691 |
fldToPrefs["caption"] = "S";
|
692 |
fldToPrefs["subject"] = "S";
|
692 |
fldToPrefs["subject"] = "S";
|
|
... |
|
... |
776 |
//
|
776 |
//
|
777 |
// Note that we always return true (but set out to "" on error). We don't
|
777 |
// Note that we always return true (but set out to "" on error). We don't
|
778 |
// want to stop indexation because of a bad string
|
778 |
// want to stop indexation because of a bad string
|
779 |
bool dumb_string(const string &in, string &out)
|
779 |
bool dumb_string(const string &in, string &out)
|
780 |
{
|
780 |
{
|
781 |
out.erase();
|
781 |
out.clear();
|
782 |
if (in.empty())
|
782 |
if (in.empty())
|
783 |
return true;
|
783 |
return true;
|
784 |
|
784 |
|
785 |
string s1 = neutchars(in, "\n\r");
|
785 |
string s1 = neutchars(in, "\n\r");
|
786 |
if (!unacmaybefold(s1, out, "UTF-8", true)) {
|
786 |
if (!unacmaybefold(s1, out, "UTF-8", true)) {
|
787 |
LOGINFO(("dumb_string: unac failed for [%s]\n", in.c_str()));
|
787 |
LOGINFO(("dumb_string: unac failed for [%s]\n", in.c_str()));
|
788 |
out.erase();
|
788 |
out.clear();
|
789 |
// See comment at start of func
|
789 |
// See comment at start of func
|
790 |
return true;
|
790 |
return true;
|
791 |
}
|
791 |
}
|
792 |
return true;
|
792 |
return true;
|
793 |
}
|
793 |
}
|