|
a/src/rcldb/rclquery.cpp |
|
b/src/rcldb/rclquery.cpp |
|
... |
|
... |
49 |
// Field names inside the index data record may differ from the rcldoc ones
|
49 |
// Field names inside the index data record may differ from the rcldoc ones
|
50 |
// (esp.: caption / title)
|
50 |
// (esp.: caption / title)
|
51 |
static const string& docfToDatf(const string& df)
|
51 |
static const string& docfToDatf(const string& df)
|
52 |
{
|
52 |
{
|
53 |
if (!df.compare(Doc::keytt)) {
|
53 |
if (!df.compare(Doc::keytt)) {
|
54 |
return cstr_caption;
|
54 |
return cstr_caption;
|
55 |
} else if (!df.compare(Doc::keymt)) {
|
55 |
} else if (!df.compare(Doc::keymt)) {
|
56 |
return cstr_dmtime;
|
56 |
return cstr_dmtime;
|
57 |
} else {
|
57 |
} else {
|
58 |
return df;
|
58 |
return df;
|
59 |
}
|
59 |
}
|
60 |
}
|
60 |
}
|
61 |
|
61 |
|
62 |
// Sort helper class. As Xapian sorting is lexicographic, we do some
|
62 |
// Sort helper class. As Xapian sorting is lexicographic, we do some
|
63 |
// special processing for special fields like dates and sizes. User
|
63 |
// special processing for special fields like dates and sizes. User
|
|
... |
|
... |
68 |
#else
|
68 |
#else
|
69 |
class QSorter : public Xapian::KeyMaker {
|
69 |
class QSorter : public Xapian::KeyMaker {
|
70 |
#endif
|
70 |
#endif
|
71 |
public:
|
71 |
public:
|
72 |
QSorter(const string& f)
|
72 |
QSorter(const string& f)
|
73 |
: m_fld(docfToDatf(f) + "=")
|
73 |
: m_fld(docfToDatf(f) + "=")
|
74 |
{
|
74 |
{
|
75 |
m_ismtime = !m_fld.compare("dmtime=");
|
75 |
m_ismtime = !m_fld.compare("dmtime=");
|
76 |
if (m_ismtime)
|
76 |
if (m_ismtime)
|
77 |
m_issize = false;
|
77 |
m_issize = false;
|
78 |
else
|
78 |
else
|
79 |
m_issize = !m_fld.compare("fbytes=") || !m_fld.compare("dbytes=") ||
|
79 |
m_issize = !m_fld.compare("fbytes=") || !m_fld.compare("dbytes=") ||
|
80 |
!m_fld.compare("pcbytes=");
|
80 |
!m_fld.compare("pcbytes=");
|
81 |
}
|
81 |
}
|
82 |
|
82 |
|
83 |
// Compute the sort key for a document from its data record.
//
// The record is searched by hand for "<field>=" and the value is taken
// up to the next newline or carriage return. This is faster than doing
// the record->Rcl::Doc conversion, but it makes more assumptions about
// the record format than a ConfTree would.
//
// Returns an empty string if the field is not found, if nothing follows
// the '=', or if the value is not terminated by "\n" or "\r".
// Otherwise:
//  - mtime field: the value is returned as is;
//  - size fields: the value goes through leftzeropad(term, 12) so that
//    lexicographic order matches numeric order;
//  - anything else: the value goes through unacmaybefold() (the raw
//    value is used if that fails) and leading characters from a fixed
//    set of "uninteresting" ones are dropped. This is not real unicode
//    collation (http://unicode.org/reports/tr10/#Introduction), but it
//    removes the most glaring weirdnesses.
virtual std::string operator()(const Xapian::Document& xdoc) const
{
    const string data = xdoc.get_data();

    string::size_type start = data.find(m_fld);
    if (start == string::npos && m_ismtime) {
        // Ugly: special-case mtime as it's either dmtime or fmtime
        start = data.find("fmtime=");
    }
    if (start == string::npos)
        return string();

    // Skip over "name=". When the fmtime fallback was used this still
    // adds m_fld.length(): "dmtime=" and "fmtime=" have the same length.
    start += m_fld.length();
    if (start >= data.length())
        return string();
    const string::size_type end = data.find_first_of("\n\r", start);
    if (end == string::npos)
        return string();

    string term = data.substr(start, end - start);
    if (m_ismtime)
        return term;
    if (m_issize) {
        // Left zeropad values for appropriate numeric sorting
        leftzeropad(term, 12);
        return term;
    }

    // We're not even sure the term is utf8 here (ie: url), so fall back
    // to the unprocessed value if the transformation fails.
    string sortterm;
    if (!unacmaybefold(term, sortterm, "UTF-8", UNACOP_UNACFOLD))
        sortterm = term;

    // Also remove some common uninteresting starting characters. A
    // value made only of such characters is kept whole.
    const string::size_type skip =
        sortterm.find_first_not_of(" \t\\\"'([*+,.#/");
    if (skip != 0 && skip != string::npos)
        sortterm.erase(0, skip);

    LOGDEB2("QSorter: [" << term << "] -> [" << sortterm << "]\n");
    return sortterm;
}
|
138 |
|
138 |
|
139 |
private:
|
139 |
private:
|
140 |
string m_fld;
|
140 |
string m_fld;
|
141 |
bool m_ismtime;
|
141 |
bool m_ismtime;
|
|
... |
|
... |
145 |
// Create a query object bound to db (which may be null). Defaults: no
// sorter, ascending sort, no duplicate collapsing, unknown result count
// (-1), and a snippet position walk limit of 1000000. When a db is
// given, the limit is then read from its "snippetMaxPosWalk"
// configuration parameter.
Query::Query(Db *db)
    : m_nq(new Native(this)),
      m_db(db),
      m_sorter(nullptr),
      m_sortAscending(true),
      m_collapseDuplicates(false),
      m_resCnt(-1),
      m_snipMaxPosWalk(1000000)
{
    if (!db)
        return;
    db->getConf()->getConfParam("snippetMaxPosWalk", &m_snipMaxPosWalk);
}
|
152 |
|
152 |
|
153 |
// Dispose of the native query object and of the sort key maker.
Query::~Query()
{
    deleteZ(m_nq);
    // The sorter is created as a QSorter by setQuery(), hence the cast.
    // Deleting a null pointer is a no-op, so no test is needed.
    delete (QSorter*)m_sorter;
    m_sorter = 0;
}
|
161 |
|
161 |
|
162 |
void Query::setSortBy(const string& fld, bool ascending) {
|
162 |
void Query::setSortBy(const string& fld, bool ascending) {
|
163 |
if (fld.empty()) {
|
163 |
if (fld.empty()) {
|
164 |
m_sortField.erase();
|
164 |
m_sortField.erase();
|
165 |
} else {
|
165 |
} else {
|
166 |
m_sortField = m_db->getConf()->fieldQCanon(fld);
|
166 |
m_sortField = m_db->getConf()->fieldQCanon(fld);
|
167 |
m_sortAscending = ascending;
|
167 |
m_sortAscending = ascending;
|
168 |
}
|
168 |
}
|
169 |
LOGDEB0("RclQuery::setSortBy: [" << (m_sortField) << "] " << (m_sortAscending ? "ascending" : "descending") << "\n" );
|
169 |
LOGDEB0("RclQuery::setSortBy: [" << m_sortField << "] " <<
|
|
|
170 |
(m_sortAscending ? "ascending" : "descending") << "\n");
|
170 |
}
|
171 |
}
|
171 |
|
172 |
|
172 |
//#define ISNULL(X) (X).isNull()
|
173 |
//#define ISNULL(X) (X).isNull()
|
173 |
#define ISNULL(X) !(X)
|
174 |
#define ISNULL(X) !(X)
|
174 |
|
175 |
|
175 |
// Prepare query out of user search data
|
176 |
// Prepare query out of user search data
|
176 |
bool Query::setQuery(std::shared_ptr<SearchData> sdata)
|
177 |
bool Query::setQuery(std::shared_ptr<SearchData> sdata)
|
177 |
{
|
178 |
{
|
178 |
LOGDEB("Query::setQuery:\n" );
|
179 |
LOGDEB("Query::setQuery:\n");
|
179 |
|
180 |
|
180 |
if (!m_db || ISNULL(m_nq)) {
|
181 |
if (!m_db || ISNULL(m_nq)) {
|
181 |
LOGERR("Query::setQuery: not initialised!\n" );
|
182 |
LOGERR("Query::setQuery: not initialised!\n");
|
182 |
return false;
|
183 |
return false;
|
183 |
}
|
184 |
}
|
184 |
m_resCnt = -1;
|
185 |
m_resCnt = -1;
|
185 |
m_reason.erase();
|
186 |
m_reason.erase();
|
186 |
|
187 |
|
187 |
m_nq->clear();
|
188 |
m_nq->clear();
|
188 |
m_sd = sdata;
|
189 |
m_sd = sdata;
|
189 |
|
190 |
|
190 |
Xapian::Query xq;
|
191 |
Xapian::Query xq;
|
191 |
if (!sdata->toNativeQuery(*m_db, &xq)) {
|
192 |
if (!sdata->toNativeQuery(*m_db, &xq)) {
|
192 |
m_reason += sdata->getReason();
|
193 |
m_reason += sdata->getReason();
|
193 |
return false;
|
194 |
return false;
|
194 |
}
|
195 |
}
|
195 |
|
196 |
|
196 |
m_nq->xquery = xq;
|
197 |
m_nq->xquery = xq;
|
197 |
|
198 |
|
198 |
string d;
|
199 |
string d;
|
199 |
for (int tries = 0; tries < 2; tries++) {
|
200 |
for (int tries = 0; tries < 2; tries++) {
|
200 |
try {
|
201 |
try {
|
201 |
m_nq->xenquire = new Xapian::Enquire(m_db->m_ndb->xrdb);
|
202 |
m_nq->xenquire = new Xapian::Enquire(m_db->m_ndb->xrdb);
|
202 |
if (m_collapseDuplicates) {
|
203 |
if (m_collapseDuplicates) {
|
203 |
m_nq->xenquire->set_collapse_key(Rcl::VALUE_MD5);
|
204 |
m_nq->xenquire->set_collapse_key(Rcl::VALUE_MD5);
|
204 |
} else {
|
205 |
} else {
|
205 |
m_nq->xenquire->set_collapse_key(Xapian::BAD_VALUENO);
|
206 |
m_nq->xenquire->set_collapse_key(Xapian::BAD_VALUENO);
|
206 |
}
|
207 |
}
|
207 |
m_nq->xenquire->set_docid_order(Xapian::Enquire::DONT_CARE);
|
208 |
m_nq->xenquire->set_docid_order(Xapian::Enquire::DONT_CARE);
|
208 |
if (!m_sortField.empty() &&
|
209 |
if (!m_sortField.empty() &&
|
209 |
stringlowercmp("relevancyrating", m_sortField)) {
|
210 |
stringlowercmp("relevancyrating", m_sortField)) {
|
210 |
if (m_sorter) {
|
211 |
if (m_sorter) {
|
211 |
delete (QSorter*)m_sorter;
|
212 |
delete (QSorter*)m_sorter;
|
212 |
m_sorter = 0;
|
213 |
m_sorter = 0;
|
213 |
}
|
214 |
}
|
214 |
m_sorter = new QSorter(m_sortField);
|
215 |
m_sorter = new QSorter(m_sortField);
|
215 |
// It really seems there is a xapian bug about sort order, we
|
216 |
// It really seems there is a xapian bug about sort order, we
|
216 |
// invert here.
|
217 |
// invert here.
|
217 |
m_nq->xenquire->set_sort_by_key((QSorter*)m_sorter,
|
218 |
m_nq->xenquire->set_sort_by_key((QSorter*)m_sorter,
|
218 |
!m_sortAscending);
|
219 |
!m_sortAscending);
|
219 |
}
|
220 |
}
|
220 |
m_nq->xenquire->set_query(m_nq->xquery);
|
221 |
m_nq->xenquire->set_query(m_nq->xquery);
|
221 |
m_nq->xmset = Xapian::MSet();
|
222 |
m_nq->xmset = Xapian::MSet();
|
222 |
// Get the query description and trim the "Xapian::Query"
|
223 |
// Get the query description and trim the "Xapian::Query"
|
223 |
d = m_nq->xquery.get_description();
|
224 |
d = m_nq->xquery.get_description();
|
224 |
m_reason.erase();
|
225 |
m_reason.erase();
|
225 |
break;
|
226 |
break;
|
226 |
} catch (const Xapian::DatabaseModifiedError &e) {
|
227 |
} catch (const Xapian::DatabaseModifiedError &e) {
|
227 |
m_reason = e.get_msg();
|
228 |
m_reason = e.get_msg();
|
228 |
m_db->m_ndb->xrdb.reopen();
|
229 |
m_db->m_ndb->xrdb.reopen();
|
229 |
continue;
|
230 |
continue;
|
230 |
} XCATCHERROR(m_reason);
|
231 |
} XCATCHERROR(m_reason);
|
231 |
break;
|
232 |
break;
|
232 |
}
|
233 |
}
|
233 |
|
234 |
|
234 |
if (!m_reason.empty()) {
|
235 |
if (!m_reason.empty()) {
|
235 |
LOGDEB("Query::SetQuery: xapian error " << (m_reason) << "\n" );
|
236 |
LOGDEB("Query::SetQuery: xapian error " << m_reason << "\n");
|
236 |
return false;
|
237 |
return false;
|
237 |
}
|
238 |
}
|
238 |
|
239 |
|
239 |
if (d.find("Xapian::Query") == 0)
|
240 |
if (d.find("Xapian::Query") == 0)
|
240 |
d.erase(0, strlen("Xapian::Query"));
|
241 |
d.erase(0, strlen("Xapian::Query"));
|
241 |
|
242 |
|
242 |
sdata->setDescription(d);
|
243 |
sdata->setDescription(d);
|
243 |
m_sd = sdata;
|
244 |
m_sd = sdata;
|
244 |
LOGDEB("Query::SetQuery: Q: " << (sdata->getDescription()) << "\n" );
|
245 |
LOGDEB("Query::SetQuery: Q: " << sdata->getDescription() << "\n");
|
245 |
return true;
|
246 |
return true;
|
246 |
}
|
247 |
}
|
247 |
|
248 |
|
248 |
// Replace the contents of terms with the terms of the current native
// query. Returns false if there is no native query object or if Xapian
// reports an error; in the latter case terms may hold a partial list.
bool Query::getQueryTerms(vector<string>& terms)
{
    if (ISNULL(m_nq))
        return false;

    terms.clear();
    string ermsg;
    try {
        Xapian::TermIterator it = m_nq->xquery.get_terms_begin();
        while (it != m_nq->xquery.get_terms_end()) {
            terms.push_back(*it);
            it++;
        }
    } XCATCHERROR(ermsg);

    if (ermsg.empty())
        return true;
    LOGERR("getQueryTerms: xapian error: " << ermsg << "\n");
    return false;
}
|
268 |
|
269 |
|
269 |
int Query::makeDocAbstract(const Doc &doc,
|
270 |
int Query::makeDocAbstract(const Doc &doc, vector<Snippet>& abstract,
|
270 |
vector<Snippet>& abstract,
|
271 |
int maxoccs, int ctxwords)
|
271 |
int maxoccs, int ctxwords)
|
|
|
272 |
{
|
272 |
{
|
273 |
LOGDEB("makeDocAbstract: maxoccs " << (maxoccs) << " ctxwords " << (ctxwords) << "\n" );
|
273 |
LOGDEB("makeDocAbstract: maxoccs " << maxoccs << " ctxwords " <<
|
|
|
274 |
ctxwords << "\n");
|
274 |
if (!m_db || !m_db->m_ndb || !m_db->m_ndb->m_isopen || !m_nq) {
|
275 |
if (!m_db || !m_db->m_ndb || !m_db->m_ndb->m_isopen || !m_nq) {
|
275 |
LOGERR("Query::makeDocAbstract: no db or no nq\n" );
|
276 |
LOGERR("Query::makeDocAbstract: no db or no nq\n");
|
276 |
return ABSRES_ERROR;
|
277 |
return ABSRES_ERROR;
|
277 |
}
|
278 |
}
|
278 |
int ret = ABSRES_ERROR;
|
279 |
int ret = ABSRES_ERROR;
|
279 |
XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords),
|
280 |
XAPTRY(ret = m_nq->makeAbstract(doc.xdocid, abstract, maxoccs, ctxwords),
|
280 |
m_db->m_ndb->xrdb, m_reason);
|
281 |
m_db->m_ndb->xrdb, m_reason);
|
281 |
if (!m_reason.empty()) {
|
282 |
if (!m_reason.empty()) {
|
282 |
LOGDEB("makeDocAbstract: makeAbstract error, reason: " << (m_reason) << "\n" );
|
283 |
LOGDEB("makeDocAbstract: makeAbstract: reason: " << m_reason << "\n");
|
283 |
return ABSRES_ERROR;
|
284 |
return ABSRES_ERROR;
|
284 |
}
|
285 |
}
|
285 |
return ret;
|
286 |
return ret;
|
286 |
}
|
287 |
}
|
287 |
|
288 |
|
288 |
bool Query::makeDocAbstract(const Doc &doc, vector<string>& abstract)
|
289 |
bool Query::makeDocAbstract(const Doc &doc, vector<string>& abstract)
|
289 |
{
|
290 |
{
|
290 |
vector<Snippet> vpabs;
|
291 |
vector<Snippet> vpabs;
|
291 |
if (!makeDocAbstract(doc, vpabs))
|
292 |
if (!makeDocAbstract(doc, vpabs))
|
292 |
return false;
|
293 |
return false;
|
293 |
for (vector<Snippet>::const_iterator it = vpabs.begin();
|
294 |
for (vector<Snippet>::const_iterator it = vpabs.begin();
|
294 |
it != vpabs.end(); it++) {
|
295 |
it != vpabs.end(); it++) {
|
295 |
string chunk;
|
296 |
string chunk;
|
296 |
if (it->page > 0) {
|
297 |
if (it->page > 0) {
|
297 |
ostringstream ss;
|
298 |
ostringstream ss;
|
298 |
ss << it->page;
|
299 |
ss << it->page;
|
299 |
chunk += string(" [p ") + ss.str() + "] ";
|
300 |
chunk += string(" [p ") + ss.str() + "] ";
|
300 |
}
|
301 |
}
|
301 |
chunk += it->snippet;
|
302 |
chunk += it->snippet;
|
302 |
abstract.push_back(chunk);
|
303 |
abstract.push_back(chunk);
|
303 |
}
|
304 |
}
|
304 |
return true;
|
305 |
return true;
|
305 |
}
|
306 |
}
|
306 |
|
307 |
|
307 |
bool Query::makeDocAbstract(const Doc &doc, string& abstract)
|
308 |
bool Query::makeDocAbstract(const Doc &doc, string& abstract)
|
308 |
{
|
309 |
{
|
309 |
vector<Snippet> vpabs;
|
310 |
vector<Snippet> vpabs;
|
310 |
if (!makeDocAbstract(doc, vpabs))
|
311 |
if (!makeDocAbstract(doc, vpabs))
|
311 |
return false;
|
312 |
return false;
|
312 |
for (vector<Snippet>::const_iterator it = vpabs.begin();
|
313 |
for (vector<Snippet>::const_iterator it = vpabs.begin();
|
313 |
it != vpabs.end(); it++) {
|
314 |
it != vpabs.end(); it++) {
|
314 |
abstract.append(it->snippet);
|
315 |
abstract.append(it->snippet);
|
315 |
abstract.append(cstr_ellipsis);
|
316 |
abstract.append(cstr_ellipsis);
|
316 |
}
|
317 |
}
|
317 |
return m_reason.empty() ? true : false;
|
318 |
return m_reason.empty() ? true : false;
|
318 |
}
|
319 |
}
|
319 |
|
320 |
|
320 |
int Query::getFirstMatchPage(const Doc &doc, string& term)
|
321 |
int Query::getFirstMatchPage(const Doc &doc, string& term)
|
321 |
{
|
322 |
{
|
322 |
LOGDEB1("Db::getFirstMatchPage\n" );;
|
323 |
LOGDEB1("Db::getFirstMatchPage\n");;
|
323 |
if (!m_nq) {
|
324 |
if (!m_nq) {
|
324 |
LOGERR("Query::getFirstMatchPage: no nq\n" );
|
325 |
LOGERR("Query::getFirstMatchPage: no nq\n");
|
325 |
return false;
|
326 |
return false;
|
326 |
}
|
327 |
}
|
327 |
int pagenum = -1;
|
328 |
int pagenum = -1;
|
328 |
XAPTRY(pagenum = m_nq->getFirstMatchPage(Xapian::docid(doc.xdocid), term),
|
329 |
XAPTRY(pagenum = m_nq->getFirstMatchPage(Xapian::docid(doc.xdocid), term),
|
329 |
m_db->m_ndb->xrdb, m_reason);
|
330 |
m_db->m_ndb->xrdb, m_reason);
|
330 |
return m_reason.empty() ? pagenum : -1;
|
331 |
return m_reason.empty() ? pagenum : -1;
|
331 |
}
|
332 |
}
|
332 |
|
333 |
|
333 |
|
334 |
|
334 |
// Mset size
|
335 |
// Mset size
|
|
... |
|
... |
337 |
// Get estimated result count for query. Xapian actually does most of
|
338 |
// Get estimated result count for query. Xapian actually does most of
|
338 |
// the search job in there, this can be long
|
339 |
// the search job in there, this can be long
|
339 |
int Query::getResCnt()
|
340 |
int Query::getResCnt()
|
340 |
{
|
341 |
{
|
341 |
if (ISNULL(m_nq) || !m_nq->xenquire) {
|
342 |
if (ISNULL(m_nq) || !m_nq->xenquire) {
|
342 |
LOGERR("Query::getResCnt: no query opened\n" );
|
343 |
LOGERR("Query::getResCnt: no query opened\n");
|
343 |
return -1;
|
344 |
return -1;
|
344 |
}
|
345 |
}
|
345 |
if (m_resCnt >= 0)
|
346 |
if (m_resCnt >= 0)
|
346 |
return m_resCnt;
|
347 |
return m_resCnt;
|
347 |
|
348 |
|
348 |
m_resCnt = -1;
|
349 |
m_resCnt = -1;
|
349 |
if (m_nq->xmset.size() <= 0) {
|
350 |
if (m_nq->xmset.size() <= 0) {
|
350 |
Chrono chron;
|
351 |
Chrono chron;
|
351 |
|
352 |
|
352 |
XAPTRY(m_nq->xmset =
|
353 |
XAPTRY(m_nq->xmset =
|
353 |
m_nq->xenquire->get_mset(0, qquantum, 1000);
|
354 |
m_nq->xenquire->get_mset(0, qquantum, 1000);
|
354 |
m_resCnt = m_nq->xmset.get_matches_lower_bound(),
|
355 |
m_resCnt = m_nq->xmset.get_matches_lower_bound(),
|
355 |
m_db->m_ndb->xrdb, m_reason);
|
356 |
m_db->m_ndb->xrdb, m_reason);
|
356 |
|
357 |
|
357 |
LOGDEB("Query::getResCnt: " << (m_resCnt) << " " << (chron.millis()) << " mS\n" );
|
358 |
LOGDEB("Query::getResCnt: "<<m_resCnt<<" "<< chron.millis() << " mS\n");
|
358 |
if (!m_reason.empty())
|
359 |
if (!m_reason.empty())
|
359 |
LOGERR("xenquire->get_mset: exception: " << (m_reason) << "\n" );
|
360 |
LOGERR("xenquire->get_mset: exception: " << m_reason << "\n");
|
360 |
} else {
|
361 |
} else {
|
361 |
m_resCnt = m_nq->xmset.get_matches_lower_bound();
|
362 |
m_resCnt = m_nq->xmset.get_matches_lower_bound();
|
362 |
}
|
363 |
}
|
363 |
return m_resCnt;
|
364 |
return m_resCnt;
|
364 |
}
|
365 |
}
|
|
... |
|
... |
371 |
// Note that as stated by a Xapian developer, Enquire searches from
|
372 |
// Note that as stated by a Xapian developer, Enquire searches from
|
372 |
// scratch each time get_mset() is called. So the better performance
|
373 |
// scratch each time get_mset() is called. So the better performance
|
373 |
// on subsequent calls is probably only due to disk caching.
|
374 |
// on subsequent calls is probably only due to disk caching.
|
374 |
bool Query::getDoc(int xapi, Doc &doc)
|
375 |
bool Query::getDoc(int xapi, Doc &doc)
|
375 |
{
|
376 |
{
|
376 |
LOGDEB1("Query::getDoc: xapian enquire index " << (xapi) << "\n" );
|
377 |
LOGDEB1("Query::getDoc: xapian enquire index " << xapi << "\n");
|
377 |
if (ISNULL(m_nq) || !m_nq->xenquire) {
|
378 |
if (ISNULL(m_nq) || !m_nq->xenquire) {
|
378 |
LOGERR("Query::getDoc: no query opened\n" );
|
379 |
LOGERR("Query::getDoc: no query opened\n");
|
379 |
return false;
|
380 |
return false;
|
380 |
}
|
381 |
}
|
381 |
|
382 |
|
382 |
int first = m_nq->xmset.get_firstitem();
|
383 |
int first = m_nq->xmset.get_firstitem();
|
383 |
int last = first + m_nq->xmset.size() -1;
|
384 |
int last = first + m_nq->xmset.size() -1;
|
384 |
|
385 |
|
385 |
if (!(xapi >= first && xapi <= last)) {
|
386 |
if (!(xapi >= first && xapi <= last)) {
|
386 |
LOGDEB("Fetching for first " << (xapi) << ", count " << (qquantum) << "\n" );
|
387 |
LOGDEB("Fetching for first " << xapi << ", count " << qquantum << "\n");
|
387 |
|
388 |
|
388 |
XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,
|
389 |
XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,
|
389 |
(const Xapian::RSet *)0),
|
390 |
(const Xapian::RSet *)0),
|
390 |
m_db->m_ndb->xrdb, m_reason);
|
391 |
m_db->m_ndb->xrdb, m_reason);
|
391 |
|
392 |
|
392 |
if (!m_reason.empty()) {
|
393 |
if (!m_reason.empty()) {
|
393 |
LOGERR("enquire->get_mset: exception: " << (m_reason) << "\n" );
|
394 |
LOGERR("enquire->get_mset: exception: " << m_reason << "\n");
|
394 |
return false;
|
395 |
return false;
|
395 |
}
|
396 |
}
|
396 |
if (m_nq->xmset.empty()) {
|
397 |
if (m_nq->xmset.empty()) {
|
397 |
LOGDEB("enquire->get_mset: got empty result\n" );
|
398 |
LOGDEB("enquire->get_mset: got empty result\n");
|
398 |
return false;
|
399 |
return false;
|
399 |
}
|
400 |
}
|
400 |
first = m_nq->xmset.get_firstitem();
|
401 |
first = m_nq->xmset.get_firstitem();
|
401 |
last = first + m_nq->xmset.size() -1;
|
402 |
last = first + m_nq->xmset.size() -1;
|
402 |
}
|
403 |
}
|
403 |
|
404 |
|
404 |
Xapian::Document xdoc;
|
405 |
Xapian::Document xdoc;
|
405 |
Xapian::docid docid = 0;
|
406 |
Xapian::docid docid = 0;
|
406 |
int pc = 0;
|
407 |
int pc = 0;
|
|
... |
|
... |
409 |
string udi;
|
410 |
string udi;
|
410 |
m_reason.erase();
|
411 |
m_reason.erase();
|
411 |
for (int xaptries=0; xaptries < 2; xaptries++) {
|
412 |
for (int xaptries=0; xaptries < 2; xaptries++) {
|
412 |
try {
|
413 |
try {
|
413 |
xdoc = m_nq->xmset[xapi-first].get_document();
|
414 |
xdoc = m_nq->xmset[xapi-first].get_document();
|
414 |
collapsecount = m_nq->xmset[xapi-first].get_collapse_count();
|
415 |
collapsecount = m_nq->xmset[xapi-first].get_collapse_count();
|
415 |
docid = *(m_nq->xmset[xapi-first]);
|
416 |
docid = *(m_nq->xmset[xapi-first]);
|
416 |
pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]);
|
417 |
pc = m_nq->xmset.convert_to_percent(m_nq->xmset[xapi-first]);
|
417 |
data = xdoc.get_data();
|
418 |
data = xdoc.get_data();
|
418 |
m_reason.erase();
|
419 |
m_reason.erase();
|
419 |
Chrono chron;
|
420 |
Chrono chron;
|
420 |
m_db->m_ndb->xdocToUdi(xdoc, udi);
|
421 |
m_db->m_ndb->xdocToUdi(xdoc, udi);
|
421 |
LOGDEB2("Query::getDoc: " << (chron.millis()) << " ms for udi [" << (udi) << "], collapse count " << (collapsecount) << "\n" );
|
422 |
LOGDEB2("Query::getDoc: " << chron.millis() << " ms for udi [" <<
|
|
|
423 |
udi << "], collapse count " << collapsecount << "\n");
|
422 |
break;
|
424 |
break;
|
423 |
} catch (Xapian::DatabaseModifiedError &error) {
|
425 |
} catch (Xapian::DatabaseModifiedError &error) {
|
424 |
// retry or end of loop
|
426 |
// retry or end of loop
|
425 |
m_reason = error.get_msg();
|
427 |
m_reason = error.get_msg();
|
426 |
continue;
|
428 |
continue;
|
427 |
}
|
429 |
}
|
428 |
XCATCHERROR(m_reason);
|
430 |
XCATCHERROR(m_reason);
|
429 |
break;
|
431 |
break;
|
430 |
}
|
432 |
}
|
431 |
if (!m_reason.empty()) {
|
433 |
if (!m_reason.empty()) {
|
432 |
LOGERR("Query::getDoc: " << (m_reason) << "\n" );
|
434 |
LOGERR("Query::getDoc: " << m_reason << "\n");
|
433 |
return false;
|
435 |
return false;
|
434 |
}
|
436 |
}
|
435 |
doc.meta[Rcl::Doc::keyudi] = udi;
|
437 |
doc.meta[Rcl::Doc::keyudi] = udi;
|
436 |
|
438 |
|
437 |
doc.pc = pc;
|
439 |
doc.pc = pc;
|
438 |
char buf[200];
|
440 |
char buf[200];
|
439 |
if (collapsecount > 0) {
|
441 |
if (collapsecount > 0) {
|
440 |
sprintf(buf,"%3d%% (%d)", pc, collapsecount + 1);
|
442 |
sprintf(buf,"%3d%% (%d)", pc, collapsecount + 1);
|
441 |
} else {
|
443 |
} else {
|
442 |
sprintf(buf,"%3d%%", pc);
|
444 |
sprintf(buf,"%3d%%", pc);
|
443 |
}
|
445 |
}
|
444 |
doc.meta[Doc::keyrr] = buf;
|
446 |
doc.meta[Doc::keyrr] = buf;
|
445 |
|
447 |
|
446 |
if (collapsecount > 0) {
|
448 |
if (collapsecount > 0) {
|
447 |
sprintf(buf, "%d", collapsecount);
|
449 |
sprintf(buf, "%d", collapsecount);
|
448 |
doc.meta[Rcl::Doc::keycc] = buf;
|
450 |
doc.meta[Rcl::Doc::keycc] = buf;
|
449 |
}
|
451 |
}
|
450 |
|
452 |
|
451 |
// Parse xapian document's data and populate doc fields
|
453 |
// Parse xapian document's data and populate doc fields
|
452 |
return m_db->m_ndb->dbDataToRclDoc(docid, data, doc);
|
454 |
return m_db->m_ndb->dbDataToRclDoc(docid, data, doc);
|
453 |
}
|
455 |
}
|
454 |
|
456 |
|
455 |
// Compute expansion terms for the current query, using doc as the only
// member of the relevance set. Up to 20 candidates are requested from
// Xapian, without excluding the original query terms; empty terms and
// terms for which has_prefix() is true are skipped, and at most 10
// terms are returned. The result is empty if no query is open or if
// Xapian reports an error. A Xapian::DatabaseModifiedError causes the
// index to be reopened and one more attempt to be made.
vector<string> Query::expand(const Doc &doc)
{
    LOGDEB("Rcl::Query::expand()\n");
    vector<string> res;
    if (ISNULL(m_nq) || !m_nq->xenquire) {
        LOGERR("Query::expand: no query opened\n");
        return res;
    }

    for (int tries = 0; tries < 2; tries++) {
        try {
            Xapian::RSet rset;
            rset.add_document(Xapian::docid(doc.xdocid));
            // We don't exclude the original query terms.
            Xapian::ESet eset = m_nq->xenquire->get_eset(20, rset, false);
            LOGDEB("ESet terms:\n");
            // We filter out the special terms
            Xapian::ESetIterator it = eset.begin();
            while (it != eset.end()) {
                LOGDEB(" [" << (*it) << "]\n");
                if (!(*it).empty() && !has_prefix(*it)) {
                    res.push_back(*it);
                    if (res.size() >= 10)
                        break;
                }
                it++;
            }
            m_reason.erase();
            break;
        } catch (const Xapian::DatabaseModifiedError &e) {
            m_reason = e.get_msg();
            m_db->m_ndb->xrdb.reopen();
            continue;
        } XCATCHERROR(m_reason);
        break;
    }

    if (m_reason.empty())
        return res;

    LOGERR("Query::expand: xapian error " << m_reason << "\n");
    res.clear();
    return res;
}
|