|
a/src/rcldb/rclabstract.cpp |
|
b/src/rcldb/rclabstract.cpp |
|
... |
|
... |
58 |
static void listList(const string&, const vector<string>&)
|
58 |
static void listList(const string&, const vector<string>&)
|
59 |
{
|
59 |
{
|
60 |
}
|
60 |
}
|
61 |
#endif
|
61 |
#endif
|
62 |
|
62 |
|
63 |
// Unprefix terms.
|
63 |
// Unprefix terms. Actually it's not completely clear if we should
|
|
|
64 |
// remove prefixes and keep all terms or prune the prefixed
|
|
|
65 |
// ones. There is no good way to be sure what will provide the best
|
|
|
66 |
// result in general.
|
|
|
67 |
static const bool prune_prefixed_terms = true;
|
64 |
static void noPrefixList(const vector<string>& in, vector<string>& out)
|
68 |
static void noPrefixList(const vector<string>& in, vector<string>& out)
|
65 |
{
|
69 |
{
|
66 |
for (vector<string>::const_iterator qit = in.begin();
|
70 |
for (vector<string>::const_iterator qit = in.begin();
|
67 |
qit != in.end(); qit++) {
|
71 |
qit != in.end(); qit++) {
|
|
|
72 |
if (prune_prefixed_terms) {
|
|
|
73 |
if (has_prefix(*qit))
|
|
|
74 |
continue;
|
|
|
75 |
}
|
68 |
out.push_back(strip_prefix(*qit));
|
76 |
out.push_back(strip_prefix(*qit));
|
69 |
}
|
77 |
}
|
70 |
sort(out.begin(), out.end());
|
78 |
sort(out.begin(), out.end());
|
71 |
vector<string>::iterator it = unique(out.begin(), out.end());
|
79 |
vector<string>::iterator it = unique(out.begin(), out.end());
|
72 |
out.resize(it - out.begin());
|
80 |
out.resize(it - out.begin());
|
|
... |
|
... |
80 |
}
|
88 |
}
|
81 |
|
89 |
|
82 |
terms.clear();
|
90 |
terms.clear();
|
83 |
Xapian::TermIterator it;
|
91 |
Xapian::TermIterator it;
|
84 |
Xapian::docid id = Xapian::docid(xdocid);
|
92 |
Xapian::docid id = Xapian::docid(xdocid);
|
85 |
|
93 |
vector<string> iterms;
|
86 |
XAPTRY(terms.insert(terms.begin(),
|
94 |
XAPTRY(iterms.insert(iterms.begin(),
|
87 |
xenquire->get_matching_terms_begin(id),
|
95 |
xenquire->get_matching_terms_begin(id),
|
88 |
xenquire->get_matching_terms_end(id)),
|
96 |
xenquire->get_matching_terms_end(id)),
|
89 |
m_q->m_db->m_ndb->xrdb, m_q->m_reason);
|
97 |
m_q->m_db->m_ndb->xrdb, m_q->m_reason);
|
90 |
|
|
|
91 |
if (!m_q->m_reason.empty()) {
|
98 |
if (!m_q->m_reason.empty()) {
|
92 |
LOGERR(("getMatchTerms: xapian error: %s\n", m_q->m_reason.c_str()));
|
99 |
LOGERR(("getMatchTerms: xapian error: %s\n", m_q->m_reason.c_str()));
|
93 |
return false;
|
100 |
return false;
|
94 |
}
|
101 |
}
|
95 |
|
102 |
noPrefixList(iterms, terms);
|
96 |
return true;
|
103 |
return true;
|
97 |
}
|
104 |
}
|
98 |
|
105 |
|
99 |
// Retrieve db-wide frequencies for the query terms and store them in
|
106 |
// Retrieve db-wide frequencies for the query terms and store them in
|
100 |
// the query object. This is done at most once for a query, and the data is used
|
107 |
// the query object. This is done at most once for a query, and the data is used
|
|
... |
|
... |
257 |
}
|
264 |
}
|
258 |
Rcl::Db::Native *ndb(m_q->m_db->m_ndb);
|
265 |
Rcl::Db::Native *ndb(m_q->m_db->m_ndb);
|
259 |
Xapian::Database& xrdb(ndb->xrdb);
|
266 |
Xapian::Database& xrdb(ndb->xrdb);
|
260 |
|
267 |
|
261 |
vector<string> terms;
|
268 |
vector<string> terms;
|
262 |
{
|
|
|
263 |
vector<string> iterms;
|
|
|
264 |
getMatchTerms(docid, iterms);
|
269 |
getMatchTerms(docid, terms);
|
265 |
noPrefixList(iterms, terms);
|
270 |
|
266 |
}
|
|
|
267 |
if (terms.empty()) {
|
271 |
if (terms.empty()) {
|
268 |
LOGDEB(("getFirstMatchPage: empty match term list (field match?)\n"));
|
272 |
LOGDEB(("getFirstMatchPage: empty match term list (field match?)\n"));
|
269 |
return -1;
|
273 |
return -1;
|
270 |
}
|
274 |
}
|
271 |
|
275 |
|
|
... |
|
... |
317 |
LOGABS(("makeAbstract: docid %ld imaxoccs %d ictxwords %d\n",
|
321 |
LOGABS(("makeAbstract: docid %ld imaxoccs %d ictxwords %d\n",
|
318 |
long(docid), imaxoccs, ictxwords));
|
322 |
long(docid), imaxoccs, ictxwords));
|
319 |
|
323 |
|
320 |
// The (unprefixed) terms matched by this document
|
324 |
// The (unprefixed) terms matched by this document
|
321 |
vector<string> matchedTerms;
|
325 |
vector<string> matchedTerms;
|
322 |
{
|
|
|
323 |
vector<string> iterms;
|
|
|
324 |
getMatchTerms(docid, iterms);
|
326 |
getMatchTerms(docid, matchedTerms);
|
325 |
noPrefixList(iterms, matchedTerms);
|
|
|
326 |
if (matchedTerms.empty()) {
|
327 |
if (matchedTerms.empty()) {
|
327 |
LOGDEB(("makeAbstract::Empty term list\n"));
|
328 |
LOGDEB(("makeAbstract::Empty term list\n"));
|
328 |
return ABSRES_ERROR;
|
329 |
return ABSRES_ERROR;
|
329 |
}
|
330 |
}
|
330 |
}
|
331 |
|
331 |
listList("Match terms: ", matchedTerms);
|
332 |
listList("Match terms: ", matchedTerms);
|
332 |
|
333 |
|
333 |
// Retrieve the term frequencies for the query terms. This is
|
334 |
// Retrieve the term frequencies for the query terms. This is
|
334 |
// actually computed only once for a query, and for all terms in
|
335 |
// actually computed only once for a query, and for all terms in
|
335 |
// the query (not only the matches for this doc)
|
336 |
// the query (not only the matches for this doc)
|