|
a/src/rcldb/rclabstract.cpp |
|
b/src/rcldb/rclabstract.cpp |
|
... |
|
... |
328 |
return ABSRES_ERROR;
|
328 |
return ABSRES_ERROR;
|
329 |
}
|
329 |
}
|
330 |
}
|
330 |
}
|
331 |
listList("Match terms: ", matchedTerms);
|
331 |
listList("Match terms: ", matchedTerms);
|
332 |
|
332 |
|
333 |
// Retrieve the term freqencies for the query terms. This is
|
333 |
// Retrieve the term frequencies for the query terms. This is
|
334 |
// actually computed only once for a query, and for all terms in
|
334 |
// actually computed only once for a query, and for all terms in
|
335 |
// the query (not only the matches for this doc)
|
335 |
// the query (not only the matches for this doc)
|
336 |
setDbWideQTermsFreqs();
|
336 |
setDbWideQTermsFreqs();
|
337 |
|
337 |
|
338 |
// Build a sorted by quality container for the match terms. We are
|
338 |
// Build a sorted by quality container for the match terms. We are
|
339 |
// going to try and show text around the less common search terms.
|
339 |
// going to try and show text around the less common search terms.
|
340 |
// TOBEDONE: terms issued from an original one by stem expansion
|
340 |
// Terms issued from an original one by stem expansion are
|
341 |
// should be somehow aggregated here, else, it may happen that
|
341 |
// aggregated by the qualityTerms() routine.
|
342 |
// such a group prevents displaying matches for other terms (by
|
|
|
343 |
// removing its meaning from the maximum occurrences per term test
|
|
|
344 |
// used while walking the list below)
|
|
|
345 |
multimap<double, vector<string> > byQ;
|
342 |
multimap<double, vector<string> > byQ;
|
346 |
double totalweight = qualityTerms(docid, matchedTerms, byQ);
|
343 |
double totalweight = qualityTerms(docid, matchedTerms, byQ);
|
347 |
LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));
|
344 |
LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));
|
348 |
// This can't happen, but would crash us
|
345 |
// This can't happen, but would crash us
|
349 |
if (totalweight == 0.0) {
|
346 |
if (totalweight == 0.0) {
|
|
... |
|
... |
428 |
for (pos = xrdb.positionlist_begin(docid, qterm);
|
425 |
for (pos = xrdb.positionlist_begin(docid, qterm);
|
429 |
pos != xrdb.positionlist_end(docid, qterm); pos++) {
|
426 |
pos != xrdb.positionlist_end(docid, qterm); pos++) {
|
430 |
int ipos = *pos;
|
427 |
int ipos = *pos;
|
431 |
if (ipos < int(baseTextPosition)) // Not in text body
|
428 |
if (ipos < int(baseTextPosition)) // Not in text body
|
432 |
continue;
|
429 |
continue;
|
433 |
LOGABS(("makeAbstract: [%s] at pos %d grpoccs %d maxgrpoccs %d\n",
|
430 |
LOGABS(("makeAbstract: [%s] at pos %d grpoccs %d maxgrpoccs"
|
434 |
qterm.c_str(), ipos, grpoccs, maxgrpoccs));
|
431 |
" %d\n", qterm.c_str(), ipos, grpoccs, maxgrpoccs));
|
435 |
|
432 |
|
436 |
totaloccs++;
|
433 |
totaloccs++;
|
437 |
grpoccs++;
|
434 |
grpoccs++;
|
438 |
|
435 |
|
439 |
// Add adjacent slots to the set to populate at next
|
436 |
// Add adjacent slots to the set to populate at next
|
440 |
// step by inserting empty strings. Special provisions
|
437 |
// step by inserting empty strings. Special provisions
|
441 |
// for adding ellipsis and for positions overlapped by
|
438 |
// for adding ellipsis and for positions overlapped by
|
442 |
// the match term.
|
439 |
// the match term.
|
443 |
unsigned int sta = MAX(0, ipos - ctxwords);
|
440 |
unsigned int sta = MAX(int(baseTextPosition),
|
|
|
441 |
ipos - ctxwords);
|
444 |
unsigned int sto = ipos + qtrmwrdcnt-1 +
|
442 |
unsigned int sto = ipos + qtrmwrdcnt-1 +
|
445 |
m_q->m_db->getAbsCtxLen();
|
443 |
m_q->m_db->getAbsCtxLen();
|
446 |
for (unsigned int ii = sta; ii <= sto; ii++) {
|
444 |
for (unsigned int ii = sta; ii <= sto; ii++) {
|
447 |
if (ii == (unsigned int)ipos) {
|
445 |
if (ii == (unsigned int)ipos) {
|
448 |
sparseDoc[ii] = qterm;
|
446 |
sparseDoc[ii] = qterm;
|
|
... |
|
... |
453 |
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
451 |
ii < (unsigned int)ipos + qtrmwrdcnt) {
|
454 |
sparseDoc[ii] = occupiedmarker;
|
452 |
sparseDoc[ii] = occupiedmarker;
|
455 |
} else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
|
453 |
} else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
|
456 |
// For an empty slot, the test has a side
|
454 |
// For an empty slot, the test has a side
|
457 |
// effect of inserting an empty string which
|
455 |
// effect of inserting an empty string which
|
458 |
// is what we want
|
456 |
// is what we want.
|
459 |
sparseDoc[ii] = emptys;
|
457 |
sparseDoc[ii] = emptys;
|
460 |
}
|
458 |
}
|
461 |
}
|
459 |
}
|
462 |
// Add ellipsis at the end. This may be replaced later by
|
460 |
// Add ellipsis at the end. This may be replaced later by
|
463 |
// an overlapping extract. Take care not to replace an
|
461 |
// an overlapping extract. Take care not to replace an
|
|
... |
|
... |
524 |
for (pos = xrdb.positionlist_begin(docid, *term);
|
522 |
for (pos = xrdb.positionlist_begin(docid, *term);
|
525 |
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
523 |
pos != xrdb.positionlist_end(docid, *term); pos++) {
|
526 |
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
524 |
if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
|
527 |
ret = ABSRES_TERMMISS;
|
525 |
ret = ABSRES_TERMMISS;
|
528 |
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
526 |
LOGDEB0(("makeAbstract: max term count cutoff %d\n",
|
529 |
m_q->m_snipMaxPosWalk));
|
527 |
m_q->m_snipMaxPosWalk));
|
530 |
break;
|
528 |
break;
|
531 |
}
|
529 |
}
|
532 |
// If we are beyond the max possible position, stop
|
530 |
// If we are beyond the max possible position, stop
|
533 |
// for this term
|
531 |
// for this term
|
534 |
if (*pos > maxpos) {
|
532 |
if (*pos > maxpos) {
|
|
... |
|
... |
578 |
bool incjk = false;
|
576 |
bool incjk = false;
|
579 |
int page = 0;
|
577 |
int page = 0;
|
580 |
string term;
|
578 |
string term;
|
581 |
for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
|
579 |
for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
|
582 |
it != sparseDoc.end(); it++) {
|
580 |
it != sparseDoc.end(); it++) {
|
583 |
LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
|
581 |
LOGDEB2(("Abtract:output %u -> [%s]\n", it->first, it->second.c_str()));
|
584 |
if (!occupiedmarker.compare(it->second))
|
582 |
if (!occupiedmarker.compare(it->second)) {
|
|
|
583 |
LOGDEB(("Abstract: qtrm position not filled ??\n"));
|
585 |
continue;
|
584 |
continue;
|
|
|
585 |
}
|
586 |
if (chunk.empty() && !vpbreaks.empty()) {
|
586 |
if (chunk.empty() && !vpbreaks.empty()) {
|
587 |
page = ndb->getPageNumberForPosition(vpbreaks, it->first);
|
587 |
page = ndb->getPageNumberForPosition(vpbreaks, it->first);
|
588 |
if (page < 0)
|
588 |
if (page < 0)
|
589 |
page = 0;
|
589 |
page = 0;
|
590 |
term.clear();
|
590 |
term.clear();
|