recoll / Code / Diff of /src/rcldb/rclabstract.cpp

Diff of /src/rcldb/rclabstract.cpp [e5543b] .. [3c8c89]

Switch to unified view


...
            return ABSRES_ERROR;
        }
    }
    listList("Match terms: ", matchedTerms);

    // Retrieve the term frequencies for the query terms. This is
    // actually computed only once for a query, and for all terms in
    // the query (not only the matches for this doc)
    setDbWideQTermsFreqs();

    // Build a sorted-by-quality container for the match terms. We are
    // going to try and show text around the less common search terms.
    // Terms issued from an original one by stem expansion are
    // aggregated by the qualityTerms() routine.



    multimap<double, vector<string> > byQ;
    double totalweight = qualityTerms(docid, matchedTerms, byQ);
    LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));
    // This can't happen, but would crash us
    if (totalweight == 0.0) {
...
        for (pos = xrdb.positionlist_begin(docid, qterm); 
             pos != xrdb.positionlist_end(docid, qterm); pos++) {
            int ipos = *pos;
            if (ipos < int(baseTextPosition)) // Not in text body
            continue;
            LOGABS(("makeAbstract: [%s] at pos %d grpoccs %d maxgrpoccs"
                " %d\n", qterm.c_str(), ipos, grpoccs, maxgrpoccs));

            totaloccs++;
            grpoccs++;

            // Add adjacent slots to the set to populate at next
            // step by inserting empty strings. Special provisions
            // for adding ellipsis and for positions overlapped by
            // the match term.
          unsigned int sta = MAX(int(baseTextPosition), 
                     ipos - ctxwords);
            unsigned int sto = ipos + qtrmwrdcnt-1 + 
            m_q->m_db->getAbsCtxLen();
            for (unsigned int ii = sta; ii <= sto;  ii++) {
            if (ii == (unsigned int)ipos) {
                sparseDoc[ii] = qterm;
...
                   ii < (unsigned int)ipos + qtrmwrdcnt) {
                sparseDoc[ii] = occupiedmarker;
            } else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
                // For an empty slot, the test has a side
                // effect of inserting an empty string which
                // is what we want.
                sparseDoc[ii] = emptys;
            }
            }
            // Add ellipsis at the end. This may be replaced later by
            // an overlapping extract. Take care not to replace an
...
        for (pos = xrdb.positionlist_begin(docid, *term); 
         pos != xrdb.positionlist_end(docid, *term); pos++) {
        if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
            ret = ABSRES_TERMMISS;
            LOGDEB0(("makeAbstract: max term count cutoff %d\n", 
                m_q->m_snipMaxPosWalk));
            break;
        }
        // If we are beyond the max possible position, stop
        // for this term
        if (*pos > maxpos) {
...
    bool incjk = false;
    int page = 0;
    string term;
    for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
     it != sparseDoc.end(); it++) {
    LOGDEB2(("Abtract:output %u -> [%s]\n", it->first, it->second.c_str()));
    if (!occupiedmarker.compare(it->second)) {
      LOGDEB(("Abstract: qtrm position not filled ??\n"));
        continue;
  }
    if (chunk.empty() && !vpbreaks.empty()) {
        page =  ndb->getPageNumberForPosition(vpbreaks, it->first);
        if (page < 0) 
        page = 0;
        term.clear();

	a/src/rcldb/rclabstract.cpp		b/src/rcldb/rclabstract.cpp
	...		...
328	return ABSRES_ERROR;	328	return ABSRES_ERROR;
329	}	329	}
330	}	330	}
331	listList("Match terms: ", matchedTerms);	331	listList("Match terms: ", matchedTerms);
332		332
333	// Retrieve the term freqencies for the query terms. This is	333	// Retrieve the term frequencies for the query terms. This is
334	// actually computed only once for a query, and for all terms in	334	// actually computed only once for a query, and for all terms in
335	// the query (not only the matches for this doc)	335	// the query (not only the matches for this doc)
336	setDbWideQTermsFreqs();	336	setDbWideQTermsFreqs();
337		337
338	// Build a sorted by quality container for the match terms We are	338	// Build a sorted by quality container for the match terms We are
339	// going to try and show text around the less common search terms.	339	// going to try and show text around the less common search terms.
340	// TOBEDONE: terms issued from an original one by stem expansion	340	// Terms issued from an original one by stem expansion are
341	// should be somehow aggregated here, else, it may happen that	341	// aggregated by the qualityTerms() routine.
342	// such a group prevents displaying matches for other terms (by
343	// removing its meaning from the maximum occurrences per term test
344	// used while walking the list below)
345	multimap<double, vector<string> > byQ;	342	multimap<double, vector<string> > byQ;
346	double totalweight = qualityTerms(docid, matchedTerms, byQ);	343	double totalweight = qualityTerms(docid, matchedTerms, byQ);
347	LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));	344	LOGABS(("makeAbstract:%d: computed Qcoefs.\n", chron.ms()));
348	// This can't happen, but would crash us	345	// This can't happen, but would crash us
349	if (totalweight == 0.0) {	346	if (totalweight == 0.0) {
	...		...
428	for (pos = xrdb.positionlist_begin(docid, qterm);	425	for (pos = xrdb.positionlist_begin(docid, qterm);
429	pos != xrdb.positionlist_end(docid, qterm); pos++) {	426	pos != xrdb.positionlist_end(docid, qterm); pos++) {
430	int ipos = *pos;	427	int ipos = *pos;
431	if (ipos < int(baseTextPosition)) // Not in text body	428	if (ipos < int(baseTextPosition)) // Not in text body
432	continue;	429	continue;
433	LOGABS(("makeAbstract: [%s] at pos %d grpoccs %d maxgrpoccs %d\n",	430	LOGABS(("makeAbstract: [%s] at pos %d grpoccs %d maxgrpoccs"
434	qterm.c_str(), ipos, grpoccs, maxgrpoccs));	431	" %d\n", qterm.c_str(), ipos, grpoccs, maxgrpoccs));
435		432
436	totaloccs++;	433	totaloccs++;
437	grpoccs++;	434	grpoccs++;
438		435
439	// Add adjacent slots to the set to populate at next	436	// Add adjacent slots to the set to populate at next
440	// step by inserting empty strings. Special provisions	437	// step by inserting empty strings. Special provisions
441	// for adding ellipsis and for positions overlapped by	438	// for adding ellipsis and for positions overlapped by
442	// the match term.	439	// the match term.
443	unsigned int sta = MAX(0, ipos - ctxwords);	440	unsigned int sta = MAX(int(baseTextPosition),
		441	ipos - ctxwords);
444	unsigned int sto = ipos + qtrmwrdcnt-1 +	442	unsigned int sto = ipos + qtrmwrdcnt-1 +
445	m_q->m_db->getAbsCtxLen();	443	m_q->m_db->getAbsCtxLen();
446	for (unsigned int ii = sta; ii <= sto; ii++) {	444	for (unsigned int ii = sta; ii <= sto; ii++) {
447	if (ii == (unsigned int)ipos) {	445	if (ii == (unsigned int)ipos) {
448	sparseDoc[ii] = qterm;	446	sparseDoc[ii] = qterm;
	...		...
453	ii < (unsigned int)ipos + qtrmwrdcnt) {	451	ii < (unsigned int)ipos + qtrmwrdcnt) {
454	sparseDoc[ii] = occupiedmarker;	452	sparseDoc[ii] = occupiedmarker;
455	} else if (!sparseDoc[ii].compare(cstr_ellipsis)) {	453	} else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
456	// For an empty slot, the test has a side	454	// For an empty slot, the test has a side
457	// effect of inserting an empty string which	455	// effect of inserting an empty string which
458	// is what we want	456	// is what we want.
459	sparseDoc[ii] = emptys;	457	sparseDoc[ii] = emptys;
460	}	458	}
461	}	459	}
462	// Add ellipsis at the end. This may be replaced later by	460	// Add ellipsis at the end. This may be replaced later by
463	// an overlapping extract. Take care not to replace an	461	// an overlapping extract. Take care not to replace an
	...		...
524	for (pos = xrdb.positionlist_begin(docid, *term);	522	for (pos = xrdb.positionlist_begin(docid, *term);
525	pos != xrdb.positionlist_end(docid, *term); pos++) {	523	pos != xrdb.positionlist_end(docid, *term); pos++) {
526	if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {	524	if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
527	ret = ABSRES_TERMMISS;	525	ret = ABSRES_TERMMISS;
528	LOGDEB0(("makeAbstract: max term count cutoff %d\n",	526	LOGDEB0(("makeAbstract: max term count cutoff %d\n",
529	m_q->m_snipMaxPosWalk));	527	m_q->m_snipMaxPosWalk));
530	break;	528	break;
531	}	529	}
532	// If we are beyond the max possible position, stop	530	// If we are beyond the max possible position, stop
533	// for this term	531	// for this term
534	if (*pos > maxpos) {	532	if (*pos > maxpos) {
	...		...
578	bool incjk = false;	576	bool incjk = false;
579	int page = 0;	577	int page = 0;
580	string term;	578	string term;
581	for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();	579	for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
582	it != sparseDoc.end(); it++) {	580	it != sparseDoc.end(); it++) {
583	LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));	581	LOGDEB2(("Abtract:output %u -> [%s]\n", it->first, it->second.c_str()));
584	if (!occupiedmarker.compare(it->second))	582	if (!occupiedmarker.compare(it->second)) {
		583	LOGDEB(("Abstract: qtrm position not filled ??\n"));
585	continue;	584	continue;
		585	}
586	if (chunk.empty() && !vpbreaks.empty()) {	586	if (chunk.empty() && !vpbreaks.empty()) {
587	page = ndb->getPageNumberForPosition(vpbreaks, it->first);	587	page = ndb->getPageNumberForPosition(vpbreaks, it->first);
588	if (page < 0)	588	if (page < 0)
589	page = 0;	589	page = 0;
590	term.clear();	590	term.clear();