Switch to unified view

a/src/rcldb/rclabstract.cpp b/src/rcldb/rclabstract.cpp
...
...
340
    map<unsigned int, string> sparseDoc;
340
    map<unsigned int, string> sparseDoc;
341
    // Also remember the search term positions separately so that we can list
341
    // Also remember the search term positions separately so that we can list
342
    // them with their snippets.
342
    // them with their snippets.
343
    unordered_set<unsigned int> searchTermPositions;
343
    unordered_set<unsigned int> searchTermPositions;
344
344
345
    // Remember max position. Used to stop walking position lists while
346
    // populating the adjacent slots.
347
    unsigned int maxpos = 0;
348
345
    // Total number of occurrences for all terms. We stop when we have too many
349
    // Total number of occurrences for all terms. We stop when we have too many
346
    unsigned int totaloccs = 0;
350
    unsigned int totaloccs = 0;
347
351
348
    // Total number of slots we populate. The 7 is taken as
352
    // Total number of slots we populate. The 7 is taken as
349
    // average word size. It was a mistake to have the user max
353
    // average word size. It was a mistake to have the user max
...
...
417
            m_q->m_db->getAbsCtxLen();
421
            m_q->m_db->getAbsCtxLen();
418
            for (unsigned int ii = sta; ii <= sto;  ii++) {
422
            for (unsigned int ii = sta; ii <= sto;  ii++) {
419
            if (ii == (unsigned int)ipos) {
423
            if (ii == (unsigned int)ipos) {
420
                sparseDoc[ii] = qterm;
424
                sparseDoc[ii] = qterm;
421
                searchTermPositions.insert(ii);
425
                searchTermPositions.insert(ii);
426
              if (ii > maxpos)
427
              maxpos = ii;
422
            } else if (ii > (unsigned int)ipos && 
428
            } else if (ii > (unsigned int)ipos && 
423
                   ii < (unsigned int)ipos + qtrmwrdcnt) {
429
                   ii < (unsigned int)ipos + qtrmwrdcnt) {
424
                sparseDoc[ii] = occupiedmarker;
430
                sparseDoc[ii] = occupiedmarker;
425
            } else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
431
            } else if (!sparseDoc[ii].compare(cstr_ellipsis)) {
426
                // For an empty slot, the test has a side
432
                // For an empty slot, the test has a side
...
...
458
        }
464
        }
459
    }
465
    }
460
    }
466
    }
461
    LOGABS(("makeAbstract:%d:chosen number of positions %d\n", 
467
    LOGABS(("makeAbstract:%d:chosen number of positions %d\n", 
462
        chron.millis(), totaloccs));
468
        chron.millis(), totaloccs));
469
    maxpos += ctxwords + 1;
463
470
464
    // This can happen if there are term occurrences in the keywords
471
    // This can happen if there are term occurrences in the keywords
465
    // etc. but not elsewhere ?
472
    // etc. but not elsewhere ?
466
    if (totaloccs == 0) {
473
    if (totaloccs == 0) {
467
    LOGDEB1(("makeAbstract: no occurrences\n"));
474
    LOGDEB1(("makeAbstract: no occurrences\n"));
...
...
470
477
471
    // Walk all of the document's term position lists and populate slots
478
    // Walk all of the document's term position lists and populate slots
472
    // around the query terms. We arbitrarily truncate the list to
479
    // around the query terms. We arbitrarily truncate the list to
473
    // avoid taking forever. If we do cut off, the abstract may be
480
    // avoid taking forever. If we do cut off, the abstract may be
474
    // inconsistent (missing words, potentially altering meaning),
481
    // inconsistent (missing words, potentially altering meaning),
475
    // which is bad.
482
    // which is bad. 
476
    { 
483
    { 
477
    Xapian::TermIterator term;
484
    Xapian::TermIterator term;
478
  int cutoff = 500 * 1000;
485
  int cutoff = m_q->m_snipMaxPosWalk;
479
480
    for (term = xrdb.termlist_begin(docid);
486
    for (term = xrdb.termlist_begin(docid);
481
         term != xrdb.termlist_end(docid); term++) {
487
         term != xrdb.termlist_end(docid); term++) {
482
        // Ignore prefixed terms
488
        // Ignore prefixed terms
483
        if (has_prefix(*term))
489
        if (has_prefix(*term))
484
        continue;
490
        continue;
485
      if (cutoff-- < 0) {
491
      if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
486
        ret = ABSRES_TRUNC;
492
        ret = ABSRES_TERMMISS;
487
        LOGDEB0(("makeAbstract: max term count cutoff\n"));
493
        LOGDEB0(("makeAbstract: max term count cutoff %d\n", 
494
           m_q->m_snipMaxPosWalk));
488
        break;
495
        break;
489
        }
496
        }
490
497
498
      map<unsigned int, string>::iterator vit;
491
        Xapian::PositionIterator pos;
499
        Xapian::PositionIterator pos;
492
        for (pos = xrdb.positionlist_begin(docid, *term); 
500
        for (pos = xrdb.positionlist_begin(docid, *term); 
493
         pos != xrdb.positionlist_end(docid, *term); pos++) {
501
         pos != xrdb.positionlist_end(docid, *term); pos++) {
494
      if (cutoff-- < 0) {
502
      if (m_q->m_snipMaxPosWalk > 0 && cutoff-- < 0) {
495
            ret = ABSRES_TRUNC;
503
            ret = ABSRES_TERMMISS;
496
            LOGDEB0(("makeAbstract: max term count cutoff\n"));
504
            LOGDEB0(("makeAbstract: max term count cutoff %d\n", 
505
               m_q->m_snipMaxPosWalk));
497
            break;
506
            break;
498
        }
507
        }
499
      map<unsigned int, string>::iterator vit;
508
      // If we are beyond the max possible position, stop
509
      // for this term
510
      if (*pos > maxpos) {
511
          break;
512
      }
500
        if ((vit = sparseDoc.find(*pos)) != sparseDoc.end()) {
513
        if ((vit = sparseDoc.find(*pos)) != sparseDoc.end()) {
501
            // Don't replace a term: the terms list is in
514
            // Don't replace a term: the terms list is in
502
            // alphabetic order, and we may have several terms
515
            // alphabetic order, and we may have several terms
503
            // at the same position, we want to keep only the
516
            // at the same position, we want to keep only the
504
            // first one (ie: dockes and dockes@wanadoo.fr)
517
            // first one (ie: dockes and dockes@wanadoo.fr)