Switch to unified view

a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
...
...
71
#ifndef NO_NAMESPACES
71
#ifndef NO_NAMESPACES
72
namespace Rcl {
72
namespace Rcl {
73
#endif
73
#endif
74
74
75
const string pathelt_prefix = "XP";
75
const string pathelt_prefix = "XP";
76
static const string ellipsis("...");
76
77
77
string version_string(){
78
string version_string(){
78
    return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") +
79
    return string("Recoll ") + string(rclversionstr) + string(" + Xapian ") +
79
        string(Xapian::version_string());
80
        string(Xapian::version_string());
80
}
81
}
...
...
243
// Build a document abstract by extracting text chunks around the query terms
244
// Build a document abstract by extracting text chunks around the query terms
244
// This uses the db termlists, not the original document.
245
// This uses the db termlists, not the original document.
245
//
246
//
246
// DatabaseModified and other general exceptions are caught and
247
// DatabaseModified and other general exceptions are caught and
247
// possibly retried by our caller
248
// possibly retried by our caller
248
string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
249
vector<string> Db::Native::makeAbstract(Xapian::docid docid, Query *query)
249
{
250
{
250
    Chrono chron;
251
    Chrono chron;
251
    LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
252
    LOGDEB2(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
252
         m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
253
         m_rcldb->m_synthAbsLen, m_rcldb->m_synthAbsWordCtxLen));
253
254
...
...
257
        list<string> iterms;
258
        list<string> iterms;
258
        query->getMatchTerms(docid, iterms);
259
        query->getMatchTerms(docid, iterms);
259
        noPrefixList(iterms, terms);
260
        noPrefixList(iterms, terms);
260
        if (terms.empty()) {
261
        if (terms.empty()) {
261
            LOGDEB(("makeAbstract::Empty term list\n"));
262
            LOGDEB(("makeAbstract::Empty term list\n"));
262
            return string();
263
            return vector<string>();
263
        }
264
        }
264
    }
265
    }
265
//    listList("Match terms: ", terms);
266
//    listList("Match terms: ", terms);
266
267
267
    // Retrieve db-wide frequencies for the query terms (we do this once per
268
    // Retrieve db-wide frequencies for the query terms (we do this once per
...
...
351
    m_rcldb->m_synthAbsLen /(7 * (m_rcldb->m_synthAbsWordCtxLen+1));
352
    m_rcldb->m_synthAbsLen /(7 * (m_rcldb->m_synthAbsWordCtxLen+1));
352
    LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
353
    LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
353
    // This can't happen, but would crash us
354
    // This can't happen, but would crash us
354
    if (totalweight == 0.0) {
355
    if (totalweight == 0.0) {
355
    LOGERR(("makeAbstract: 0 totalweight!\n"));
356
    LOGERR(("makeAbstract: 0 totalweight!\n"));
356
    return string();
357
    return vector<string>();
357
    }
358
    }
358
359
359
    // This is used to mark positions overlapped by a multi-word match term
360
    // This is used to mark positions overlapped by a multi-word match term
360
    const string occupiedmarker("?");
361
    const string occupiedmarker("?");
361
    const string ellipsis("...");
362
362
363
    // Let's go populate
363
    // Let's go populate
364
    for (multimap<double, string>::reverse_iterator qit = byQ.rbegin(); 
364
    for (multimap<double, string>::reverse_iterator qit = byQ.rbegin(); 
365
     qit != byQ.rend(); qit++) {
365
     qit != byQ.rend(); qit++) {
366
    string qterm = qit->second;
366
    string qterm = qit->second;
...
...
437
        chron.millis(), qtermposs.size()));
437
        chron.millis(), qtermposs.size()));
438
438
439
    // This can happen if there are term occurrences in the keywords
439
    // This can happen if there are term occurrences in the keywords
440
    // etc. but not elsewhere ?
440
    // etc. but not elsewhere ?
441
    if (qtermposs.size() == 0) 
441
    if (qtermposs.size() == 0) 
442
    return string();
442
    return vector<string>();
443
443
444
    // Walk all document's terms position lists and populate slots
444
    // Walk all document's terms position lists and populate slots
445
    // around the query terms. We arbitrarily truncate the list to
445
    // around the query terms. We arbitrarily truncate the list to
446
    // avoid taking forever. If we do cutoff, the abstract may be
446
    // avoid taking forever. If we do cutoff, the abstract may be
447
    // inconsistent (missing words, potentially altering meaning),
447
    // inconsistent (missing words, potentially altering meaning),
...
...
502
#endif
502
#endif
503
503
504
    LOGABS(("makeAbstract:%d: extracting\n", chron.millis()));
504
    LOGABS(("makeAbstract:%d: extracting\n", chron.millis()));
505
505
506
    // Finally build the abstract by walking the map (in order of position)
506
    // Finally build the abstract by walking the map (in order of position)
507
    string abstract;
507
    vector<string> vabs;
508
    abstract.reserve(sparseDoc.size() * 10);
508
    string chunk;
509
    bool incjk = false;
509
    bool incjk = false;
510
    for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
510
    for (map<unsigned int, string>::const_iterator it = sparseDoc.begin();
511
     it != sparseDoc.end(); it++) {
511
     it != sparseDoc.end(); it++) {
512
    LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
512
    LOGDEB2(("Abtract:output %u -> [%s]\n", it->first,it->second.c_str()));
513
    if (!occupiedmarker.compare(it->second))
513
    if (!occupiedmarker.compare(it->second))
...
...
515
    Utf8Iter uit(it->second);
515
    Utf8Iter uit(it->second);
516
    bool newcjk = false;
516
    bool newcjk = false;
517
    if (TextSplit::isCJK(*uit))
517
    if (TextSplit::isCJK(*uit))
518
        newcjk = true;
518
        newcjk = true;
519
    if (!incjk || (incjk && !newcjk))
519
    if (!incjk || (incjk && !newcjk))
520
      abstract += " ";
520
      chunk += " ";
521
    incjk = newcjk;
521
    incjk = newcjk;
522
  abstract += it->second;
522
  if (it->second == ellipsis) {
523
      vabs.push_back(chunk);
524
      chunk.clear();
525
  } else {
526
      chunk += it->second;
527
  }
523
    }
528
    }
524
529
    if (!chunk.empty())
530
  vabs.push_back(chunk);
525
    // This happens for docs with no terms (only filename) indexed? I'll fix 
531
    // This happens for docs with no terms (only filename) indexed? I'll fix 
526
    // one day (yeah)
532
    // one day (yeah)
527
    if (!abstract.compare("... "))
533
    if (vabs.size() == 1 && !vabs[0].compare("... "))
528
    abstract.clear();
534
    vabs.clear();
529
535
530
    LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
536
    LOGDEB2(("makeAbtract: done in %d mS\n", chron.millis()));
531
    return abstract;
537
    return vabs;
532
}
538
}
533
539
534
/* Rcl::Db methods ///////////////////////////////// */
540
/* Rcl::Db methods ///////////////////////////////// */
535
541
536
Db::Db(RclConfig *cfp)
542
Db::Db(RclConfig *cfp)
...
...
1740
    return false;
1746
    return false;
1741
    }
1747
    }
1742
    return true;
1748
    return true;
1743
}
1749
}
1744
1750
1745
1746
bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
1751
bool Db::makeDocAbstract(Doc &doc, Query *query, vector<string>& abstract)
1747
{
1752
{
1748
    LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
1753
    LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
1749
    if (!m_ndb || !m_ndb->m_isopen) {
1754
    if (!m_ndb || !m_ndb->m_isopen) {
1750
    LOGERR(("Db::makeDocAbstract: no db\n"));
1755
    LOGERR(("Db::makeDocAbstract: no db\n"));
1751
    return false;
1756
    return false;
1752
    }
1757
    }
1753
1754
    XAPTRY(abstract = m_ndb->makeAbstract(doc.xdocid, query),
1758
    XAPTRY(abstract = m_ndb->makeAbstract(doc.xdocid, query),
1755
           m_ndb->xrdb, m_reason);
1759
           m_ndb->xrdb, m_reason);
1760
    return m_reason.empty() ? true : false;
1761
}
1756
1762
1763
bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
1764
{
1765
    LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
1766
    if (!m_ndb || !m_ndb->m_isopen) {
1767
  LOGERR(("Db::makeDocAbstract: no db\n"));
1768
  return false;
1769
    }
1770
    vector<string> vab;
1771
    XAPTRY(vab = m_ndb->makeAbstract(doc.xdocid, query),
1772
           m_ndb->xrdb, m_reason);
1773
    for (vector<string>::const_iterator it = vab.begin(); 
1774
   it != vab.end(); it++) {
1775
  abstract.append(*it);
1776
  abstract.append(ellipsis);
1777
    }
1757
    return m_reason.empty() ? true : false;
1778
    return m_reason.empty() ? true : false;
1758
}
1779
}
1759
1780
1760
// Retrieve document defined by Unique doc identifier. This is mainly used
1781
// Retrieve document defined by Unique doc identifier. This is mainly used
1761
// by the GUI history feature
1782
// by the GUI history feature