Switch to unified view

a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.132 2008-05-20 10:09:54 dockes Exp $ (C) 2004 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.133 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
34
using namespace std;
34
using namespace std;
35
#endif /* NO_NAMESPACES */
35
#endif /* NO_NAMESPACES */
36
36
37
#include "rclconfig.h"
37
#include "rclconfig.h"
38
#include "rcldb.h"
38
#include "rcldb.h"
39
#include "rcldb_p.h"
39
#include "stemdb.h"
40
#include "stemdb.h"
40
#include "textsplit.h"
41
#include "textsplit.h"
41
#include "transcode.h"
42
#include "transcode.h"
42
#include "unacpp.h"
43
#include "unacpp.h"
43
#include "conftree.h"
44
#include "conftree.h"
...
...
45
#include "pathut.h"
46
#include "pathut.h"
46
#include "smallut.h"
47
#include "smallut.h"
47
#include "pathhash.h"
48
#include "pathhash.h"
48
#include "utf8iter.h"
49
#include "utf8iter.h"
49
#include "searchdata.h"
50
#include "searchdata.h"
51
#include "rclquery.h"
52
#include "rclquery_p.h"
50
53
51
#include "xapian.h"
52
54
53
#ifndef MAX
55
#ifndef MAX
54
#define MAX(A,B) (A>B?A:B)
56
#define MAX(A,B) (A>B?A:B)
55
#endif
57
#endif
56
#ifndef MIN
58
#ifndef MIN
...
...
86
// Synthetic abstract marker (to discriminate from abstract actually
88
// Synthetic abstract marker (to discriminate from abstract actually
87
// found in doc)
89
// found in doc)
88
const static string rclSyntAbs = "?!#@";
90
const static string rclSyntAbs = "?!#@";
89
const static string emptystring;
91
const static string emptystring;
90
92
91
// A class for data and methods that would have to expose
92
// Xapian-specific stuff if they were in Rcl::Db. There could actually be
93
// 2 different ones for indexing or query as there is not much in
94
// common.
95
class Native {
96
 public:
97
    Db *m_db;
98
    bool m_isopen;
99
    bool m_iswritable;
100
101
    // Indexing
102
    Xapian::WritableDatabase wdb;
103
104
    // Querying
105
    Xapian::Database db;
106
    Xapian::Query    query; // query descriptor: terms and subqueries
107
              // joined by operators (or/and etc...)
108
109
    // Filtering results on location. There are 2 possible approaches
110
    // for this:
111
    //   - Set a "MatchDecider" to be used by Xapian during the query
112
    //   - Filter the results out of Xapian (this also uses a
113
    //     Xapian::MatchDecider object, but applied to the results by Recoll.
114
    // 
115
    // The result filtering approach was the first implemented. 
116
    //
117
    // The efficiency of both methods depend on the searches, so the code
118
    // for both has been kept.  A nice point for the Xapian approach is that
119
    // the result count estimate are correct (they are wrong with
120
    // the postfilter approach). It is also faster in some worst case scenarios
121
    // so this now the default (but the post-filtering is faster in many common
122
    // cases).
123
    // 
124
    // Which is used is decided in SetQuery(), by setting either of
125
    // the two following members. This in turn is controlled by a
126
    // preprocessor directive.
127
128
#define XAPIAN_FILTERING 1
129
130
    Xapian::MatchDecider *decider;   // Xapian does the filtering
131
    Xapian::MatchDecider *postfilter; // Result filtering done by Recoll
132
133
    Xapian::Enquire      *enquire; // Open query descriptor.
134
    Xapian::MSet          mset;    // Partial result set
135
136
    // Term frequencies for current query. See makeAbstract, setQuery
137
    map<string, double>  m_termfreqs; 
138
    
139
    Native(Db *db) 
140
  : m_db(db),
141
    m_isopen(false), m_iswritable(false), decider(0), postfilter(0),
142
    enquire(0)
143
    { }
144
145
    ~Native() {
146
  delete decider;
147
  delete postfilter;
148
  delete enquire;
149
    }
150
151
    string makeAbstract(Xapian::docid id, const list<string>& terms);
152
153
    bool dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc);
154
155
    /** Compute list of subdocuments for a given path (given by hash) 
156
     *  We look for all Q terms beginning with the path/hash
157
     *  As suggested by James Aylett, a better method would be to add 
158
     *  a single term (ie: XP/path/to/file) to all subdocs, then finding
159
     *  them would be a simple matter of retrieving the posting list for the
160
     *  term. There would still be a need for the current Qterm though, as a
161
     *  unique term for replace_document, and for retrieving by
162
     *  path/ipath (history)
163
     */
164
    bool subDocs(const string &hash, vector<Xapian::docid>& docids);
165
166
};
167
168
class FilterMatcher : public Xapian::MatchDecider {
169
public:
170
    FilterMatcher(const string &topdir)
171
  : m_topdir(topdir)
172
    {}
173
    virtual ~FilterMatcher() {}
174
175
    virtual 
176
#if XAPIAN_MAJOR_VERSION < 1
177
    int 
178
#else
179
    bool
180
#endif
181
    operator()(const Xapian::Document &xdoc) const 
182
    {
183
  m_cnt++;
184
  // Parse xapian document's data and populate doc fields
185
  string data = xdoc.get_data();
186
  ConfSimple parms(&data);
187
188
  // The only filtering for now is on file path (subtree)
189
  string url;
190
  parms.get(string("url"), url);
191
  LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
192
       m_topdir.c_str(), url.c_str()));
193
  if (url.find(m_topdir, 7) == 7) {
194
      LOGDEB2(("FilterMatcher: MATCH    %d\n", m_cnt));
195
      return true; 
196
  } else {
197
      LOGDEB2(("FilterMatcher: NO MATCH %d\n", m_cnt));
198
      return false;
199
  }
200
    }
201
    static int m_cnt;
202
    
203
private:
204
    string m_topdir;
205
};
206
int FilterMatcher::m_cnt;
207
208
/* See comment in class declaration */
93
/* See comment in class declaration */
209
bool Native::subDocs(const string &hash, vector<Xapian::docid>& docids) 
94
bool Db::Native::subDocs(const string &hash, vector<Xapian::docid>& docids) 
210
{
95
{
211
    docids.clear();
96
    docids.clear();
212
    string qterm = "Q"+ hash + "|";
97
    string qterm = "Q"+ hash + "|";
213
    string ermsg;
98
    string ermsg;
214
99
...
...
248
    LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
133
    LOGERR(("Rcl::Db::subDocs: %s\n", ermsg.c_str()));
249
    return false;
134
    return false;
250
}
135
}
251
136
252
// Turn data record from db into document fields
137
// Turn data record from db into document fields
253
bool Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
138
bool Db::Native::dbDataToRclDoc(Xapian::docid docid, std::string &data, Doc &doc)
254
{
139
{
255
    LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
140
    LOGDEB1(("Db::dbDataToRclDoc: data: %s\n", data.c_str()));
256
    ConfSimple parms(&data);
141
    ConfSimple parms(&data);
257
    if (!parms.ok())
142
    if (!parms.ok())
258
    return false;
143
    return false;
...
...
304
#define LOGABS LOGDEB2
189
#define LOGABS LOGDEB2
305
#endif
190
#endif
306
191
307
// Build a document abstract by extracting text chunks around the query terms
192
// Build a document abstract by extracting text chunks around the query terms
308
// This uses the db termlists, not the original document.
193
// This uses the db termlists, not the original document.
309
string Native::makeAbstract(Xapian::docid docid, const list<string>& iterms)
194
string Db::Native::makeAbstract(Xapian::docid docid, Query *query)
310
{
195
{
311
    Chrono chron;
196
    Chrono chron;
312
    LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
197
    LOGDEB(("makeAbstract:%d: maxlen %d wWidth %d\n", chron.ms(),
313
         m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
198
         m_db->m_synthAbsLen, m_db->m_synthAbsWordCtxLen));
314
199
200
    list<string> iterms;
201
    query->getQueryTerms(iterms);
202
315
    list<string> terms = noPrefixList(iterms);
203
    list<string> terms = noPrefixList(iterms);
316
    if (terms.empty()) {
204
    if (terms.empty()) {
317
    return "";
205
    return "";
318
    }
206
    }
319
207
320
    // Retrieve db-wide frequencies for the query terms
208
    // Retrieve db-wide frequencies for the query terms
321
    if (m_termfreqs.empty()) {
209
    if (query->m_nq->termfreqs.empty()) {
322
    double doccnt = db.get_doccount();
210
    double doccnt = db.get_doccount();
323
    if (doccnt == 0) doccnt = 1;
211
    if (doccnt == 0) doccnt = 1;
324
    for (list<string>::const_iterator qit = terms.begin(); 
212
    for (list<string>::const_iterator qit = terms.begin(); 
325
         qit != terms.end(); qit++) {
213
         qit != terms.end(); qit++) {
326
        m_termfreqs[*qit] = db.get_termfreq(*qit) / doccnt;
214
        query->m_nq->termfreqs[*qit] = db.get_termfreq(*qit) / doccnt;
327
        LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(), 
215
        LOGABS(("makeAbstract: [%s] db freq %.1e\n", qit->c_str(), 
328
             m_termfreqs[*qit]));
216
            query->m_nq->termfreqs[*qit]));
329
    }
217
    }
330
    LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
218
    LOGABS(("makeAbstract:%d: got termfreqs\n", chron.ms()));
331
    }
219
    }
332
220
333
    // Compute a term quality coefficient by retrieving the term
221
    // Compute a term quality coefficient by retrieving the term
...
...
341
    for (list<string>::const_iterator qit = terms.begin(); 
229
    for (list<string>::const_iterator qit = terms.begin(); 
342
     qit != terms.end(); qit++) {
230
     qit != terms.end(); qit++) {
343
    Xapian::TermIterator term = db.termlist_begin(docid);
231
    Xapian::TermIterator term = db.termlist_begin(docid);
344
    term.skip_to(*qit);
232
    term.skip_to(*qit);
345
    if (term != db.termlist_end(docid) && *term == *qit) {
233
    if (term != db.termlist_end(docid) && *term == *qit) {
346
        double q = (term.get_wdf() / doclen) * m_termfreqs[*qit];
234
        double q = (term.get_wdf() / doclen) * query->m_nq->termfreqs[*qit];
347
        q = -log10(q);
235
        q = -log10(q);
348
        if (q < 3) {
236
        if (q < 3) {
349
        q = 0.05;
237
        q = 0.05;
350
        } else if (q < 4) {
238
        } else if (q < 4) {
351
        q = 0.3;
239
        q = 0.3;
...
...
554
}
442
}
555
443
556
/* Rcl::Db methods ///////////////////////////////// */
444
/* Rcl::Db methods ///////////////////////////////// */
557
445
558
Db::Db() 
446
Db::Db() 
559
    : m_ndb(0), m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
447
    : m_ndb(0), m_idxAbsTruncLen(250), m_synthAbsLen(250),
560
      m_synthAbsWordCtxLen(4), m_flushMb(-1), 
448
      m_synthAbsWordCtxLen(4), m_flushMb(-1), 
561
      m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0),
449
      m_curtxtsz(0), m_flushtxtsz(0), m_occtxtsz(0),
562
      m_maxFsOccupPc(0), m_mode(Db::DbRO)
450
      m_maxFsOccupPc(0), m_mode(Db::DbRO)
563
{
451
{
564
    m_ndb = new Native(this);
452
    m_ndb = new Native(this);
...
...
584
    list<string> res;
472
    list<string> res;
585
    stringToStrings(Xapian::Stem::get_available_languages(), res);
473
    stringToStrings(Xapian::Stem::get_available_languages(), res);
586
    return res;
474
    return res;
587
}
475
}
588
476
589
// Shared handler chain for the code which calls into Xapian. Append it
// to a try block: anything thrown is turned into a non-empty message
// stored in MSG (a string variable), which is left untouched if nothing
// was thrown. The caller tests MSG afterwards. This is needed in many
// places, and a sequence of catch clauses can only be factored out as a
// macro.
#define XCATCHERROR(MSG)                                   \
    catch (const Xapian::Error &xerr) {                    \
        MSG = xerr.get_msg();                              \
        if (MSG.empty())                                   \
            MSG = "Empty error message";                   \
    } catch (const string &serr) {                         \
        MSG = serr;                                        \
        if (MSG.empty())                                   \
            MSG = "Empty error message";                   \
    } catch (const char *cerr) {                           \
        MSG = cerr;                                        \
        if (MSG.empty())                                   \
            MSG = "Empty error message";                   \
    } catch (...) {                                        \
        MSG = "Caught unknown xapian exception";           \
    }
604
605
606
bool Db::open(const string& dir, const string &stops, OpenMode mode, int qops)
477
bool Db::open(const string& dir, const string &stops, OpenMode mode, 
478
        bool keep_updated)
607
{
479
{
608
    bool keep_updated = (qops & QO_KEEP_UPDATED) != 0;
609
    qops &= ~QO_KEEP_UPDATED;
610
611
    if (m_ndb == 0)
480
    if (m_ndb == 0)
612
    return false;
481
    return false;
613
    LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen, 
482
    LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen, 
614
        m_ndb->m_iswritable));
483
        m_ndb->m_iswritable));
615
484
...
...
722
bool Db::reOpen()
591
bool Db::reOpen()
723
{
592
{
724
    if (m_ndb && m_ndb->m_isopen) {
593
    if (m_ndb && m_ndb->m_isopen) {
725
    if (!close())
594
    if (!close())
726
        return false;
595
        return false;
727
  if (!open(m_basedir, "", m_mode, m_qOpts | QO_KEEP_UPDATED)) {
596
  if (!open(m_basedir, "", m_mode, true)) {
728
        return false;
597
        return false;
729
    }
598
    }
730
    }
599
    }
731
    return true;
600
    return true;
732
}
601
}
...
...
1465
    names.push_back("XIMPOSSIBLE");
1334
    names.push_back("XIMPOSSIBLE");
1466
    }
1335
    }
1467
    return true;
1336
    return true;
1468
}
1337
}
1469
1338
1470
// Prepare query out of user search data
1471
bool Db::setQuery(RefCntr<SearchData> sdata, int opts, 
1472
        const string& stemlang)
1473
{
1474
    if (!m_ndb) {
1475
  LOGERR(("Db::setQuery: no db!\n"));
1476
  return false;
1477
    }
1478
    m_reason.erase();
1479
    LOGDEB(("Db::setQuery:\n"));
1480
1481
    m_filterTopDir = sdata->getTopdir();
1482
    deleteZ(m_ndb->decider);
1483
    deleteZ(m_ndb->postfilter);
1484
    if (!m_filterTopDir.empty()) {
1485
#if XAPIAN_FILTERING
1486
  m_ndb->decider = 
1487
#else
1488
        m_ndb->postfilter =
1489
#endif
1490
      new FilterMatcher(m_filterTopDir);
1491
    }
1492
    m_dbindices.clear();
1493
    m_qOpts = opts;
1494
    m_ndb->m_termfreqs.clear();
1495
    FilterMatcher::m_cnt = 0;
1496
    Xapian::Query xq;
1497
    if (!sdata->toNativeQuery(*this, &xq, 
1498
                (opts & Db::QO_STEM) ? stemlang : "")) {
1499
  m_reason += sdata->getReason();
1500
  return false;
1501
    }
1502
    m_ndb->query = xq;
1503
    string ermsg;
1504
    string d;
1505
    try {
1506
  delete m_ndb->enquire;
1507
  m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
1508
  m_ndb->enquire->set_query(m_ndb->query);
1509
  m_ndb->mset = Xapian::MSet();
1510
  // Get the query description and trim the "Xapian::Query"
1511
  d = m_ndb->query.get_description();
1512
    } XCATCHERROR(ermsg);
1513
    if (!ermsg.empty()) {
1514
  LOGDEB(("Db::SetQuery: xapian error %s\n", ermsg.c_str()));
1515
  return false;
1516
    }
1517
  
1518
    if (d.find("Xapian::Query") == 0)
1519
  d.erase(0, strlen("Xapian::Query"));
1520
    if (!m_filterTopDir.empty()) {
1521
  d += string(" [dir: ") + m_filterTopDir + "]";
1522
    }
1523
    sdata->setDescription(d);
1524
    LOGDEB(("Db::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
1525
    return true;
1526
}
1527
1528
class TermMatchCmpByWcf {
1339
class TermMatchCmpByWcf {
1529
public:
1340
public:
1530
    int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
1341
    int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
1531
    return r.wcf - l.wcf < 0;
1342
    return r.wcf - l.wcf < 0;
1532
    }
1343
    }
...
...
1733
    return false;
1544
    return false;
1734
    }
1545
    }
1735
    return true;
1546
    return true;
1736
}
1547
}
1737
1548
1738
// Return the terms of the current query.
//
// @param terms output list. It is emptied first, then receives the
//              terms in the order in which Xapian enumerates them.
// @return false if there is no native db object, or if Xapian throws
//         during the enumeration (terms may then be partially filled).
bool Db::getQueryTerms(list<string>& terms)
{
    if (m_ndb == 0) {
        return false;
    }

    terms.clear();

    string xaperr;
    try {
        Xapian::TermIterator cur = m_ndb->query.get_terms_begin();
        while (cur != m_ndb->query.get_terms_end()) {
            terms.push_back(*cur);
            cur++;
        }
    } XCATCHERROR(xaperr);

    if (xaperr.empty()) {
        return true;
    }
    LOGERR(("getQueryTerms: xapian error: %s\n", xaperr.c_str()));
    return false;
}
1758
1759
bool Db::getMatchTerms(const Doc& doc, list<string>& terms)
1760
{
1761
    if (!m_ndb || !m_ndb->enquire) {
1762
  LOGERR(("Db::getMatchTerms: no query opened\n"));
1763
  return -1;
1764
    }
1765
1766
    terms.clear();
1767
    Xapian::TermIterator it;
1768
    Xapian::docid id = Xapian::docid(doc.xdocid);
1769
    string ermsg;
1770
    try {
1771
  for (it=m_ndb->enquire->get_matching_terms_begin(id);
1772
       it != m_ndb->enquire->get_matching_terms_end(id); it++) {
1773
      terms.push_back(*it);
1774
  }
1775
    } XCATCHERROR(ermsg);
1776
    if (!ermsg.empty()) {
1777
  LOGERR(("getQueryTerms: xapian error: %s\n", ermsg.c_str()));
1778
  return false;
1779
    }
1780
1781
    return true;
1782
}
1783
1784
// Mset size
1785
static const int qquantum = 30;
1786
1787
int Db::getResCnt()
1788
{
1789
    if (!m_ndb || !m_ndb->enquire) {
1790
  LOGERR(("Db::getResCnt: no query opened\n"));
1791
  return -1;
1792
    }
1793
    string ermsg;
1794
    if (m_ndb->mset.size() <= 0) {
1795
  try {
1796
      m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum, 
1797
                         0, m_ndb->decider);
1798
  } catch (const Xapian::DatabaseModifiedError &error) {
1799
      m_ndb->db.reopen();
1800
      m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum,
1801
                         0, m_ndb->decider);
1802
  } XCATCHERROR(ermsg);
1803
  if (!ermsg.empty()) {
1804
      LOGERR(("enquire->get_mset: exception: %s\n", ermsg.c_str()));
1805
      return -1;
1806
  }
1807
    }
1808
    int ret = -1;
1809
    try {
1810
    ret = m_ndb->mset.get_matches_lower_bound();
1811
    } catch (...) {}
1812
    return ret;
1813
}
1814
1815
1816
// Get document at rank i in query (i is the index in the whole result
1817
// set, as in the enquire class. We check if the current mset has the
1818
// doc, else ask for an other one. We use msets of 10 documents. Don't
1819
// know if the whole thing makes sense at all but it seems to work.
1820
//
1821
// If there is a postquery filter (ie: file names), we have to
1822
// maintain a correspondance from the sequential external index
1823
// sequence to the internal Xapian hole-y one (the holes being the documents 
1824
// that dont match the filter).
1825
bool Db::getDoc(int exti, Doc &doc, int *percent)
1826
{
1827
    LOGDEB1(("Db::getDoc: exti %d\n", exti));
1828
    if (!m_ndb || !m_ndb->enquire) {
1829
  LOGERR(("Db::getDoc: no query opened\n"));
1830
  return false;
1831
    }
1832
1833
    int xapi;
1834
    if (m_ndb->postfilter) {
1835
  // There is a postquery filter, does this fall in already known area ?
1836
  if (exti >= (int)m_dbindices.size()) {
1837
      // Have to fetch xapian docs and filter until we get
1838
      // enough or fail
1839
      m_dbindices.reserve(exti+1);
1840
      // First xapian doc we fetch is the one after last stored 
1841
      int first = m_dbindices.size() > 0 ? m_dbindices.back() + 1 : 0;
1842
      // Loop until we get enough docs
1843
      while (exti >= (int)m_dbindices.size()) {
1844
      LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
1845
          qquantum, first));
1846
      try {
1847
          m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
1848
      } catch (const Xapian::DatabaseModifiedError &error) {
1849
          m_ndb->db.reopen();
1850
          m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
1851
      } catch (const Xapian::Error & error) {
1852
        LOGERR(("enquire->get_mset: exception: %s\n", 
1853
            error.get_msg().c_str()));
1854
        abort();
1855
      }
1856
1857
      if (m_ndb->mset.empty()) {
1858
          LOGDEB(("Db::getDoc: got empty mset\n"));
1859
          return false;
1860
      }
1861
      first = m_ndb->mset.get_firstitem();
1862
      for (unsigned int i = 0; i < m_ndb->mset.size() ; i++) {
1863
          LOGDEB(("Db::getDoc: [%d]\n", i));
1864
          Xapian::Document xdoc = m_ndb->mset[i].get_document();
1865
          if ((*m_ndb->postfilter)(xdoc)) {
1866
          m_dbindices.push_back(first + i);
1867
          }
1868
      }
1869
      first = first + m_ndb->mset.size();
1870
      }
1871
  }
1872
  xapi = m_dbindices[exti];
1873
    } else {
1874
  xapi = exti;
1875
    }
1876
1877
    // From there on, we work with a xapian enquire item number. Fetch it
1878
    int first = m_ndb->mset.get_firstitem();
1879
    int last = first + m_ndb->mset.size() -1;
1880
1881
    if (!(xapi >= first && xapi <= last)) {
1882
  LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
1883
  try {
1884
      m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
1885
                         0, m_ndb->decider);
1886
  } catch (const Xapian::DatabaseModifiedError &error) {
1887
      m_ndb->db.reopen();
1888
      m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum,
1889
                         0, m_ndb->decider);
1890
1891
  } catch (const Xapian::Error & error) {
1892
    LOGERR(("enquire->get_mset: exception: %s\n", 
1893
        error.get_msg().c_str()));
1894
    abort();
1895
  }
1896
  if (m_ndb->mset.empty())
1897
      return false;
1898
  first = m_ndb->mset.get_firstitem();
1899
  last = first + m_ndb->mset.size() -1;
1900
    }
1901
1902
    LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
1903
       m_ndb->query.get_description().c_str(), 
1904
       first, last,
1905
       m_ndb->mset.get_matches_lower_bound()));
1906
1907
    Xapian::Document xdoc = m_ndb->mset[xapi-first].get_document();
1908
    Xapian::docid docid = *(m_ndb->mset[xapi-first]);
1909
    if (percent)
1910
  *percent = m_ndb->mset.convert_to_percent(m_ndb->mset[xapi-first]);
1911
1912
    // Parse xapian document's data and populate doc fields
1913
    string data = xdoc.get_data();
1914
    return m_ndb->dbDataToRclDoc(docid, data, doc);
1915
}
1916
1917
bool Db::makeDocAbstract(Doc &doc, string& abstract)
1550
bool Db::makeDocAbstract(Doc &doc, Query *query, string& abstract)
1918
{
1551
{
1919
    LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
1552
    LOGDEB1(("Db::makeDocAbstract: exti %d\n", exti));
1920
    if (!m_ndb || !m_ndb->enquire) {
1553
    if (!m_ndb) {
1921
    LOGERR(("Db::makeDocAbstract: no query opened\n"));
1554
    LOGERR(("Db::makeDocAbstract: no db\n"));
1922
    return false;
1555
    return false;
1923
    }
1556
    }
1924
    list<string> terms;
1925
    getQueryTerms(terms);
1926
    abstract = m_ndb->makeAbstract(doc.xdocid, terms);
1557
    abstract = m_ndb->makeAbstract(doc.xdocid, query);
1927
    return true;
1558
    return true;
1928
}
1559
}
1929
1560
1930
// Retrieve document defined by file name and internal path. 
1561
// Retrieve document defined by file name and internal path. 
1931
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
1562
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
...
...
1967
    LOGERR(("Db::getDoc: %s\n", ermsg.c_str()));
1598
    LOGERR(("Db::getDoc: %s\n", ermsg.c_str()));
1968
    }
1599
    }
1969
    return false;
1600
    return false;
1970
}
1601
}
1971
1602
1972
// Compute a list of expansion terms for the current query, using the
// input document as the only member of the relevance set.
//
// At most 20 candidate terms are requested from Xapian, and at most 10
// are returned. Empty terms and terms beginning with an uppercase ASCII
// letter (the special terms) are filtered out. The original query terms
// are not excluded.
//
// @param doc the relevant document, designated by its xdocid field.
// @return the expansion terms. The list is empty if no query is opened
//         or if Xapian reports an error.
list<string> Db::expand(const Doc &doc)
{
    list<string> res;
    if (!m_ndb || !m_ndb->enquire) {
        LOGERR(("Db::expand: no query opened\n"));
        return res;
    }
    string ermsg;
    for (int tries = 0; tries < 2; tries++) {
        bool modified = false;
        // Don't keep partial results from a failed first attempt
        res.clear();
        try {
            Xapian::RSet rset;
            rset.add_document(Xapian::docid(doc.xdocid));
            // We don't exclude the original query terms.
            Xapian::ESet eset = m_ndb->enquire->get_eset(20, rset, false);
            LOGDEB(("ESet terms:\n"));
            // We filter out the special terms
            for (Xapian::ESetIterator it = eset.begin();
                 it != eset.end(); it++) {
                LOGDEB((" [%s]\n", (*it).c_str()));
                if ((*it).empty() || ((*it).at(0)>='A' && (*it).at(0)<='Z'))
                    continue;
                res.push_back(*it);
                if (res.size() >= 10)
                    break;
            }
        } catch (const Xapian::DatabaseModifiedError &error) {
            modified = true;
            ermsg = error.get_msg();
            if (ermsg.empty())
                ermsg = "Empty error message";
        } XCATCHERROR(ermsg);

        if (modified && tries == 0) {
            // The index changed under us: reopen the db (as the other
            // query methods do) then try again, once. The reopen is
            // done out of the catch clause so that its own errors can
            // be caught too.
            ermsg.erase();
            try {
                m_ndb->db.reopen();
            } XCATCHERROR(ermsg);
            if (ermsg.empty())
                continue;
        }
        if (!ermsg.empty()) {
            LOGERR(("Db::expand: xapian error %s\n", ermsg.c_str()));
            res.clear();
        }
        break;
    }

    return res;
}
2009
2010
2011
#ifndef NO_NAMESPACES
1603
#ifndef NO_NAMESPACES
2012
}
1604
}
2013
#endif
1605
#endif