Switch to unified view

a/src/rcldb/rclquery.cpp b/src/rcldb/rclquery.cpp
...
...
21
#include "rclconfig.h"
21
#include "rclconfig.h"
22
22
23
#ifndef NO_NAMESPACES
23
#ifndef NO_NAMESPACES
24
namespace Rcl {
24
namespace Rcl {
25
#endif
25
#endif
26
27
28
class FilterMatcher : public Xapian::MatchDecider {
29
public:
30
    FilterMatcher(const string &topdir)
31
  : m_topdir(topdir)
32
    {}
33
    virtual ~FilterMatcher() {}
34
35
    virtual 
36
#if XAPIAN_MAJOR_VERSION < 1
37
    int 
38
#else
39
    bool
40
#endif
41
    operator()(const Xapian::Document &xdoc) const 
42
    {
43
  // Parse xapian document's data and populate doc fields
44
  string data = xdoc.get_data();
45
  ConfSimple parms(data);
46
47
  // The only filtering for now is on file path (subtree)
48
  string url;
49
  parms.get(Doc::keyurl, url);
50
  LOGDEB2(("FilterMatcher topdir [%s] url [%s]\n",
51
       m_topdir.c_str(), url.c_str()));
52
  if (url.find(m_topdir, 7) == 7) {
53
      return true; 
54
  } else {
55
      return false;
56
  }
57
    }
58
    
59
private:
60
    string m_topdir;
61
};
62
26
63
// Sort helper class
27
// Sort helper class
64
class QSorter : public Xapian::Sorter {
28
class QSorter : public Xapian::Sorter {
65
public:
29
public:
66
    QSorter(const string& f) : m_fld(docfToDatf(f) + "=") {}
30
    QSorter(const string& f) : m_fld(docfToDatf(f) + "=") {}
...
...
132
    LOGERR(("Query::setQuery: not initialised!\n"));
96
    LOGERR(("Query::setQuery: not initialised!\n"));
133
    return false;
97
    return false;
134
    }
98
    }
135
    m_reason.erase();
99
    m_reason.erase();
136
100
137
    m_filterTopDir = sdata->getTopdir();
138
    m_nq->clear();
101
    m_nq->clear();
139
140
    if (!m_filterTopDir.empty()) {
141
#if XAPIAN_FILTERING
142
  m_nq->decider = 
143
#else
144
        m_nq->postfilter =
145
#endif
146
      new FilterMatcher(m_filterTopDir);
147
    }
148
102
149
    Xapian::Query xq;
103
    Xapian::Query xq;
150
    if (!sdata->toNativeQuery(*m_db, &xq)) {
104
    if (!sdata->toNativeQuery(*m_db, &xq)) {
151
    m_reason += sdata->getReason();
105
    m_reason += sdata->getReason();
152
    return false;
106
    return false;
153
    }
107
    }
108
154
    m_nq->xquery = xq;
109
    m_nq->xquery = xq;
155
110
156
    string d;
111
    string d;
157
    for (int tries = 0; tries < 2; tries++) {
112
    for (int tries = 0; tries < 2; tries++) {
158
    try {
113
    try {
...
...
160
            if (m_collapseDuplicates) {
115
            if (m_collapseDuplicates) {
161
                m_nq->xenquire->set_collapse_key(Rcl::VALUE_MD5);
116
                m_nq->xenquire->set_collapse_key(Rcl::VALUE_MD5);
162
            } else {
117
            } else {
163
                m_nq->xenquire->set_collapse_key(Xapian::BAD_VALUENO);
118
                m_nq->xenquire->set_collapse_key(Xapian::BAD_VALUENO);
164
            }
119
            }
120
      m_nq->xenquire->set_docid_order(Xapian::Enquire::DONT_CARE);
165
            if (!m_sortField.empty()) {
121
            if (!m_sortField.empty()) {
166
                if (m_sorter) {
122
                if (m_sorter) {
167
                    delete (QSorter*)m_sorter;
123
                    delete (QSorter*)m_sorter;
168
                    m_sorter = 0;
124
                    m_sorter = 0;
169
                }
125
                }
...
...
192
    return false;
148
    return false;
193
    }
149
    }
194
    
150
    
195
    if (d.find("Xapian::Query") == 0)
151
    if (d.find("Xapian::Query") == 0)
196
    d.erase(0, strlen("Xapian::Query"));
152
    d.erase(0, strlen("Xapian::Query"));
197
    if (!m_filterTopDir.empty()) {
153
198
  d += string(" [dir: ") + m_filterTopDir + "]";
199
    }
200
    sdata->setDescription(d);
154
    sdata->setDescription(d);
201
    LOGDEB(("Query::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
155
    LOGDEB(("Query::SetQuery: Q: %s\n", sdata->getDescription().c_str()));
202
    return true;
156
    return true;
203
}
157
}
204
158
...
...
250
    }
204
    }
251
205
252
    return true;
206
    return true;
253
}
207
}
254
208
209
255
// Mset size
210
// Mset size
256
static const int qquantum = 30;
211
static const int qquantum = 50;
257
212
258
// Get estimated result count for query. Xapian actually does most of
213
// Get estimated result count for query. Xapian actually does most of
259
// the search job in there, this can be long
214
// the search job in there, this can be long
260
int Query::getResCnt()
215
int Query::getResCnt()
261
{
216
{
...
...
267
    int ret = -1;
222
    int ret = -1;
268
    if (m_nq->xmset.size() <= 0) {
223
    if (m_nq->xmset.size() <= 0) {
269
        Chrono chron;
224
        Chrono chron;
270
225
271
        XAPTRY(m_nq->xmset = 
226
        XAPTRY(m_nq->xmset = 
272
               m_nq->xenquire->get_mset(0, qquantum,0, m_nq->decider);
227
               m_nq->xenquire->get_mset(0, qquantum, (const Xapian::RSet *)0);
273
               ret = m_nq->xmset.get_matches_lower_bound(),
228
               ret = m_nq->xmset.get_matches_lower_bound(),
274
               m_db->m_ndb->xrdb, m_reason);
229
               m_db->m_ndb->xrdb, m_reason);
275
230
276
        LOGDEB(("Query::getResCnt: %d mS\n", chron.millis()));
231
        LOGDEB(("Query::getResCnt: %d mS\n", chron.millis()));
277
    if (!m_reason.empty())
232
    if (!m_reason.empty())
...
...
281
    }
236
    }
282
    return ret;
237
    return ret;
283
}
238
}
284
239
285
240
286
// Get document at rank i in query (i is the index in the whole result
241
// Get document at rank xapi in query results.  We check if the
287
// set, as in the enquire class. We check if the current mset has the
242
// current mset has the doc, else ask for another one. We use msets
288
// doc, else ask for another one. We use msets of 10 documents. Don't
243
// of qquantum documents.
289
// know if the whole thing makes sense at all but it seems to work.
290
//
244
//
291
// If there is a postquery filter (ie: file names), we have to
245
// Note that as stated by a Xapian developer, Enquire searches from
292
// maintain a correspondence from the sequential external index
246
// scratch each time get_mset() is called. So the better performance
293
// sequence to the internal Xapian hole-y one (the holes being the documents 
247
// on subsequent calls is probably only due to disk caching.
294
// that don't match the filter).
295
bool Query::getDoc(int exti, Doc &doc)
248
bool Query::getDoc(int xapi, Doc &doc)
296
{
249
{
297
    LOGDEB1(("Query::getDoc: exti %d\n", exti));
250
    LOGDEB1(("Query::getDoc: xapian enquire index %d\n", xapi));
298
    if (ISNULL(m_nq) || !m_nq->xenquire) {
251
    if (ISNULL(m_nq) || !m_nq->xenquire) {
299
    LOGERR(("Query::getDoc: no query opened\n"));
252
    LOGERR(("Query::getDoc: no query opened\n"));
300
    return false;
253
    return false;
301
    }
254
    }
302
255
303
    int xapi;
304
    if (m_nq->postfilter) {
305
  // There is a postquery filter, does this fall in already known area ?
306
  if (exti >= (int)m_nq->m_dbindices.size()) {
307
      // Have to fetch xapian docs and filter until we get
308
      // enough or fail
309
      m_nq->m_dbindices.reserve(exti+1);
310
      // First xapian doc we fetch is the one after last stored 
311
      int first = m_nq->m_dbindices.size() > 0 ? 
312
      m_nq->m_dbindices.back() + 1 : 0;
313
      // Loop until we get enough docs
314
      while (exti >= (int)m_nq->m_dbindices.size()) {
315
      LOGDEB(("Query::getDoc: fetching %d starting at %d\n",
316
          qquantum, first));
317
318
      XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(first, qquantum),
319
                       m_db->m_ndb->xrdb, m_reason);
320
321
                if (!m_reason.empty()) {
322
                    LOGERR(("enquire->get_mset: exception: %s\n", 
323
                            m_reason.c_str()));
324
                    return false;
325
      }
326
327
      if (m_nq->xmset.empty()) {
328
          LOGDEB(("Query::getDoc: got empty mset\n"));
329
          return false;
330
      }
331
      first = m_nq->xmset.get_firstitem();
332
      for (unsigned int i = 0; i < m_nq->xmset.size() ; i++) {
333
          LOGDEB(("Query::getDoc: [%d]\n", i));
334
          Xapian::Document xdoc = m_nq->xmset[i].get_document();
335
          if ((*m_nq->postfilter)(xdoc)) {
336
          m_nq->m_dbindices.push_back(first + i);
337
          }
338
      }
339
      first = first + m_nq->xmset.size();
340
      }
341
  }
342
  xapi = m_nq->m_dbindices[exti];
343
    } else {
344
  xapi = exti;
345
    }
346
347
    // From there on, we work with a xapian enquire item number. Fetch it
348
    int first = m_nq->xmset.get_firstitem();
256
    int first = m_nq->xmset.get_firstitem();
349
    int last = first + m_nq->xmset.size() -1;
257
    int last = first + m_nq->xmset.size() -1;
350
258
351
    if (!(xapi >= first && xapi <= last)) {
259
    if (!(xapi >= first && xapi <= last)) {
352
    LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
260
    LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
353
261
354
    XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,
262
    XAPTRY(m_nq->xmset = m_nq->xenquire->get_mset(xapi, qquantum,  
355
                                                      0, m_nq->decider),
263
                            (const Xapian::RSet *)0),
356
               m_db->m_ndb->xrdb, m_reason);
264
               m_db->m_ndb->xrdb, m_reason);
357
265
358
        if (!m_reason.empty()) {
266
        if (!m_reason.empty()) {
359
            LOGERR(("enquire->get_mset: exception: %s\n", m_reason.c_str()));
267
            LOGERR(("enquire->get_mset: exception: %s\n", m_reason.c_str()));
360
            return false;
268
            return false;
...
...
406
    if (!m_reason.empty()) {
314
    if (!m_reason.empty()) {
407
        LOGERR(("Query::getDoc: %s\n", m_reason.c_str()));
315
        LOGERR(("Query::getDoc: %s\n", m_reason.c_str()));
408
        return false;
316
        return false;
409
    }
317
    }
410
    doc.meta[Rcl::Doc::keyudi] = udi;
318
    doc.meta[Rcl::Doc::keyudi] = udi;
319
411
    // Parse xapian document's data and populate doc fields
320
    // Parse xapian document's data and populate doc fields
412
    return m_db->m_ndb->dbDataToRclDoc(docid, data, doc, pc);
321
    return m_db->m_ndb->dbDataToRclDoc(docid, data, doc, pc);
413
}
322
}
414
323
415
list<string> Query::expand(const Doc &doc)
324
list<string> Query::expand(const Doc &doc)