Switch to unified view

a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.60 2006-04-05 06:26:56 dockes Exp $ (C) 2004 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.61 2006-04-05 12:50:42 dockes Exp $ (C) 2004 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
65
#define MA_ABSTRACT_SIZE 250
65
#define MA_ABSTRACT_SIZE 250
66
// This is how many words (context size) we keep around query terms
66
// This is how many words (context size) we keep around query terms
67
// when building the abstract
67
// when building the abstract
68
#define MA_EXTRACT_WIDTH 4
68
#define MA_EXTRACT_WIDTH 4
69
69
70
// Truncate longer paths and uniquize with a hash. The goal for this is
71
// to avoid xapian max term length limitations, not to gain space (we
72
// gain very little even with very short maxlens like 30)
73
#define PATHHASHLEN 150
74
75
// Synthetic abstract marker (to discriminate from abstract actually
76
// found in doc)
77
const static string rclSyntAbs = "?!#@";
78
70
// Data for a xapian database. There could actually be 2 different
79
// Data for a xapian database. There could actually be 2 different
71
// ones for indexing or query as there is not much in common.
80
// ones for indexing or query as there is not much in common.
72
class Native {
81
class Native {
73
 public:
82
 public:
74
    bool isopen;
83
    bool m_isopen;
75
    bool iswritable;
84
    bool m_iswritable;
85
    Db::OpenMode m_mode;
76
    string basedir;
86
    string m_basedir;
87
88
    // List of directories for additional databases to query
89
    list<string> m_extraDbs;
77
90
78
    // Indexing
91
    // Indexing
79
    Xapian::WritableDatabase wdb;
92
    Xapian::WritableDatabase wdb;
80
    vector<bool> updated;
93
    vector<bool> updated;
81
94
...
...
90
    bool dbDataToRclDoc(std::string &data, Doc &doc, 
103
    bool dbDataToRclDoc(std::string &data, Doc &doc, 
91
            int qopts,
104
            int qopts,
92
            Xapian::docid docid,
105
            Xapian::docid docid,
93
            const list<string>& terms);
106
            const list<string>& terms);
94
107
95
    Native() : isopen(false), iswritable(false), enquire(0) { }
108
    Native() 
109
  : m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0) 
110
    { }
96
    ~Native() {
111
    ~Native() {
97
    delete enquire;
112
    delete enquire;
98
    }
113
    }
99
    bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
114
    bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
100
    // Parse xapian document's data and populate doc fields
115
    // Parse xapian document's data and populate doc fields
...
...
110
    return false;
125
    return false;
111
    }
126
    }
112
};
127
};
113
128
114
Db::Db() 
129
Db::Db() 
130
    : m_qOpts(0)
115
{
131
{
116
    ndb = new Native;
132
    m_ndb = new Native;
117
    m_qOpts = 0;
118
}
133
}
119
134
120
Db::~Db()
135
Db::~Db()
121
{
136
{
122
    LOGDEB1(("Db::~Db\n"));
137
    LOGDEB1(("Db::~Db\n"));
123
    if (ndb == 0)
138
    if (m_ndb == 0)
124
    return;
139
    return;
125
    LOGDEB(("Db::~Db: isopen %d iswritable %d\n", ndb->isopen, 
140
    LOGDEB(("Db::~Db: isopen %d m_iswritable %d\n", m_ndb->m_isopen, 
126
        ndb->iswritable));
141
        m_ndb->m_iswritable));
127
    if (ndb->isopen == false)
142
    if (m_ndb->m_isopen == false)
128
    return;
143
    return;
129
    const char *ermsg = "Unknown error";
144
    const char *ermsg = "Unknown error";
130
    try {
145
    try {
131
    LOGDEB(("Db::~Db: closing native database\n"));
146
    LOGDEB(("Db::~Db: closing native database\n"));
132
    if (ndb->iswritable == true) {
147
    if (m_ndb->m_iswritable == true) {
133
        ndb->wdb.flush();
148
        m_ndb->wdb.flush();
134
    }
149
    }
135
    delete ndb;
150
    delete m_ndb;
151
  m_ndb = 0;
136
    return;
152
    return;
137
    } catch (const Xapian::Error &e) {
153
    } catch (const Xapian::Error &e) {
138
    ermsg = e.get_msg().c_str();
154
    ermsg = e.get_msg().c_str();
139
    } catch (const string &s) {
155
    } catch (const string &s) {
140
    ermsg = s.c_str();
156
    ermsg = s.c_str();
...
...
146
    LOGERR(("Db::~Db: got exception: %s\n", ermsg));
162
    LOGERR(("Db::~Db: got exception: %s\n", ermsg));
147
}
163
}
148
164
149
bool Db::open(const string& dir, OpenMode mode, int qops)
165
bool Db::open(const string& dir, OpenMode mode, int qops)
150
{
166
{
151
    if (ndb == 0)
167
    if (m_ndb == 0)
152
    return false;
168
    return false;
153
    LOGDEB(("Db::open: isopen %d iswritable %d\n", ndb->isopen, 
169
    LOGDEB(("Db::open: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen, 
154
        ndb->iswritable));
170
        m_ndb->m_iswritable));
155
    m_qOpts = qops;
156
171
157
    if (ndb->isopen) {
172
    if (m_ndb->m_isopen) {
158
  LOGERR(("Db::open: already open\n"));
173
  // We used to return an error here but I see no reason to
174
  if (!close())
159
    return false;
175
        return false;
160
    }
176
    }
161
    const char *ermsg = "Unknown";
177
    const char *ermsg = "Unknown";
162
    try {
178
    try {
163
    switch (mode) {
179
    switch (mode) {
164
    case DbUpd:
180
    case DbUpd:
165
    case DbTrunc: 
181
    case DbTrunc: 
166
        {
182
        {
167
        int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
183
        int action = (mode == DbUpd) ? Xapian::DB_CREATE_OR_OPEN :
168
            Xapian::DB_CREATE_OR_OVERWRITE;
184
            Xapian::DB_CREATE_OR_OVERWRITE;
169
        ndb->wdb = Xapian::WritableDatabase(dir, action);
185
        m_ndb->wdb = Xapian::WritableDatabase(dir, action);
170
        LOGDEB(("Db::open: lastdocid: %d\n", 
186
        LOGDEB(("Db::open: lastdocid: %d\n", 
171
            ndb->wdb.get_lastdocid()));
187
            m_ndb->wdb.get_lastdocid()));
172
        ndb->updated.resize(ndb->wdb.get_lastdocid() + 1);
188
        m_ndb->updated.resize(m_ndb->wdb.get_lastdocid() + 1);
173
        for (unsigned int i = 0; i < ndb->updated.size(); i++)
189
        for (unsigned int i = 0; i < m_ndb->updated.size(); i++)
174
            ndb->updated[i] = false;
190
            m_ndb->updated[i] = false;
175
        ndb->iswritable = true;
191
        m_ndb->m_iswritable = true;
176
        }
192
        }
177
        break;
193
        break;
178
    case DbRO:
194
    case DbRO:
179
    default:
195
    default:
180
        ndb->iswritable = false;
196
        m_ndb->m_iswritable = false;
181
        ndb->db = Xapian::Database(dir);
197
        m_ndb->db = Xapian::Database(dir);
198
      for (list<string>::iterator it = m_ndb->m_extraDbs.begin();
199
       it != m_ndb->m_extraDbs.end(); it++) {
200
      string aerr;
201
      LOGDEB(("Db::Open: adding query db [%s]\n", it->c_str()));
202
      aerr.clear();
203
      try {
204
          // Make this non-fatal
205
          m_ndb->db.add_database(Xapian::Database(*it));
206
      } catch (const Xapian::Error &e) {
207
          aerr = e.get_msg().c_str();
208
      } catch (const string &s) {
209
          aerr = s.c_str();
210
      } catch (const char *s) {
211
          aerr = s;
212
      } catch (...) {
213
          aerr = "Caught unknown exception";
214
      }
215
      if (!aerr.empty())
216
          LOGERR(("Db::Open: error while trying to add database "
217
              "from [%s]: %s\n", it->c_str(), aerr.c_str()));
218
      }
182
        break;
219
        break;
183
    }
220
    }
221
  m_qOpts = qops;
222
  m_ndb->m_mode = mode;
184
    ndb->isopen = true;
223
    m_ndb->m_isopen = true;
185
    ndb->basedir = dir;
224
    m_ndb->m_basedir = dir;
186
    return true;
225
    return true;
187
    } catch (const Xapian::Error &e) {
226
    } catch (const Xapian::Error &e) {
188
    ermsg = e.get_msg().c_str();
227
    ermsg = e.get_msg().c_str();
189
    } catch (const string &s) {
228
    } catch (const string &s) {
190
    ermsg = s.c_str();
229
    ermsg = s.c_str();
...
...
199
}
238
}
200
239
201
// Note: xapian has no close call, we delete and recreate the db
240
// Note: xapian has no close call, we delete and recreate the db
202
bool Db::close()
241
bool Db::close()
203
{
242
{
204
    if (ndb == 0)
243
    if (m_ndb == 0)
205
    return false;
244
    return false;
206
    LOGDEB(("Db::close(): isopen %d iswritable %d\n", ndb->isopen, 
245
    LOGDEB(("Db::close(): m_isopen %d m_iswritable %d\n", m_ndb->m_isopen, 
207
        ndb->iswritable));
246
        m_ndb->m_iswritable));
208
    if (ndb->isopen == false)
247
    if (m_ndb->m_isopen == false)
209
    return true;
248
    return true;
210
    const char *ermsg = "Unknown";
249
    const char *ermsg = "Unknown";
211
    try {
250
    try {
212
    if (ndb->iswritable == true) {
251
    if (m_ndb->m_iswritable == true) {
213
        ndb->wdb.flush();
252
        m_ndb->wdb.flush();
214
        LOGDEB(("Rcl:Db: Called xapian flush\n"));
253
        LOGDEB(("Rcl:Db: Called xapian flush\n"));
215
    }
254
    }
216
    delete ndb;
255
    delete m_ndb;
217
    ndb = new Native;
256
    m_ndb = new Native;
218
    if (ndb)
257
    if (m_ndb)
219
        return true;
258
        return true;
220
    } catch (const Xapian::Error &e) {
259
    } catch (const Xapian::Error &e) {
221
    ermsg = e.get_msg().c_str();
260
    ermsg = e.get_msg().c_str();
222
    } catch (const string &s) {
261
    } catch (const string &s) {
223
    ermsg = s.c_str();
262
    ermsg = s.c_str();
...
...
227
    ermsg = "Caught unknown exception";
266
    ermsg = "Caught unknown exception";
228
    }
267
    }
229
    LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
268
    LOGERR(("Db:close: exception while deleting db: %s\n", ermsg));
230
    return false;
269
    return false;
231
}
270
}
271
bool Db::reOpen()
272
{
273
    if (m_ndb->m_isopen) {
274
  if (!close())
275
      return false;
276
  if (!open(m_ndb->m_basedir, m_ndb->m_mode, m_qOpts)) {
277
      return false;
278
  }
279
    }
280
    return true;
281
}
282
bool Db::addQueryDb(const string &dir) 
283
{
284
    LOGDEB(("Db::addQueryDb: ndb %p iswritable %d db [%s]\n", m_ndb,
285
        (m_ndb)?m_ndb->m_iswritable:0, dir.c_str()));
286
    if (!m_ndb)
287
  return false;
288
    if (m_ndb->m_iswritable)
289
  return false;
290
    if (find(m_ndb->m_extraDbs.begin(), m_ndb->m_extraDbs.end(), dir) == 
291
  m_ndb->m_extraDbs.end()) {
292
  m_ndb->m_extraDbs.push_back(dir);
293
    }
294
    return reOpen();
295
}
296
297
bool Db::rmQueryDb(const string &dir)
298
{
299
    if (!m_ndb)
300
  return false;
301
    if (m_ndb->m_iswritable)
302
  return false;
303
    if (dir.empty()) {
304
  m_ndb->m_extraDbs.clear();
305
    } else {
306
  list<string>::iterator it = find(m_ndb->m_extraDbs.begin(), 
307
                   m_ndb->m_extraDbs.end(), dir);
308
  if (it != m_ndb->m_extraDbs.end()) {
309
      m_ndb->m_extraDbs.erase(it);
310
  }
311
    }
312
    return reOpen();
313
}
314
bool Db::testDbDir(const string &dir)
315
{
316
    string aerr;
317
    LOGDEB(("Db::testDbDir: [%s]\n", dir.c_str()));
318
    try {
319
  Xapian::Database db(dir);
320
    } catch (const Xapian::Error &e) {
321
  aerr = e.get_msg().c_str();
322
    } catch (const string &s) {
323
  aerr = s.c_str();
324
    } catch (const char *s) {
325
  aerr = s;
326
    } catch (...) {
327
  aerr = "Caught unknown exception";
328
    }
329
    if (!aerr.empty()) {
330
  LOGERR(("Db::Open: error while trying to open database "
331
      "from [%s]: %s\n", dir.c_str(), aerr.c_str()));
332
  return false;
333
    }
334
    return true;
335
}
232
336
233
bool Db::isopen()
337
bool Db::isopen()
234
{
338
{
235
    if (ndb == 0)
339
    if (m_ndb == 0)
236
    return false;
340
    return false;
237
    return ndb->isopen;
341
    return m_ndb->m_isopen;
238
}
342
}
239
343
240
// A small class to hold state while splitting text
344
// A small class to hold state while splitting text
241
class mySplitterCB : public TextSplitCB {
345
class mySplitterCB : public TextSplitCB {
242
 public:
346
 public:
...
...
333
    output += " ...";
437
    output += " ...";
334
    }
438
    }
335
    return output;
439
    return output;
336
}
440
}
337
441
338
// remove some chars and replace them with spaces
442
// Remove some chars and replace them with spaces
339
static string stripchars(const string &str, string delims)
443
static string stripchars(const string &str, string delims)
340
{
444
{
341
    string out;
445
    string out;
342
    string::size_type startPos, pos;
446
    string::size_type startPos, pos;
343
447
...
...
355
    }
459
    }
356
    }
460
    }
357
    return out;
461
    return out;
358
}
462
}
359
463
360
// Truncate longer paths and uniquize with a hash. The goal for this is
361
// to avoid xapian max term length limitations, not to gain space (we
362
// gain very little even with very short maxlens like 30)
363
#define PATHHASHLEN 150
364
365
const static string rclSyntAbs = "?!#@";
366
367
// Add document in internal form to the database: index the terms in
464
// Add document in internal form to the database: index the terms in
368
// the title abstract and body and add special terms for file name,
465
// the title abstract and body and add special terms for file name,
369
// date, mime type ... , create the document data record (more
466
// date, mime type ... , create the document data record (more
370
// metadata), and update database
467
// metadata), and update database
371
bool Db::add(const string &fn, const Doc &idoc, 
468
bool Db::add(const string &fn, const Doc &idoc, 
372
          const struct stat *stp)
469
          const struct stat *stp)
373
{
470
{
374
    LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
471
    LOGDEB1(("Db::add: fn %s\n", fn.c_str()));
375
    if (ndb == 0)
472
    if (m_ndb == 0)
376
    return false;
473
    return false;
377
474
378
    Doc doc = idoc;
475
    Doc doc = idoc;
379
476
380
    // Truncate abstract, title and keywords to reasonable lengths. If
477
    // Truncate abstract, title and keywords to reasonable lengths. If
...
...
511
608
512
    const char *fnc = fn.c_str();
609
    const char *fnc = fn.c_str();
513
    // Add db entry or update existing entry:
610
    // Add db entry or update existing entry:
514
    try {
611
    try {
515
    Xapian::docid did = 
612
    Xapian::docid did = 
516
        ndb->wdb.replace_document(uniterm.empty() ? pathterm : uniterm, 
613
        m_ndb->wdb.replace_document(uniterm.empty() ? pathterm : uniterm, 
517
                      newdocument);
614
                      newdocument);
518
    if (did < ndb->updated.size()) {
615
    if (did < m_ndb->updated.size()) {
519
        ndb->updated[did] = true;
616
        m_ndb->updated[did] = true;
520
        LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
617
        LOGDEB(("Db::add: docid %d updated [%s , %s]\n", did, fnc,
521
            doc.ipath.c_str()));
618
            doc.ipath.c_str()));
522
    } else {
619
    } else {
523
        LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc, 
620
        LOGDEB(("Db::add: docid %d added [%s , %s]\n", did, fnc, 
524
            doc.ipath.c_str()));
621
            doc.ipath.c_str()));
525
    }
622
    }
526
    } catch (...) {
623
    } catch (...) {
527
    // FIXME: is this ever actually needed?
624
    // FIXME: is this ever actually needed?
528
    try {
625
    try {
529
        ndb->wdb.add_document(newdocument);
626
        m_ndb->wdb.add_document(newdocument);
530
        LOGDEB(("Db::add: %s added (failed re-seek for duplicate)\n", 
627
        LOGDEB(("Db::add: %s added (failed re-seek for duplicate)\n", 
531
            fnc));
628
            fnc));
532
    } catch (...) {
629
    } catch (...) {
533
        LOGERR(("Db::add: failed again after replace_document\n"));
630
        LOGERR(("Db::add: failed again after replace_document\n"));
534
        return false;
631
        return false;
...
...
538
}
635
}
539
636
540
// Test if given filename has changed since last indexed:
637
// Test if given filename has changed since last indexed:
541
bool Db::needUpdate(const string &filename, const struct stat *stp)
638
bool Db::needUpdate(const string &filename, const struct stat *stp)
542
{
639
{
543
    if (ndb == 0)
640
    if (m_ndb == 0)
544
    return false;
641
    return false;
545
642
546
    // If no document exists with this path, we do need an update
643
    // If no document exists with this path, we do need an update
547
    string hash;
644
    string hash;
548
    pathHash(filename, hash, PATHHASHLEN);
645
    pathHash(filename, hash, PATHHASHLEN);
...
...
554
    // fmtime field which will be identical for all docs inside a
651
    // fmtime field which will be identical for all docs inside a
555
    // multi-document file (we currently always reindex all if the
652
    // multi-document file (we currently always reindex all if the
556
    // file changed)
653
    // file changed)
557
    Xapian::PostingIterator doc;
654
    Xapian::PostingIterator doc;
558
    try {
655
    try {
559
    if (!ndb->wdb.term_exists(pathterm)) {
656
    if (!m_ndb->wdb.term_exists(pathterm)) {
560
        LOGDEB1(("Db::needUpdate: no such path: %s\n", pathterm.c_str()));
657
        LOGDEB1(("Db::needUpdate: no such path: %s\n", pathterm.c_str()));
561
        return true;
658
        return true;
562
    }
659
    }
563
660
564
    Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);
661
    Xapian::PostingIterator docid0 = m_ndb->wdb.postlist_begin(pathterm);
565
    for (Xapian::PostingIterator docid = docid0;
662
    for (Xapian::PostingIterator docid = docid0;
566
         docid != ndb->wdb.postlist_end(pathterm); docid++) {
663
         docid != m_ndb->wdb.postlist_end(pathterm); docid++) {
567
664
568
        Xapian::Document doc = ndb->wdb.get_document(*docid);
665
        Xapian::Document doc = m_ndb->wdb.get_document(*docid);
569
666
570
        // Check the date once. no need to look at the others if
667
        // Check the date once. no need to look at the others if
571
        // the db needs updating. Note that the fmtime used to be
668
        // the db needs updating. Note that the fmtime used to be
572
        // called mtime, and we're keeping compat
669
        // called mtime, and we're keeping compat
573
        if (docid == docid0) {
670
        if (docid == docid0) {
...
...
588
            return true;
685
            return true;
589
        } 
686
        } 
590
        }
687
        }
591
688
592
        // Db is up to date. Make a note that this document exists.
689
        // Db is up to date. Make a note that this document exists.
593
        if (*docid < ndb->updated.size())
690
        if (*docid < m_ndb->updated.size())
594
        ndb->updated[*docid] = true;
691
        m_ndb->updated[*docid] = true;
595
    }
692
    }
596
    return false;
693
    return false;
597
    } catch (const Xapian::Error &e) {
694
    } catch (const Xapian::Error &e) {
598
    ermsg = e.get_msg().c_str();
695
    ermsg = e.get_msg().c_str();
599
    } catch (...) {
696
    } catch (...) {
...
...
625
 * Delete stem db for given language
722
 * Delete stem db for given language
626
 */
723
 */
627
bool Db::deleteStemDb(const string& lang)
724
bool Db::deleteStemDb(const string& lang)
628
{
725
{
629
    LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
726
    LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
630
    if (ndb == 0)
727
    if (m_ndb == 0)
631
    return false;
728
    return false;
632
    if (ndb->isopen == false)
729
    if (m_ndb->m_isopen == false)
633
    return false;
730
    return false;
634
731
635
    string dir = stemdbname(ndb->basedir, lang);
732
    string dir = stemdbname(m_ndb->m_basedir, lang);
636
    if (wipedir(dir) == 0 && rmdir(dir.c_str()) == 0)
733
    if (wipedir(dir) == 0 && rmdir(dir.c_str()) == 0)
637
    return true;
734
    return true;
638
    return false;
735
    return false;
639
}
736
}
640
737
...
...
645
 * parent terms in the document data.
742
 * parent terms in the document data.
646
 */
743
 */
647
bool Db::createStemDb(const string& lang)
744
bool Db::createStemDb(const string& lang)
648
{
745
{
649
    LOGDEB(("Db::createStemDb(%s)\n", lang.c_str()));
746
    LOGDEB(("Db::createStemDb(%s)\n", lang.c_str()));
650
    if (ndb == 0)
747
    if (m_ndb == 0)
651
    return false;
748
    return false;
652
    if (ndb->isopen == false)
749
    if (m_ndb->m_isopen == false)
653
    return false;
750
    return false;
654
751
655
    // First build the in-memory stem database:
752
    // First build the in-memory stem database:
656
    // We walk the list of all terms, and stem each. 
753
    // We walk the list of all terms, and stem each. 
657
    //   If the stem is identical to the term, no need to create an entry
754
    //   If the stem is identical to the term, no need to create an entry
...
...
665
    int stemdiff=0;  // Count of all different stems
762
    int stemdiff=0;  // Count of all different stems
666
    int stemmultiple = 0; // Count of stems with multiple derivatives
763
    int stemmultiple = 0; // Count of stems with multiple derivatives
667
    try {
764
    try {
668
    Xapian::Stem stemmer(lang);
765
    Xapian::Stem stemmer(lang);
669
    Xapian::TermIterator it;
766
    Xapian::TermIterator it;
670
    for (it = ndb->wdb.allterms_begin(); 
767
    for (it = m_ndb->wdb.allterms_begin(); 
671
         it != ndb->wdb.allterms_end(); it++) {
768
         it != m_ndb->wdb.allterms_end(); it++) {
672
	    // If it has any non-lowercase 7bit char, it can't be stemmed
769
	    // If it has any non-lowercase 7bit char, it can't be stemmed
673
        string::iterator sit = (*it).begin(), eit = sit + (*it).length();
770
        string::iterator sit = (*it).begin(), eit = sit + (*it).length();
674
        if ((sit = find_if(sit, eit, p_notlowerorutf)) != eit) {
771
        if ((sit = find_if(sit, eit, p_notlowerorutf)) != eit) {
675
        ++nostem;
772
        ++nostem;
676
        // LOGDEB(("stemskipped: [%s], because of 0x%x\n", 
773
        // LOGDEB(("stemskipped: [%s], because of 0x%x\n", 
...
...
705
        rmdir(dir.c_str());
802
        rmdir(dir.c_str());
706
        }
803
        }
707
    }
804
    }
708
    };
805
    };
709
    // Create xapian database for stem relations
806
    // Create xapian database for stem relations
710
    string stemdbdir = stemdbname(ndb->basedir, lang);
807
    string stemdbdir = stemdbname(m_ndb->m_basedir, lang);
711
    // We want to get rid of the db dir in case of error. This gets disarmed
808
    // We want to get rid of the db dir in case of error. This gets disarmed
712
    // just before success return.
809
    // just before success return.
713
    DirWiper wiper(stemdbdir);
810
    DirWiper wiper(stemdbdir);
714
    const char *ermsg = "NOERROR";
811
    const char *ermsg = "NOERROR";
715
    Xapian::WritableDatabase sdb;
812
    Xapian::WritableDatabase sdb;
...
...
779
876
780
list<string> Db::getStemLangs()
877
list<string> Db::getStemLangs()
781
{
878
{
782
    list<string> dirs;
879
    list<string> dirs;
783
    LOGDEB(("Db::getStemLang\n"));
880
    LOGDEB(("Db::getStemLang\n"));
784
    if (ndb == 0)
881
    if (m_ndb == 0)
785
    return dirs;
882
    return dirs;
786
    string pattern = stemdirstem + "*";
883
    string pattern = stemdirstem + "*";
787
    dirs = path_dirglob(ndb->basedir, pattern);
884
    dirs = path_dirglob(m_ndb->m_basedir, pattern);
788
    for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
885
    for (list<string>::iterator it = dirs.begin(); it != dirs.end(); it++) {
789
    *it = path_basename(*it);
886
    *it = path_basename(*it);
790
    *it = it->substr(stemdirstem.length(), string::npos);
887
    *it = it->substr(stemdirstem.length(), string::npos);
791
    }
888
    }
792
    return dirs;
889
    return dirs;
...
...
799
 *  stem database while we are at it.
896
 *  stem database while we are at it.
800
 */
897
 */
801
bool Db::purge()
898
bool Db::purge()
802
{
899
{
803
    LOGDEB(("Db::purge\n"));
900
    LOGDEB(("Db::purge\n"));
804
    if (ndb == 0)
901
    if (m_ndb == 0)
805
    return false;
902
    return false;
806
    LOGDEB(("Db::purge: isopen %d iswritable %d\n", ndb->isopen, 
903
    LOGDEB(("Db::purge: m_isopen %d m_iswritable %d\n", m_ndb->m_isopen, 
807
        ndb->iswritable));
904
        m_ndb->m_iswritable));
808
    if (ndb->isopen == false || ndb->iswritable == false)
905
    if (m_ndb->m_isopen == false || m_ndb->m_iswritable == false)
809
    return false;
906
    return false;
810
907
811
    // There seems to be problems with the document delete code, when
908
    // There seems to be problems with the document delete code, when
812
    // we do this, the database is not actually updated. Especially,
909
    // we do this, the database is not actually updated. Especially,
813
    // if we delete a bunch of docs, so that there is a hole in the
910
    // if we delete a bunch of docs, so that there is a hole in the
814
    // docids at the beginning, we can't add anything (appears to work
911
    // docids at the beginning, we can't add anything (appears to work
815
    // and does nothing). Maybe related to the exceptions below when
912
    // and does nothing). Maybe related to the exceptions below when
816
    // trying to delete a nonexistent document?
913
    // trying to delete a nonexistent document?
817
    // Flushing before trying the deletes seems to work around the problem
914
    // Flushing before trying the deletes seems to work around the problem
818
    try {
915
    try {
819
    ndb->wdb.flush();
916
    m_ndb->wdb.flush();
820
    } catch (...) {
917
    } catch (...) {
821
    LOGDEB(("Db::purge: 1st flush failed\n"));
918
    LOGDEB(("Db::purge: 1st flush failed\n"));
822
    }
919
    }
823
    for (Xapian::docid docid = 1; docid < ndb->updated.size(); ++docid) {
920
    for (Xapian::docid docid = 1; docid < m_ndb->updated.size(); ++docid) {
824
    if (!ndb->updated[docid]) {
921
    if (!m_ndb->updated[docid]) {
825
        try {
922
        try {
826
        ndb->wdb.delete_document(docid);
923
        m_ndb->wdb.delete_document(docid);
827
        LOGDEB(("Db::purge: deleted document #%d\n", docid));
924
        LOGDEB(("Db::purge: deleted document #%d\n", docid));
828
        } catch (const Xapian::DocNotFoundError &) {
925
        } catch (const Xapian::DocNotFoundError &) {
829
        LOGDEB(("Db::purge: document #%d not found\n", docid));
926
        LOGDEB(("Db::purge: document #%d not found\n", docid));
830
        }
927
        }
831
    }
928
    }
832
    }
929
    }
833
    try {
930
    try {
834
    ndb->wdb.flush();
931
    m_ndb->wdb.flush();
835
    } catch (...) {
932
    } catch (...) {
836
    LOGDEB(("Db::purge: 2nd flush failed\n"));
933
    LOGDEB(("Db::purge: 2nd flush failed\n"));
837
    }
934
    }
838
    return true;
935
    return true;
839
}
936
}
840
937
841
/**
938
/**
842
 * Expand term to list of all terms which stem to the same term.
939
 * Expand term to list of all terms which stem to the same term.
843
 */
940
 */
844
static list<string> stemexpand(Native *ndb, string term, const string& lang)
941
static list<string> stemexpand(Native *m_ndb, string term, const string& lang)
845
{
942
{
846
    list<string> explist;
943
    list<string> explist;
847
    try {
944
    try {
848
    Xapian::Stem stemmer(lang);
945
    Xapian::Stem stemmer(lang);
849
    string stem = stemmer.stem_word(term);
946
    string stem = stemmer.stem_word(term);
850
    LOGDEB(("stemexpand: [%s] stem-> [%s]\n", term.c_str(), stem.c_str()));
947
    LOGDEB(("stemexpand: [%s] stem-> [%s]\n", term.c_str(), stem.c_str()));
851
    // Try to fetch the doc from the stem db
948
    // Try to fetch the doc from the stem db
852
    string stemdbdir = stemdbname(ndb->basedir, lang);
949
    string stemdbdir = stemdbname(m_ndb->m_basedir, lang);
853
    Xapian::Database sdb(stemdbdir);
950
    Xapian::Database sdb(stemdbdir);
854
    LOGDEB1(("stemexpand: %s lastdocid: %d\n", 
951
    LOGDEB1(("stemexpand: %s lastdocid: %d\n", 
855
        stemdbdir.c_str(), sdb.get_lastdocid()));
952
        stemdbdir.c_str(), sdb.get_lastdocid()));
856
    if (!sdb.term_exists(stem)) {
953
    if (!sdb.term_exists(stem)) {
857
        LOGDEB1(("Db::stemexpand: no term for %s\n", stem.c_str()));
954
        LOGDEB1(("Db::stemexpand: no term for %s\n", stem.c_str()));
...
...
923
//     composition of the stem-expanded terms (or a single term query).
1020
//     composition of the stem-expanded terms (or a single term query).
924
//   - Elements corresponding to a phrase are an OP_PHRASE composition of the
1021
//   - Elements corresponding to a phrase are an OP_PHRASE composition of the
925
//     phrase terms (no stem expansion in this case)
1022
//     phrase terms (no stem expansion in this case)
926
static void stringToXapianQueries(const string &iq,
1023
static void stringToXapianQueries(const string &iq,
927
                  const string& stemlang,
1024
                  const string& stemlang,
928
                  Native *ndb,
1025
                  Native *m_ndb,
929
                  list<Xapian::Query> &pqueries,
1026
                  list<Xapian::Query> &pqueries,
930
                  Db::QueryOpts opts = Db::QO_NONE)
1027
                  Db::QueryOpts opts = Db::QO_NONE)
931
{
1028
{
932
    string qstring = iq;
1029
    string qstring = iq;
933
1030
...
...
971
        list<string> exp;  
1068
        list<string> exp;  
972
        string term1;
1069
        string term1;
973
        dumb_string(term, term1);
1070
        dumb_string(term, term1);
974
        // Possibly perform stem compression/expansion
1071
        // Possibly perform stem compression/expansion
975
        if (!nostemexp && (opts & Db::QO_STEM)) {
1072
        if (!nostemexp && (opts & Db::QO_STEM)) {
976
            exp = stemexpand(ndb, term1, stemlang);
1073
            exp = stemexpand(m_ndb, term1, stemlang);
977
        } else {
1074
        } else {
978
            exp.push_back(term1);
1075
            exp.push_back(term1);
979
        }
1076
        }
980
1077
981
        // Push either term or OR of stem-expanded set
1078
        // Push either term or OR of stem-expanded set
...
...
999
bool Db::setQuery(const std::string &iqstring, QueryOpts opts, 
1096
bool Db::setQuery(const std::string &iqstring, QueryOpts opts, 
1000
               const string& stemlang)
1097
               const string& stemlang)
1001
{
1098
{
1002
    LOGDEB(("Db::setQuery: q: [%s], opts 0x%x, stemlang %s\n", 
1099
    LOGDEB(("Db::setQuery: q: [%s], opts 0x%x, stemlang %s\n", 
1003
        iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
1100
        iqstring.c_str(), (unsigned int)opts, stemlang.c_str()));
1004
    if (!ndb)
1101
    if (!m_ndb)
1005
    return false;
1102
    return false;
1006
    m_asdata.erase();
1103
    m_asdata.erase();
1007
    dbindices.clear();
1104
    m_dbindices.clear();
1008
    list<Xapian::Query> pqueries;
1105
    list<Xapian::Query> pqueries;
1009
    stringToXapianQueries(iqstring, stemlang, ndb, pqueries, opts);
1106
    stringToXapianQueries(iqstring, stemlang, m_ndb, pqueries, opts);
1010
    ndb->query = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(), 
1107
    m_ndb->query = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(), 
1011
                   pqueries.end());
1108
                   pqueries.end());
1012
    delete ndb->enquire;
1109
    delete m_ndb->enquire;
1013
    ndb->enquire = new Xapian::Enquire(ndb->db);
1110
    m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
1014
    ndb->enquire->set_query(ndb->query);
1111
    m_ndb->enquire->set_query(m_ndb->query);
1015
    ndb->mset = Xapian::MSet();
1112
    m_ndb->mset = Xapian::MSet();
1016
    return true;
1113
    return true;
1017
}
1114
}
1018
1115
1019
// Prepare query out of "advanced search" data
1116
// Prepare query out of "advanced search" data
1020
bool Db::setQuery(AdvSearchData &sdata, QueryOpts opts, 
1117
bool Db::setQuery(AdvSearchData &sdata, QueryOpts opts, 
...
...
1033
    LOGDEB((" searched file types: %s\n", ft.c_str()));
1130
    LOGDEB((" searched file types: %s\n", ft.c_str()));
1034
    if (!sdata.topdir.empty())
1131
    if (!sdata.topdir.empty())
1035
    LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
1132
    LOGDEB((" restricted to: %s\n", sdata.topdir.c_str()));
1036
1133
1037
    m_asdata = sdata;
1134
    m_asdata = sdata;
1038
    dbindices.clear();
1135
    m_dbindices.clear();
1039
1136
1040
    if (!ndb)
1137
    if (!m_ndb)
1041
    return false;
1138
    return false;
1042
    list<Xapian::Query> pqueries;
1139
    list<Xapian::Query> pqueries;
1043
    Xapian::Query xq;
1140
    Xapian::Query xq;
1044
1141
1045
    if (!sdata.filename.empty()) {
1142
    if (!sdata.filename.empty()) {
...
...
1060
    } // else let it be
1157
    } // else let it be
1061
1158
1062
    LOGDEB((" pattern: [%s]\n", pattern.c_str()));
1159
    LOGDEB((" pattern: [%s]\n", pattern.c_str()));
1063
1160
1064
    // Match pattern against all file names in the db
1161
    // Match pattern against all file names in the db
1065
    Xapian::TermIterator it = ndb->db.allterms_begin(); 
1162
    Xapian::TermIterator it = m_ndb->db.allterms_begin(); 
1066
    it.skip_to("XSFN");
1163
    it.skip_to("XSFN");
1067
    list<string> names;
1164
    list<string> names;
1068
    for (;it != ndb->db.allterms_end(); it++) {
1165
    for (;it != m_ndb->db.allterms_end(); it++) {
1069
        if ((*it).find("XSFN") != 0)
1166
        if ((*it).find("XSFN") != 0)
1070
        break;
1167
        break;
1071
        string fn = (*it).substr(4);
1168
        string fn = (*it).substr(4);
1072
        LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
1169
        LOGDEB2(("Matching [%s] and [%s]\n", pattern.c_str(), fn.c_str()));
1073
        if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
1170
        if (fnmatch(pattern.c_str(), fn.c_str(), 0) != FNM_NOMATCH) {
...
...
1087
    // Build a query out of the matching file name terms.
1184
    // Build a query out of the matching file name terms.
1088
    xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
1185
    xq = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
1089
    }
1186
    }
1090
1187
1091
    if (!sdata.allwords.empty()) {
1188
    if (!sdata.allwords.empty()) {
1092
    stringToXapianQueries(sdata.allwords, stemlang, ndb, pqueries, opts);
1189
    stringToXapianQueries(sdata.allwords, stemlang, m_ndb, pqueries, opts);
1093
    if (!pqueries.empty()) {
1190
    if (!pqueries.empty()) {
1094
        Xapian::Query nq = 
1191
        Xapian::Query nq = 
1095
        Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
1192
        Xapian::Query(Xapian::Query::OP_AND, pqueries.begin(),
1096
                  pqueries.end());
1193
                  pqueries.end());
1097
        xq = xq.empty() ? nq :
1194
        xq = xq.empty() ? nq :
...
...
1099
        pqueries.clear();
1196
        pqueries.clear();
1100
    }
1197
    }
1101
    }
1198
    }
1102
1199
1103
    if (!sdata.orwords.empty()) {
1200
    if (!sdata.orwords.empty()) {
1104
    stringToXapianQueries(sdata.orwords, stemlang, ndb, pqueries, opts);
1201
    stringToXapianQueries(sdata.orwords, stemlang, m_ndb, pqueries, opts);
1105
    if (!pqueries.empty()) {
1202
    if (!pqueries.empty()) {
1106
        Xapian::Query nq = 
1203
        Xapian::Query nq = 
1107
        Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
1204
        Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
1108
                   pqueries.end());
1205
                   pqueries.end());
1109
        xq = xq.empty() ? nq :
1206
        xq = xq.empty() ? nq :
...
...
1112
    }
1209
    }
1113
    }
1210
    }
1114
1211
1115
    // We do no stem expansion on 'No' words. Should we ?
1212
    // We do no stem expansion on 'No' words. Should we ?
1116
    if (!sdata.nowords.empty()) {
1213
    if (!sdata.nowords.empty()) {
1117
    stringToXapianQueries(sdata.nowords, stemlang, ndb, pqueries);
1214
    stringToXapianQueries(sdata.nowords, stemlang, m_ndb, pqueries);
1118
    if (!pqueries.empty()) {
1215
    if (!pqueries.empty()) {
1119
        Xapian::Query nq;
1216
        Xapian::Query nq;
1120
        nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
1217
        nq = Xapian::Query(Xapian::Query::OP_OR, pqueries.begin(),
1121
                   pqueries.end());
1218
                   pqueries.end());
1122
        xq = xq.empty() ? nq :
1219
        xq = xq.empty() ? nq :
...
...
1126
    }
1223
    }
1127
1224
1128
    if (!sdata.phrase.empty()) {
1225
    if (!sdata.phrase.empty()) {
1129
    Xapian::Query nq;
1226
    Xapian::Query nq;
1130
    string s = string("\"") + sdata.phrase + string("\"");
1227
    string s = string("\"") + sdata.phrase + string("\"");
1131
    stringToXapianQueries(s, stemlang, ndb, pqueries);
1228
    stringToXapianQueries(s, stemlang, m_ndb, pqueries);
1132
    if (!pqueries.empty()) {
1229
    if (!pqueries.empty()) {
1133
        // There should be a single list element phrase query.
1230
        // There should be a single list element phrase query.
1134
        xq = xq.empty() ? *pqueries.begin() : 
1231
        xq = xq.empty() ? *pqueries.begin() : 
1135
        Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin());
1232
        Xapian::Query(Xapian::Query::OP_AND, xq, *pqueries.begin());
1136
        pqueries.clear();
1233
        pqueries.clear();
...
...
1147
        Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
1244
        Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
1148
    }
1245
    }
1149
    xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
1246
    xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
1150
    }
1247
    }
1151
1248
1152
    ndb->query = xq;
1249
    m_ndb->query = xq;
1153
    delete ndb->enquire;
1250
    delete m_ndb->enquire;
1154
    ndb->enquire = new Xapian::Enquire(ndb->db);
1251
    m_ndb->enquire = new Xapian::Enquire(m_ndb->db);
1155
    ndb->enquire->set_query(ndb->query);
1252
    m_ndb->enquire->set_query(m_ndb->query);
1156
    ndb->mset = Xapian::MSet();
1253
    m_ndb->mset = Xapian::MSet();
1157
    // Get the query description and trim the "Xapian::Query"
1254
    // Get the query description and trim the "Xapian::Query"
1158
    sdata.description = ndb->query.get_description();
1255
    sdata.description = m_ndb->query.get_description();
1159
    if (sdata.description.find("Xapian::Query") == 0)
1256
    if (sdata.description.find("Xapian::Query") == 0)
1160
    sdata.description = sdata.description.substr(strlen("Xapian::Query"));
1257
    sdata.description = sdata.description.substr(strlen("Xapian::Query"));
1161
    LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
1258
    LOGDEB(("Db::SetQuery: Q: %s\n", sdata.description.c_str()));
1162
    return true;
1259
    return true;
1163
}
1260
}
1164
1261
1165
bool Db::getQueryTerms(list<string>& terms)
1262
bool Db::getQueryTerms(list<string>& terms)
1166
{
1263
{
1167
    if (!ndb)
1264
    if (!m_ndb)
1168
    return false;
1265
    return false;
1169
1266
1170
    terms.clear();
1267
    terms.clear();
1171
    Xapian::TermIterator it;
1268
    Xapian::TermIterator it;
1172
    for (it = ndb->query.get_terms_begin(); it != ndb->query.get_terms_end();
1269
    for (it = m_ndb->query.get_terms_begin(); it != m_ndb->query.get_terms_end();
1173
     it++) {
1270
     it++) {
1174
    terms.push_back(*it);
1271
    terms.push_back(*it);
1175
    }
1272
    }
1176
    return true;
1273
    return true;
1177
}
1274
}
1178
1275
1276
// Mset size
1179
static const int qquantum = 30;
1277
static const int qquantum = 30;
1180
1278
1181
int Db::getResCnt()
1279
int Db::getResCnt()
1182
{
1280
{
1183
    if (!ndb || !ndb->enquire) {
1281
    if (!m_ndb || !m_ndb->enquire) {
1184
    LOGERR(("Db::getResCnt: no query opened\n"));
1282
    LOGERR(("Db::getResCnt: no query opened\n"));
1185
    return -1;
1283
    return -1;
1186
    }
1284
    }
1187
    if (ndb->mset.size() <= 0) {
1285
    if (m_ndb->mset.size() <= 0) {
1188
    try {
1286
    try {
1189
        ndb->mset = ndb->enquire->get_mset(0, qquantum);
1287
        m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
1190
    } catch (const Xapian::DatabaseModifiedError &error) {
1288
    } catch (const Xapian::DatabaseModifiedError &error) {
1191
        ndb->db.reopen();
1289
        m_ndb->db.reopen();
1192
        ndb->mset = ndb->enquire->get_mset(0, qquantum);
1290
        m_ndb->mset = m_ndb->enquire->get_mset(0, qquantum);
1193
    } catch (const Xapian::Error & error) {
1291
    } catch (const Xapian::Error & error) {
1194
        LOGERR(("enquire->get_mset: exception: %s\n", 
1292
        LOGERR(("enquire->get_mset: exception: %s\n", 
1195
            error.get_msg().c_str()));
1293
            error.get_msg().c_str()));
1196
        return -1;
1294
        return -1;
1197
    }
1295
    }
1198
    }
1296
    }
1199
1297
1200
    return ndb->mset.get_matches_lower_bound();
1298
    return m_ndb->mset.get_matches_lower_bound();
1201
}
1202
1203
// This class (friend to RclDb) exists so that we can have functions that 
1204
// access private RclDb data and have Xapian-specific parameters (so that we 
1205
// don't want them to appear in the public rcldb.h).
1206
class DbPops {
1207
 public:
1208
};
1299
}
1209
1300
1210
bool Native::dbDataToRclDoc(std::string &data, Doc &doc, 
1301
bool Native::dbDataToRclDoc(std::string &data, Doc &doc, 
1211
                int qopts,
1302
                int qopts,
1212
                Xapian::docid docid, const list<string>& terms)
1303
                Xapian::docid docid, const list<string>& terms)
1213
{
1304
{
...
...
1250
// sequence to the internal Xapian hole-y one (the holes being the documents 
1341
// sequence to the internal Xapian hole-y one (the holes being the documents 
1251
// that dont match the filter).
1342
// that dont match the filter).
1252
bool Db::getDoc(int exti, Doc &doc, int *percent)
1343
bool Db::getDoc(int exti, Doc &doc, int *percent)
1253
{
1344
{
1254
    LOGDEB1(("Db::getDoc: exti %d\n", exti));
1345
    LOGDEB1(("Db::getDoc: exti %d\n", exti));
1255
    if (!ndb || !ndb->enquire) {
1346
    if (!m_ndb || !m_ndb->enquire) {
1256
    LOGERR(("Db::getDoc: no query opened\n"));
1347
    LOGERR(("Db::getDoc: no query opened\n"));
1257
    return false;
1348
    return false;
1258
    }
1349
    }
1259
1350
1260
    // For now the only post-query filter is on dir subtree
1351
    // For now the only post-query filter is on dir subtree
...
...
1262
    LOGDEB1(("Topdir %s postqflt %d\n", m_asdata.topdir.c_str(), postqfilter));
1353
    LOGDEB1(("Topdir %s postqflt %d\n", m_asdata.topdir.c_str(), postqfilter));
1263
1354
1264
    int xapi;
1355
    int xapi;
1265
    if (postqfilter) {
1356
    if (postqfilter) {
1266
    // There is a postquery filter, does this fall in already known area ?
1357
    // There is a postquery filter, does this fall in already known area ?
1267
    if (exti >= (int)dbindices.size()) {
1358
    if (exti >= (int)m_dbindices.size()) {
1268
        // Have to fetch xapian docs and filter until we get
1359
        // Have to fetch xapian docs and filter until we get
1269
        // enough or fail
1360
        // enough or fail
1270
        dbindices.reserve(exti+1);
1361
        m_dbindices.reserve(exti+1);
1271
        // First xapian doc we fetch is the one after last stored 
1362
        // First xapian doc we fetch is the one after last stored 
1272
        int first = dbindices.size() > 0 ? dbindices.back() + 1 : 0;
1363
        int first = m_dbindices.size() > 0 ? m_dbindices.back() + 1 : 0;
1273
        // Loop until we get enough docs
1364
        // Loop until we get enough docs
1274
        while (exti >= (int)dbindices.size()) {
1365
        while (exti >= (int)m_dbindices.size()) {
1275
        LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
1366
        LOGDEB(("Db::getDoc: fetching %d starting at %d\n",
1276
            qquantum, first));
1367
            qquantum, first));
1277
        try {
1368
        try {
1278
            ndb->mset = ndb->enquire->get_mset(first, qquantum);
1369
            m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
1279
        } catch (const Xapian::DatabaseModifiedError &error) {
1370
        } catch (const Xapian::DatabaseModifiedError &error) {
1280
            ndb->db.reopen();
1371
            m_ndb->db.reopen();
1281
            ndb->mset = ndb->enquire->get_mset(first, qquantum);
1372
            m_ndb->mset = m_ndb->enquire->get_mset(first, qquantum);
1282
        } catch (const Xapian::Error & error) {
1373
        } catch (const Xapian::Error & error) {
1283
          LOGERR(("enquire->get_mset: exception: %s\n", 
1374
          LOGERR(("enquire->get_mset: exception: %s\n", 
1284
              error.get_msg().c_str()));
1375
              error.get_msg().c_str()));
1285
          abort();
1376
          abort();
1286
        }
1377
        }
1287
1378
1288
        if (ndb->mset.empty()) {
1379
        if (m_ndb->mset.empty()) {
1289
            LOGDEB(("Db::getDoc: got empty mset\n"));
1380
            LOGDEB(("Db::getDoc: got empty mset\n"));
1290
            return false;
1381
            return false;
1291
        }
1382
        }
1292
        first = ndb->mset.get_firstitem();
1383
        first = m_ndb->mset.get_firstitem();
1293
        for (unsigned int i = 0; i < ndb->mset.size() ; i++) {
1384
        for (unsigned int i = 0; i < m_ndb->mset.size() ; i++) {
1294
            LOGDEB(("Db::getDoc: [%d]\n", i));
1385
            LOGDEB(("Db::getDoc: [%d]\n", i));
1295
            Xapian::Document xdoc = ndb->mset[i].get_document();
1386
            Xapian::Document xdoc = m_ndb->mset[i].get_document();
1296
            if (ndb->filterMatch(this, xdoc)) {
1387
            if (m_ndb->filterMatch(this, xdoc)) {
1297
            dbindices.push_back(first + i);
1388
            m_dbindices.push_back(first + i);
1298
            }
1389
            }
1299
        }
1390
        }
1300
        first = first + ndb->mset.size();
1391
        first = first + m_ndb->mset.size();
1301
        }
1392
        }
1302
    }
1393
    }
1303
    xapi = dbindices[exti];
1394
    xapi = m_dbindices[exti];
1304
    } else {
1395
    } else {
1305
    xapi = exti;
1396
    xapi = exti;
1306
    }
1397
    }
1307
1398
1308
1399
1309
    // From there on, we work with a xapian enquire item number. Fetch it
1400
    // From there on, we work with a xapian enquire item number. Fetch it
1310
    int first = ndb->mset.get_firstitem();
1401
    int first = m_ndb->mset.get_firstitem();
1311
    int last = first + ndb->mset.size() -1;
1402
    int last = first + m_ndb->mset.size() -1;
1312
1403
1313
    if (!(xapi >= first && xapi <= last)) {
1404
    if (!(xapi >= first && xapi <= last)) {
1314
    LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
1405
    LOGDEB(("Fetching for first %d, count %d\n", xapi, qquantum));
1315
    try {
1406
    try {
1316
      ndb->mset = ndb->enquire->get_mset(xapi, qquantum);
1407
      m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum);
1317
    } catch (const Xapian::DatabaseModifiedError &error) {
1408
    } catch (const Xapian::DatabaseModifiedError &error) {
1318
        ndb->db.reopen();
1409
        m_ndb->db.reopen();
1319
        ndb->mset = ndb->enquire->get_mset(xapi, qquantum);
1410
        m_ndb->mset = m_ndb->enquire->get_mset(xapi, qquantum);
1320
    } catch (const Xapian::Error & error) {
1411
    } catch (const Xapian::Error & error) {
1321
      LOGERR(("enquire->get_mset: exception: %s\n", 
1412
      LOGERR(("enquire->get_mset: exception: %s\n", 
1322
          error.get_msg().c_str()));
1413
          error.get_msg().c_str()));
1323
      abort();
1414
      abort();
1324
    }
1415
    }
1325
    if (ndb->mset.empty())
1416
    if (m_ndb->mset.empty())
1326
        return false;
1417
        return false;
1327
    first = ndb->mset.get_firstitem();
1418
    first = m_ndb->mset.get_firstitem();
1328
    last = first + ndb->mset.size() -1;
1419
    last = first + m_ndb->mset.size() -1;
1329
    }
1420
    }
1330
1421
1331
    LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
1422
    LOGDEB1(("Db::getDoc: Qry [%s] win [%d-%d] Estimated results: %d",
1332
         ndb->query.get_description().c_str(), 
1423
         m_ndb->query.get_description().c_str(), 
1333
         first, last,
1424
         first, last,
1334
         ndb->mset.get_matches_lower_bound()));
1425
         m_ndb->mset.get_matches_lower_bound()));
1335
1426
1336
    Xapian::Document xdoc = ndb->mset[xapi-first].get_document();
1427
    Xapian::Document xdoc = m_ndb->mset[xapi-first].get_document();
1337
    Xapian::docid docid = *(ndb->mset[xapi-first]);
1428
    Xapian::docid docid = *(m_ndb->mset[xapi-first]);
1338
    if (percent)
1429
    if (percent)
1339
    *percent = ndb->mset.convert_to_percent(ndb->mset[xapi-first]);
1430
    *percent = m_ndb->mset.convert_to_percent(m_ndb->mset[xapi-first]);
1340
1431
1341
    // Parse xapian document's data and populate doc fields
1432
    // Parse xapian document's data and populate doc fields
1342
    string data = xdoc.get_data();
1433
    string data = xdoc.get_data();
1343
    list<string> terms;
1434
    list<string> terms;
1344
    getQueryTerms(terms);
1435
    getQueryTerms(terms);
1345
    return ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
1436
    return m_ndb->dbDataToRclDoc(data, doc, m_qOpts, docid, terms);
1346
}
1437
}
1347
1438
1348
// Retrieve document defined by file name and internal path. Very inefficient,
1439
// Retrieve document defined by file name and internal path. Very inefficient,
1349
// used only for history display. We'd need to enter path+ipath terms in the
1440
// used only for history display. We'd need to enter path+ipath terms in the
1350
// db if we wanted to make this more efficient.
1441
// db if we wanted to make this more efficient.
1351
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
1442
bool Db::getDoc(const string &fn, const string &ipath, Doc &doc, int *pc)
1352
{
1443
{
1353
    LOGDEB(("Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(),
1444
    LOGDEB(("Db:getDoc: [%s] (%d) [%s]\n", fn.c_str(), fn.length(),
1354
        ipath.c_str()));
1445
        ipath.c_str()));
1355
    if (ndb == 0)
1446
    if (m_ndb == 0)
1356
    return false;
1447
    return false;
1357
1448
1358
    // Initialize what we can in any case. If this is history, caller
1449
    // Initialize what we can in any case. If this is history, caller
1359
    // will make partial display in case of error
1450
    // will make partial display in case of error
1360
    doc.ipath = ipath;
1451
    doc.ipath = ipath;
...
...
1367
    string pathterm  = "P" + hash;
1458
    string pathterm  = "P" + hash;
1368
    // Look for all documents with this path, searching for the one
1459
    // Look for all documents with this path, searching for the one
1369
    // with the appropriate ipath. This is very inefficient.
1460
    // with the appropriate ipath. This is very inefficient.
1370
    const char *ermsg = "";
1461
    const char *ermsg = "";
1371
    try {
1462
    try {
1372
    if (!ndb->db.term_exists(pathterm)) {
1463
    if (!m_ndb->db.term_exists(pathterm)) {
1373
        // Document found in history no longer in the database.
1464
        // Document found in history no longer in the database.
1374
        // We return true (because their might be other ok docs further)
1465
        // We return true (because their might be other ok docs further)
1375
        // but indicate the error with pc = -1
1466
        // but indicate the error with pc = -1
1376
        if (*pc) 
1467
        if (*pc) 
1377
        *pc = -1;
1468
        *pc = -1;
1378
        LOGINFO(("Db:getDoc: path inexistant: [%s] length %d\n",
1469
        LOGINFO(("Db:getDoc: path inexistant: [%s] length %d\n",
1379
             pathterm.c_str(), pathterm.length()));
1470
             pathterm.c_str(), pathterm.length()));
1380
        return true;
1471
        return true;
1381
    }
1472
    }
1382
    for (Xapian::PostingIterator docid = 
1473
    for (Xapian::PostingIterator docid = 
1383
         ndb->db.postlist_begin(pathterm);
1474
         m_ndb->db.postlist_begin(pathterm);
1384
         docid != ndb->db.postlist_end(pathterm); docid++) {
1475
         docid != m_ndb->db.postlist_end(pathterm); docid++) {
1385
1476
1386
        Xapian::Document xdoc = ndb->db.get_document(*docid);
1477
        Xapian::Document xdoc = m_ndb->db.get_document(*docid);
1387
        string data = xdoc.get_data();
1478
        string data = xdoc.get_data();
1388
        list<string> terms;
1479
        list<string> terms;
1389
        if (ndb->dbDataToRclDoc(data, doc, QO_NONE, *docid, terms) 
1480
        if (m_ndb->dbDataToRclDoc(data, doc, QO_NONE, *docid, terms) 
1390
        && doc.ipath == ipath)
1481
        && doc.ipath == ipath)
1391
        return true;
1482
        return true;
1392
    }
1483
    }
1393
    } catch (const Xapian::Error &e) {
1484
    } catch (const Xapian::Error &e) {
1394
    ermsg = e.get_msg().c_str();
1485
    ermsg = e.get_msg().c_str();