Switch to unified view

a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.5 2004-12-17 15:50:48 dockes Exp $ (C) 2004 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.6 2005-01-24 13:17:58 dockes Exp $ (C) 2004 J.F.Dockes";
3
#endif
3
#endif
4
4
5
#include <sys/stat.h>
5
#include <sys/stat.h>
6
6
7
#include <iostream>
7
#include <iostream>
...
...
12
12
13
#include "rcldb.h"
13
#include "rcldb.h"
14
#include "textsplit.h"
14
#include "textsplit.h"
15
#include "transcode.h"
15
#include "transcode.h"
16
#include "unacpp.h"
16
#include "unacpp.h"
17
#include "conftree.h"
17
18
18
#include "xapian.h"
19
#include "xapian.h"
19
20
20
// Data for a xapian database
21
// Data for a xapian database. There could actually be 2 different ones for
22
// indexing or query as there is not much in common.
21
class Native {
23
class Native {
22
 public:
24
 public:
23
    bool isopen;
25
    bool isopen;
24
    bool iswritable;
26
    bool iswritable;
25
    class Xapian::Database db;
27
    // Indexing
26
    class Xapian::WritableDatabase wdb;
28
    Xapian::WritableDatabase wdb;
27
    vector<bool> updated;
29
    vector<bool> updated;
28
30
31
    // Querying
32
    Xapian::Database db;
33
    Xapian::Query query;
29
    Native() : isopen(false), iswritable(false) {}
34
    Native() : isopen(false), iswritable(false) {}
30
35
31
};
36
};
32
37
33
Rcl::Db::Db() 
38
Rcl::Db::Db() 
...
...
35
    pdata = new Native;
40
    pdata = new Native;
36
}
41
}
37
42
38
Rcl::Db::~Db()
43
Rcl::Db::~Db()
39
{
44
{
45
    cerr << "Rcl::Db::~Db" << endl;
40
    if (pdata == 0)
46
    if (pdata == 0)
41
    return;
47
    return;
42
    Native *ndb = (Native *)pdata;
48
    Native *ndb = (Native *)pdata;
43
    cerr << "Db::~Db: isopen " << ndb->isopen << " iswritable " <<
49
    cerr << "Db::~Db: isopen " << ndb->isopen << " iswritable " <<
44
    ndb->iswritable << endl;
50
    ndb->iswritable << endl;
45
    try {
51
    try {
46
    // There is nothing to do for an ro db.
52
    // There is nothing to do for an ro db.
47
    if (ndb->isopen == false || ndb->iswritable == false) {
53
    if (ndb->isopen == false || ndb->iswritable == false) {
54
      cerr << "Deleting native database" << endl;
48
        delete ndb;
55
        delete ndb;
49
        return;
56
        return;
50
    }
57
    }
51
    ndb->wdb.flush();
58
    ndb->wdb.flush();
52
    delete ndb;
59
    delete ndb;
...
...
80
        ndb->iswritable = true;
87
        ndb->iswritable = true;
81
        break;
88
        break;
82
    case DbRO:
89
    case DbRO:
83
    default:
90
    default:
84
        ndb->iswritable = false;
91
        ndb->iswritable = false;
85
      cerr << "Not ready to open RO yet" << endl;
92
      ndb->db = Xapian::Auto::open(dir, Xapian::DB_OPEN);
86
      exit(1);
93
      break;
87
    }
94
    }
88
    ndb->isopen = true;
95
    ndb->isopen = true;
89
    return true;
96
    return true;
90
    } catch (const Xapian::Error &e) {
97
    } catch (const Xapian::Error &e) {
91
    cout << "Exception: " << e.get_msg() << endl;
98
    cout << "Exception: " << e.get_msg() << endl;
...
...
140
    Xapian::termpos curpos;  // Last position sent to callback
147
    Xapian::termpos curpos;  // Last position sent to callback
141
    wsData(Xapian::Document &d) : doc(d), basepos(1), curpos(0)
148
    wsData(Xapian::Document &d) : doc(d), basepos(1), curpos(0)
142
    {}
149
    {}
143
};
150
};
144
151
152
// Callback for the document to word splitting class during indexation
145
bool splitCb(void *cdata, const std::string &term, int pos)
153
static bool splitCb(void *cdata, const std::string &term, int pos)
146
{
154
{
147
    wsData *data = (wsData*)cdata;
155
    wsData *data = (wsData*)cdata;
148
156
149
    // cerr << "splitCb: term " << term << endl;
157
    // cerr << "splitCb: term " << term << endl;
150
    //string printable;
158
    //string printable;
...
...
170
{
178
{
171
    string inter;
179
    string inter;
172
    out.erase();
180
    out.erase();
173
    if (!unac_cpp(in, inter))
181
    if (!unac_cpp(in, inter))
174
    return false;
182
    return false;
175
    out.resize(inter.length());
183
    out.reserve(inter.length());
176
    for (unsigned int i = 0; i < inter.length(); i++) {
184
    for (unsigned int i = 0; i < inter.length(); i++) {
177
    if (inter[i] >= 'A' && inter[i] <= 'Z')
185
    if (inter[i] >= 'A' && inter[i] <= 'Z')
178
        out += inter[i] + 'a' - 'A';
186
        out += inter[i] + 'a' - 'A';
179
    else
187
    else
180
        out += inter[i];
188
        out += inter[i];
...
...
237
245
238
    if (1 /*dupes == DUPE_replace*/) {
246
    if (1 /*dupes == DUPE_replace*/) {
239
    // If this document has already been indexed, update the existing
247
    // If this document has already been indexed, update the existing
240
    // entry.
248
    // entry.
241
    try {
249
    try {
242
      Xapian::docid did = ndb->wdb.replace_document(pathterm, 
250
#if 0
243
                            newdocument);
251
      Xapian::docid did = 
252
#endif
253
      ndb->wdb.replace_document(pathterm, newdocument);
244
#if 0
254
#if 0
245
        if (did < updated.size()) {
255
        if (did < updated.size()) {
246
        updated[did] = true;
256
        updated[did] = true;
247
        //cout << "updated." << endl;
257
        //cout << "updated." << endl;
248
        } else {
258
        } else {
...
...
295
    return true;
305
    return true;
296
    }
306
    }
297
307
298
    return true;
308
    return true;
299
}
309
}
310
311
#include <vector>
312
313
// Accumulator passed (as the opaque callback data) to the query-side
// splitter callback: collects the terms in the order they are appended.
struct wsQData {
    std::vector<std::string> terms;
};
317
318
// Callback for the word splitting class during query parsing: stores each term
319
static bool splitQCb(void *cdata, const std::string &term, int )
320
{
321
    wsQData *data = (wsQData*)cdata;
322
323
    cerr << "splitQCb: term '" << term << "'" << endl;
324
    cerr << "splitQCb: term length: " << term.length() <<  endl;
325
    //string printable;
326
    //transcode(term, printable, "UTF-8", "ISO8859-1");
327
    //cerr << "Adding " << printable << endl;
328
329
    data->terms.push_back(term);
330
    return true;
331
}
332
333
bool Rcl::Db::setQuery(const std::string &querystring)
334
{
335
    wsQData splitData;
336
    TextSplit splitter(splitQCb, &splitData);
337
338
    string noacc;
339
    if (!dumb_string(querystring, noacc)) {
340
  return false;
341
    }
342
    //    noacc = querystring;
343
    splitter.text_to_words(noacc);
344
345
    Native *ndb = (Native *)pdata;
346
347
    //        splitData.terms.resize(0);
348
    //        splitData.terms.push_back(string("le"));
349
    ndb->query = Xapian::Query(Xapian::Query::OP_OR, splitData.terms.begin(), 
350
                 splitData.terms.end());
351
352
    return true;
353
}
354
355
bool Rcl::Db::getDoc(int i, Doc &doc)
356
{
357
    // cerr << "Rcl::Db::getDoc: " << i << endl;
358
    Native *ndb = (Native *)pdata;
359
360
    Xapian::Enquire enquire(ndb->db);
361
    enquire.set_query(ndb->query);
362
    Xapian::MSet matches = enquire.get_mset(i, 1);
363
364
    // cerr << "Query `" << ndb->query.get_description() << "'" <<
365
    // "Estimated results: " << matches.get_matches_lower_bound() << endl;
366
367
    if (matches.empty())
368
  return false;
369
370
    Xapian::Document xdoc = matches.begin().get_document();
371
372
    // Parse xapian document's data and populate doc fields
373
    string data = xdoc.get_data();
374
    ConfSimple parms(&data);
375
    parms.get(string("mtype"), doc.mimetype);
376
    parms.get(string("mtime"), doc.mtime);
377
    parms.get(string("url"), doc.url);
378
    return true;
379
}