a/src/index/beaglequeue.cpp b/src/index/beaglequeue.cpp
...
...
45
45
46
#include <sys/stat.h>
46
#include <sys/stat.h>
47
47
48
const string keybght("beagleHitType");
48
const string keybght("beagleHitType");
49
49
50
#define LL 2048
51
50
51
// Beagle creates a file named .xxx (where xxx is the name for the main file
52
// in the queue), to hold external metadata (http or created by Beagle).
53
// This class reads the .xxx dotfile and turns it into an Rcl::Doc holder
52
class BeagleDotFile {
54
class BeagleDotFile {
53
public:
55
public:
54
    BeagleDotFile(RclConfig *conf, const string& fn)
56
    BeagleDotFile(RclConfig *conf, const string& fn)
55
        : m_conf(conf), m_fn(fn)
57
        : m_conf(conf), m_fn(fn)
56
    { }
58
    {}
57
59
60
    // Read input line, strip it of eol and return as c++ string
58
    bool readLine(string& line)
61
    bool readLine(string& line)
59
    {
62
    {
63
        static const int LL = 2048;
60
        char cline[LL]; 
64
        char cline[LL]; 
61
        cline[0] = 0;
65
        cline[0] = 0;
62
        m_input.getline(cline, LL-1);
66
        m_input.getline(cline, LL-1);
63
        if (!m_input.good()) {
67
        if (!m_input.good()) {
64
            if (m_input.bad()) {
68
            if (m_input.bad()) {
...
...
99
        doc.meta[keybght] = line;
103
        doc.meta[keybght] = line;
100
        if (!readLine(line))
104
        if (!readLine(line))
101
            return false;
105
            return false;
102
        doc.mimetype = line;
106
        doc.mimetype = line;
103
107
104
        // We set the bookmarks mtype as html, the text is empty
108
        // We set the bookmarks mtype as html (the text is empty
105
        // anyway, so that the html viewer will be called on 'Open'
109
        // anyway), so that the html viewer will be called on 'Open'
106
        bool isbookmark = false;
110
        bool isbookmark = false;
107
        if (!stringlowercmp("bookmark", doc.meta[keybght])) {
111
        if (!stringlowercmp("bookmark", doc.meta[keybght])) {
108
            isbookmark = true;
112
            isbookmark = true;
109
            doc.mimetype = "text/html";
113
            doc.mimetype = "text/html";
110
        }
114
        }
...
...
148
            string caname = m_conf->fieldCanon(*it);
152
            string caname = m_conf->fieldCanon(*it);
149
            doc.meta[caname].append(ss + *valuep);
153
            doc.meta[caname].append(ss + *valuep);
150
        }
154
        }
151
155
152
        // Finally build the confsimple that we will save to the
156
        // Finally build the confsimple that we will save to the
153
        // cache, out of document fields. This could also be done in
157
        // cache, from the doc fields. This could also be done in
154
        // parallel with the doc.meta build above, but simpler this way.
158
        // parallel with the doc.meta build above, but simpler this
159
        // way.  We need it because not all interesting doc fields are
160
        // in the meta array (e.g. mimetype, url), and we want
161
        // something homogeneous and easy to save.
155
        for (map<string,string>::const_iterator it = doc.meta.begin();
162
        for (map<string,string>::const_iterator it = doc.meta.begin();
156
             it != doc.meta.end(); it++) {
163
             it != doc.meta.end(); it++) {
157
            m_fields.set((*it).first, (*it).second, "");
164
            m_fields.set((*it).first, (*it).second, "");
158
        }
165
        }
159
        m_fields.set("url", doc.url, "");
166
        m_fields.set("url", doc.url, "");
...
...
167
    string m_fn;
174
    string m_fn;
168
    ifstream m_input;
175
    ifstream m_input;
169
};
176
};
170
177
171
const string badtmpdirname = "/no/such/dir/really/can/exist";
178
const string badtmpdirname = "/no/such/dir/really/can/exist";
179
180
// Initialize. Compute paths and create a temporary directory that will be
181
// used by internfile()
172
BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
182
BeagleQueueIndexer::BeagleQueueIndexer(RclConfig *cnf, Rcl::Db *db,
173
                                       DbIxStatusUpdater *updfunc)
183
                                       DbIxStatusUpdater *updfunc)
174
    : m_config(cnf), m_db(db), m_cache(0), m_updater(updfunc), 
184
    : m_config(cnf), m_db(db), m_cache(0), m_updater(updfunc), 
175
      m_nocacheindex(false)
185
      m_nocacheindex(false)
176
{
186
{
...
...
214
    }
224
    }
215
    }
225
    }
216
    deleteZ(m_cache);
226
    deleteZ(m_cache);
217
}
227
}
218
228
229
// Read document from cache. Return the metadata as an Rcl::Doc
230
// @param htt Beagle Hit Type 
219
bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc, 
231
bool BeagleQueueIndexer::getFromCache(const string& udi, Rcl::Doc &dotdoc, 
220
                                      string& data, string *htt)
232
                                      string& data, string *htt)
221
{
233
{
222
    string dict;
234
    string dict;
223
235
...
...
241
        cf.get(*it, dotdoc.meta[*it], "");
253
        cf.get(*it, dotdoc.meta[*it], "");
242
    }
254
    }
243
    return true;
255
    return true;
244
}
256
}
245
257
258
// Index document stored in the cache. 
246
bool BeagleQueueIndexer::indexFromCache(const string& udi)
259
bool BeagleQueueIndexer::indexFromCache(const string& udi)
247
{
260
{
248
    if (!m_db)
261
    if (!m_db)
249
        return false;
262
        return false;
250
263
...
...
302
315
303
bool BeagleQueueIndexer::index()
316
bool BeagleQueueIndexer::index()
304
{
317
{
305
    if (!m_db)
318
    if (!m_db)
306
        return false;
319
        return false;
307
    LOGDEB(("BeagleQueueIndexer::processqueue: dir: [%s]\n", 
320
    LOGDEB(("BeagleQueueIndexer::processqueue: [%s]\n", m_queuedir.c_str()));
308
            m_queuedir.c_str()));
309
    m_config->setKeyDir(m_queuedir);
321
    m_config->setKeyDir(m_queuedir);
310
322
311
    // First check that files in the cache are in the index, in case this
323
    // First check/index files found in the cache. If the index was reset,
324
    // this actually does work, else it sets the existence flags (avoid
312
    // has been reset. We don't do this when called from indexFiles
325
    // purging). We don't do this when called from indexFiles
313
    if (!m_nocacheindex) {
326
    if (!m_nocacheindex) {
314
        bool eof;
327
        bool eof;
315
        if (!m_cache->rewind(eof)) {
328
        if (!m_cache->rewind(eof)) {
329
            // rewind can return eof if the cache is empty
316
            if (!eof)
330
            if (!eof)
317
                return false;
331
                return false;
318
        }
332
        }
333
334
        // The cache is walked in chronological order, but we want to
335
        // index the newest files first (there can be several versions
336
        // of a given file in the cache). We have to reverse the
337
        // list. This would be a problem with a big cache, because the
338
        // udis can be big (e.g. 150 chars), and would be more
339
        // efficiently performed by the cache, which could use the
340
        // smaller offsets.
341
        //
342
        // Another approach would be to just walk chronologically and
343
        // reindex all versions: would waste processing but save
344
        // memory
319
        vector<string> alludis;
345
        vector<string> alludis;
320
        alludis.reserve(20000);
346
        alludis.reserve(20000);
321
        while (m_cache->next(eof)) {
347
        while (m_cache->next(eof)) {
322
            string dict;
348
            string dict;
323
            m_cache->getcurrentdict(dict);
349
            m_cache->getcurrentdict(dict);
...
...
338
                }
364
                }
339
            }
365
            }
340
        }
366
        }
341
    }
367
    }
342
368
369
    // Finally index the queue
343
    FsTreeWalker walker(FsTreeWalker::FtwNoRecurse);
370
    FsTreeWalker walker(FsTreeWalker::FtwNoRecurse);
344
    walker.addSkippedName(".*");
371
    walker.addSkippedName(".*");
345
    FsTreeWalker::Status status =walker.walk(m_queuedir, *this);
372
    FsTreeWalker::Status status =walker.walk(m_queuedir, *this);
346
    LOGDEB(("BeagleQueueIndexer::processqueue: done: status %d\n", status));
373
    LOGDEB(("BeagleQueueIndexer::processqueue: done: status %d\n", status));
347
    return true;
374
    return true;
348
}
375
}
349
376
377
// Index a list of files (sent by the real time monitor)
350
bool BeagleQueueIndexer::indexFiles(list<string>& files)
378
bool BeagleQueueIndexer::indexFiles(list<string>& files)
351
{
379
{
352
    LOGDEB(("BeagleQueueIndexer::indexFiles\n"));
380
    LOGDEB(("BeagleQueueIndexer::indexFiles\n"));
353
381
354
    if (!m_db) {
382
    if (!m_db) {
...
...
487
        doc.meta[Rcl::Doc::keybcknd] = "BGL";
515
        doc.meta[Rcl::Doc::keybcknd] = "BGL";
488
        if (!m_db->addOrUpdate(udi, "", doc)) 
516
        if (!m_db->addOrUpdate(udi, "", doc)) 
489
            return FsTreeWalker::FtwError;
517
            return FsTreeWalker::FtwError;
490
    }
518
    }
491
519
492
493
    // Copy to cache
520
    // Copy to cache
494
    {
521
    {
495
        // doc fields not in meta, needing saving to the cache
522
        // doc fields not in meta, needing saving to the cache
496
        dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
523
        dotfile.m_fields.set("fmtime", dotdoc.fmtime, "");
497
        dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");
524
        dotfile.m_fields.set("fbytes", dotdoc.fbytes, "");