Switch to unified view

a/src/index/indexer.cpp b/src/index/indexer.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.7 2005-03-17 14:02:05 dockes Exp $ (C) 2004 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: indexer.cpp,v 1.8 2005-03-17 15:35:49 dockes Exp $ (C) 2004 J.F.Dockes";
3
#endif
3
#endif
4
#include <stdio.h>
4
#include <stdio.h>
5
#include <sys/stat.h>
5
#include <sys/stat.h>
6
6
7
#include <strings.h>
7
#include <strings.h>
...
...
30
#ifndef deleteZ
30
#ifndef deleteZ
31
#define deleteZ(X) {delete X;X = 0;}
31
#define deleteZ(X) {delete X;X = 0;}
32
#endif
32
#endif
33
33
34
/**
34
/**
35
 * Bunch holder for data used while indexing a directory tree
35
 * Bunch holder for data used while indexing a directory tree. This is also the
36
 * tree walker callback object (the processone method gets called for every 
37
 * file or directory).
36
 */
38
 */
37
class DbIndexer : public FsTreeWalkerCB {
39
class DbIndexer : public FsTreeWalkerCB {
38
    FsTreeWalker walker;
40
    FsTreeWalker walker;
39
    RclConfig *config;
41
    RclConfig *config;
40
    string dbdir;
42
    string dbdir;
...
...
45
    DbIndexer(RclConfig *cnf, const string &dbd, list<string> *top) 
47
    DbIndexer(RclConfig *cnf, const string &dbd, list<string> *top) 
46
    : config(cnf), dbdir(dbd), topdirs(top)
48
    : config(cnf), dbdir(dbd), topdirs(top)
47
    { }
49
    { }
48
50
49
    virtual ~DbIndexer() {
51
    virtual ~DbIndexer() {
52
  // Maybe clean up temporary directory
50
    if (tmpdir.length()) {
53
    if (tmpdir.length()) {
51
        wipedir(tmpdir);
54
        wipedir(tmpdir);
52
        if (rmdir(tmpdir.c_str()) < 0) {
55
        if (rmdir(tmpdir.c_str()) < 0) {
53
        LOGERR(("DbIndexer::~DbIndexer: cant clear temp dir %s\n",
56
        LOGERR(("DbIndexer::~DbIndexer: cant clear temp dir %s\n",
54
            tmpdir.c_str()));
57
            tmpdir.c_str()));
...
...
57
    }
60
    }
58
61
59
    FsTreeWalker::Status 
62
    FsTreeWalker::Status 
60
    processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
63
    processone(const std::string &, const struct stat *, FsTreeWalker::CbFlag);
61
64
65
    // The top level entry point. 
62
    bool index();
66
    bool index();
63
};
67
};
64
68
69
70
// Top level file system tree index method for updating a given database.
71
//
72
// We create the temporary directory, open the database, then call a
73
// file system walk for each top-level directory.
74
// When walking is done, we create the stem databases and close the main db.
65
bool DbIndexer::index()
75
bool DbIndexer::index()
66
{
76
{
67
    string tdir;
77
    string tdir;
68
78
69
    if (!maketmpdir(tmpdir)) {
79
    if (!maketmpdir(tmpdir)) {
...
...
109
119
110
120
111
/** 
121
/** 
112
 * This function gets called for every file and directory found by the
122
 * This function gets called for every file and directory found by the
113
 * tree walker. It checks with the db if the file has changed and needs to
123
 * tree walker. It checks with the db if the file has changed and needs to
114
 * be reindexed. If so, it calls an appropriate handler depending on the mime
124
 * be reindexed. If so, it calls internfile() which will identify the
115
 * type, which is responsible for populating an Rcl::Doc.
125
 * file type and call an appropriate handler to create documents in
126
 * internal form, which we then add to the database.
127
 *
116
 * Accent and majuscule handling are performed by the db module when doing
128
 * Accent and majuscule handling are performed by the db module when doing
117
 * the actual indexing work.
129
 * the actual indexing work. The Rcl::Doc created by internfile()
130
 * contains pretty raw UTF-8 data.
118
 */
131
 */
119
FsTreeWalker::Status 
132
FsTreeWalker::Status 
120
DbIndexer::processone(const std::string &fn, const struct stat *stp, 
133
DbIndexer::processone(const std::string &fn, const struct stat *stp, 
121
           FsTreeWalker::CbFlag flg)
134
           FsTreeWalker::CbFlag flg)
122
{
135
{
...
...
162
    string topdirs;
175
    string topdirs;
163
    if (conf->get("topdirs", topdirs, "") == 0) {
176
    if (conf->get("topdirs", topdirs, "") == 0) {
164
    LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
177
    LOGERR(("ConfIndexer::index: no top directories in configuration\n"));
165
    return false;
178
    return false;
166
    }
179
    }
167
168
    // Group the directories by database: it is important that all
169
    // directories for a database be indexed at once so that deleted
170
    // file cleanup works 
171
    list<string> tdl; // List of directories to be indexed
180
    list<string> tdl; // List of directories to be indexed
172
    if (!ConfTree::stringToStrings(topdirs, tdl)) {
181
    if (!ConfTree::stringToStrings(topdirs, tdl)) {
173
    LOGERR(("ConfIndexer::index: parse error for directory list\n"));
182
    LOGERR(("ConfIndexer::index: parse error for directory list\n"));
174
    return false;
183
    return false;
175
    }
184
    }
176
185
186
    // Each top level directory to be indexed can be associated with a
187
    // different database. We first group the directories by database:
188
    // it is important that all directories for a database be indexed
189
    // at once so that deleted file cleanup works
177
    list<string>::iterator dirit;
190
    list<string>::iterator dirit;
178
    map<string, list<string> > dbmap;
191
    map<string, list<string> > dbmap;
179
    map<string, list<string> >::iterator dbit;
192
    map<string, list<string> >::iterator dbit;
180
    for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
193
    for (dirit = tdl.begin(); dirit != tdl.end(); dirit++) {
181
    string db;
194
    string db;
...
...
194
    } else {
207
    } else {
195
        dbit->second.push_back(dir);
208
        dbit->second.push_back(dir);
196
    }
209
    }
197
    }
210
    }
198
211
212
    // Index each directory group in turn
199
    for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
213
    for (dbit = dbmap.begin(); dbit != dbmap.end(); dbit++) {
200
    //cout << dbit->first << " -> ";
214
    //cout << dbit->first << " -> ";
201
    //list<string>::const_iterator dit;
215
    //list<string>::const_iterator dit;
202
    //for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) {
216
    //for (dit = dbit->second.begin(); dit != dbit->second.end(); dit++) {
203
    //    cout << *dit << " ";
217
    //    cout << *dit << " ";