Switch to unified view

a/src/index/recollindex.cpp b/src/index/recollindex.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.2 2004-12-14 17:54:16 dockes Exp $ (C) 2004 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: recollindex.cpp,v 1.3 2004-12-15 15:00:37 dockes Exp $ (C) 2004 J.F.Dockes";
3
#endif
3
#endif
4
5
#include <sys/stat.h>
4
6
5
#include <strings.h>
7
#include <strings.h>
6
8
7
#include <iostream>
9
#include <iostream>
8
10
...
...
12
#include "fstreewalk.h"
14
#include "fstreewalk.h"
13
#include "mimetype.h"
15
#include "mimetype.h"
14
#include "rcldb.h"
16
#include "rcldb.h"
15
#include "readfile.h"
17
#include "readfile.h"
16
#include "indexer.h"
18
#include "indexer.h"
19
#include "csguess.h"
20
#include "transcode.h"
17
21
18
using namespace std;
22
using namespace std;
19
23
20
24
21
// textPlainToDoc, captured here as an interleaved side-by-side diff: old and
// new revision lines alternate with their line numbers.
// Old revision: returned Rcl::Doc* and its whole body was `return 0;`.
// New revision: returns bool and hands the result back through docout.
//   conf   - configuration; guesscharset and defcharset are read from it
//   fn     - file name handed to file_to_string()
//   mtype  - mime type; not referenced in the body shown here
//   docout - assigned origcharset and the converted text before `return true`
Rcl::Doc* textPlainToDoc(RclConfig *conf, const string &fn, 
25
bool textPlainToDoc(RclConfig *conf, const string &fn, 
22
           const string &mtype)
26
           const string &mtype, Rcl::Doc &docout)
23
{
27
{
28
    string otext;
29
    // Load the file into otext; bail out when file_to_string() yields a falsy result.
    if (!file_to_string(fn, otext))
30
  return false;
31
  
32
    // Try to guess charset, then convert to utf-8, and fill document fields
33
    string charset;
34
    // Use csguess() only when the configuration asks for it; otherwise fall
    // back to the configured default charset.
    if (conf->guesscharset) {
35
  charset = csguess(otext, conf->defcharset);
36
    } else
37
  charset = conf->defcharset;
38
    string utf8;
39
    // NOTE(review): this exits with 0 (i.e. false) when transcode() yields a
    // truthy value, while the file_to_string() check above exits on a falsy
    // one. If transcode() returns true on success the condition is inverted
    // and should read `!transcode(...)` -- confirm against transcode.h.
    // NOTE(review): `return 0` in a bool function; the other exits use
    // false/true.
    if (transcode(otext, charset, utf8, "UTF-8"))
40
  return 0;
41
42
    // Build the result locally, then copy it into the caller's docout.
    Rcl::Doc out;
43
    out.origcharset = charset;
44
    out.text = utf8;
45
    docout = out;
24
    return 0;
46
    return true;
25
}
47
}
26
48
49
// Handler registry, captured as an interleaved side-by-side diff: old and new
// revision lines alternate with their line numbers.
// ihandlers maps a mime type string to a MimeHandlerFunc. It is filled by the
// constructor of the file-static IHandler_Init instance declared below.
// Map of mime types to internal interner functions. This could just as well 
50
// be an if else if suite inside getMimeHandler(), but this is prettier ?
27
static map<string, MimeHandlerFunc> ihandlers;
51
static map<string, MimeHandlerFunc> ihandlers;
52
// Static object to get the map to be initialized at program start.
28
class IHandler_Init {
53
class IHandler_Init {
29
 public:
54
 public:
30
    // Registers the built-in handlers; currently only "text/plain".
    IHandler_Init() {
55
    IHandler_Init() {
31
    ihandlers["text/plain"] = textPlainToDoc;
56
    ihandlers["text/plain"] = textPlainToDoc;
57
  // Add new associations here when needed
32
    }
58
    }
33
};
59
};
34
// The constructor writes into ihandlers, so ihandlers must already be
// constructed; both are statics of this file and ihandlers is declared first.
static IHandler_Init ihandleriniter;
60
static IHandler_Init ihandleriniter;
61
35
62
36
/**
63
/**
37
 * Return handler function for given mime type
64
 * Return handler function for given mime type
38
 */
65
 */
39
MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
66
MimeHandlerFunc getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
...
...
73
    } else {
100
    } else {
74
    return 0;
101
    return 0;
75
    }
102
    }
76
}
103
}
77
104
105
/**
106
 * Bunch holder for data used while indexing a directory tree
107
 */
78
class DirIndexer {
108
class DirIndexer {
79
    FsTreeWalker walker;
109
    FsTreeWalker walker;
80
    RclConfig *config;
110
    RclConfig *config;
81
    string topdir;
111
    string topdir;
82
    string dbdir;
112
    string dbdir;
...
...
93
    void index();
123
    void index();
94
};
124
};
95
125
96
// DirIndexer::index, captured as an interleaved side-by-side diff: old and
// new revision lines alternate with their line numbers.
// Old revision: the database open/close code was disabled with #if 0, so only
// the tree walk ran. New revision: the #if 0 / #endif guards are gone, so the
// db is opened before the walk and closed after it.
void DirIndexer::index()
126
void DirIndexer::index()
97
{
127
{
98
#if 0
99
    // Open the database in dbdir with Rcl::Db::DbUpd; on failure report to
    // cerr and return without walking the tree.
    if (!db.open(dbdir, Rcl::Db::DbUpd)) {
128
    if (!db.open(dbdir, Rcl::Db::DbUpd)) {
100
    cerr << "Error opening database in " << dbdir << " for " <<
129
    cerr << "Error opening database in " << dbdir << " for " <<
101
        topdir << endl;
130
        topdir << endl;
102
    return;
131
    return;
103
    }
132
    }
104
#endif
105
    // Walk topdir with indexfile as the callback; `this` is passed as the
    // client data, which indexfile casts back to DirIndexer*.
    // NOTE(review): the result of walker.walk(), if it returns one, is
    // discarded here.
    walker.walk(topdir, indexfile, this);
133
    walker.walk(topdir, indexfile, this);
106
#if 0
107
    // Close the database; a failure is only reported to cerr.
    if (!db.close()) {
134
    if (!db.close()) {
108
    cerr << "Error closing database in " << dbdir << " for " <<
135
    cerr << "Error closing database in " << dbdir << " for " <<
109
        topdir << endl;
136
        topdir << endl;
110
    return;
137
    return;
111
    }
138
    }
112
#endif
113
}
139
}
114
140
141
/** 
142
 * This function gets called for every file and directory found by the
143
 * tree walker. Adjust parameters and index files if/when needed.
144
 */
115
FsTreeWalker::Status 
145
FsTreeWalker::Status 
116
indexfile(void *cdata, const std::string &fn, const struct stat *stp, 
146
indexfile(void *cdata, const std::string &fn, const struct stat *stp, 
117
      FsTreeWalker::CbFlag flg)
147
      FsTreeWalker::CbFlag flg)
118
{
148
{
119
    DirIndexer *me = (DirIndexer *)cdata;
149
    DirIndexer *me = (DirIndexer *)cdata;
...
...
142
    if (!fun) {
172
    if (!fun) {
143
    // No handler for this type, for now :(
173
    // No handler for this type, for now :(
144
    return FsTreeWalker::FtwOk;
174
    return FsTreeWalker::FtwOk;
145
    }
175
    }
146
176
147
    // Check if file has already been indexed, and has changed since
177
    if (!me->db.needUpdate(fn, stp))
148
    // - Make path term, 
178
  return FsTreeWalker::FtwOk;
149
    // - query db: postlist_begin->docid
150
    // - fetch doc (get_document(docid)
151
    // - check date field, maybe skip
152
179
153
    // Turn file into a document. The document has fields for title, body 
180
    // Turn file into a document. The document has fields for title, body 
154
    // etc.,  all text converted to utf8
181
    // etc.,  all text converted to utf8
155
    Rcl::Doc *doc = fun(me->config, fn,  mime);
182
    Rcl::Doc doc;
183
    if (!fun(me->config, fn,  mime, doc))
184
  return FsTreeWalker::FtwOk;
156
185
157
#if 0
158
    // Set up xapian document, add postings and misc fields, 
186
    // Set up xapian document, add postings and misc fields, 
159
    // add to or update database.
187
    // add to or update database.
160
    dbadd(doc);
188
    if (!me->db.add(fn, doc))
161
#endif
189
  return FsTreeWalker::FtwError;
162
190
163
    return FsTreeWalker::FtwOk;
191
    return FsTreeWalker::FtwOk;
164
}
192
}
193
165
194
166
195
167
int main(int argc, const char **argv)
196
int main(int argc, const char **argv)
168
{
197
{
169
    RclConfig *config = new RclConfig;
198
    RclConfig *config = new RclConfig;
...
...
178
    cerr << "No top directories in configuration" << endl;
207
    cerr << "No top directories in configuration" << endl;
179
    exit(1);
208
    exit(1);
180
    }
209
    }
181
    vector<string> tdl;
210
    vector<string> tdl;
182
    if (ConfTree::stringToStrings(topdirs, tdl)) {
211
    if (ConfTree::stringToStrings(topdirs, tdl)) {
183
    for (int i = 0; i < tdl.size(); i++) {
212
    for (unsigned int i = 0; i < tdl.size(); i++) {
184
        string topdir = tdl[i];
213
        string topdir = tdl[i];
185
        cout << topdir << endl;
214
        cout << topdir << endl;
186
        string dbdir;
215
        string dbdir;
187
        if (conf->get("dbdir", dbdir, topdir) == 0) {
216
        if (conf->get("dbdir", dbdir, topdir) == 0) {
188
        cerr << "No database directory in configuration for " 
217
        cerr << "No database directory in configuration for "