|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.31 2005-10-20 11:33:49 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.32 2005-11-05 14:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
#include <stdio.h>
|
4 |
#include <stdio.h>
|
5 |
#include <sys/stat.h>
|
5 |
#include <sys/stat.h>
|
6 |
|
6 |
|
7 |
#include <iostream>
|
7 |
#include <iostream>
|
|
... |
|
... |
336 |
// - sample
|
336 |
// - sample
|
337 |
// - caption (title limited to 100 chars)
|
337 |
// - caption (title limited to 100 chars)
|
338 |
// - mime type
|
338 |
// - mime type
|
339 |
string record = "url=file://" + fn;
|
339 |
string record = "url=file://" + fn;
|
340 |
record += "\nmtype=" + doc.mimetype;
|
340 |
record += "\nmtype=" + doc.mimetype;
|
341 |
record += "\nmtime=" + doc.mtime;
|
341 |
record += "\nfmtime=" + doc.fmtime;
|
|
|
342 |
if (!doc.dmtime.empty())
|
|
|
343 |
record += "\ndmtime=" + doc.dmtime;
|
342 |
record += "\norigcharset=" + doc.origcharset;
|
344 |
record += "\norigcharset=" + doc.origcharset;
|
343 |
record += "\ncaption=" + doc.title;
|
345 |
record += "\ncaption=" + doc.title;
|
344 |
record += "\nkeywords=" + doc.keywords;
|
346 |
record += "\nkeywords=" + doc.keywords;
|
345 |
record += "\nabstract=" + doc.abstract;
|
347 |
record += "\nabstract=" + doc.abstract;
|
346 |
if (!doc.ipath.empty()) {
|
348 |
if (!doc.ipath.empty()) {
|
|
... |
|
... |
349 |
|
351 |
|
350 |
record += "\n";
|
352 |
record += "\n";
|
351 |
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
|
353 |
LOGDEB1(("Newdocument data: %s\n", record.c_str()));
|
352 |
newdocument.set_data(record);
|
354 |
newdocument.set_data(record);
|
353 |
|
355 |
|
354 |
time_t mtime = atol(doc.mtime.c_str());
|
356 |
time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() :
|
|
|
357 |
doc.dmtime.c_str());
|
355 |
struct tm *tm = localtime(&mtime);
|
358 |
struct tm *tm = localtime(&mtime);
|
356 |
char buf[9];
|
359 |
char buf[9];
|
357 |
sprintf(buf, "%04d%02d%02d",tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
|
360 |
sprintf(buf, "%04d%02d%02d",tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
|
358 |
newdocument.add_term("D" + string(buf)); // Date (YYYYMMDD)
|
361 |
newdocument.add_term("D" + string(buf)); // Date (YYYYMMDD)
|
359 |
buf[7] = '\0';
|
362 |
buf[7] = '\0';
|
|
... |
|
... |
397 |
string pathterm = "P" + filename;
|
400 |
string pathterm = "P" + filename;
|
398 |
if (!ndb->wdb.term_exists(pathterm)) {
|
401 |
if (!ndb->wdb.term_exists(pathterm)) {
|
399 |
return true;
|
402 |
return true;
|
400 |
}
|
403 |
}
|
401 |
|
404 |
|
402 |
// Look for all documents with this path. Check the update time (once).
|
405 |
// Look for all documents with this path. We need to look at all
|
403 |
// If the db is up to date, set the update flags for all documents
|
406 |
// to set their existence flag.
|
|
|
407 |
// We check the update time on the first document only.
|
404 |
Xapian::PostingIterator doc;
|
408 |
Xapian::PostingIterator doc;
|
405 |
try {
|
409 |
try {
|
406 |
Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);
|
410 |
Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);
|
407 |
for (Xapian::PostingIterator docid = docid0;
|
411 |
for (Xapian::PostingIterator docid = docid0;
|
408 |
docid != ndb->wdb.postlist_end(pathterm); docid++) {
|
412 |
docid != ndb->wdb.postlist_end(pathterm); docid++) {
|
409 |
|
413 |
|
410 |
Xapian::Document doc = ndb->wdb.get_document(*docid);
|
414 |
Xapian::Document doc = ndb->wdb.get_document(*docid);
|
411 |
|
415 |
|
412 |
// Check the date once. no need to look at the others if the
|
416 |
// Check the date once. no need to look at the others if
|
413 |
// db needs updating.
|
417 |
// the db needs updating. Note that the fmtime used to be
|
|
|
418 |
// called mtime, and we keep compatibility with the old field name.
|
414 |
if (docid == docid0) {
|
419 |
if (docid == docid0) {
|
415 |
string data = doc.get_data();
|
420 |
string data = doc.get_data();
|
416 |
const char *cp = strstr(data.c_str(), "mtime=");
|
421 |
const char *cp = strstr(data.c_str(), "fmtime=");
|
|
|
422 |
if (cp) {
|
|
|
423 |
cp += 7;
|
|
|
424 |
} else {
|
|
|
425 |
cp = strstr(data.c_str(), "mtime=");
|
|
|
426 |
if (cp)
|
417 |
cp += 6;
|
427 |
cp+= 6;
|
|
|
428 |
}
|
418 |
long mtime = atol(cp);
|
429 |
long mtime = cp ? atol(cp) : 0;
|
419 |
if (mtime < stp->st_mtime) {
|
430 |
if (mtime < stp->st_mtime) {
|
|
|
431 |
LOGDEB2(("Need update: Db Doc mtime %ld file mtime %ld\n",
|
|
|
432 |
(long)mtime, (long)stp->st_mtime));
|
420 |
// Db is not up to date. Let's index the file
|
433 |
// Db is not up to date. Let's index the file
|
421 |
return true;
|
434 |
return true;
|
422 |
}
|
435 |
}
|
423 |
}
|
436 |
}
|
424 |
|
437 |
|
|
... |
|
... |
1025 |
string data = xdoc.get_data();
|
1038 |
string data = xdoc.get_data();
|
1026 |
LOGDEB1(("Rcl::Db::getDoc: data: %s\n", data.c_str()));
|
1039 |
LOGDEB1(("Rcl::Db::getDoc: data: %s\n", data.c_str()));
|
1027 |
ConfSimple parms(&data);
|
1040 |
ConfSimple parms(&data);
|
1028 |
parms.get(string("url"), doc.url);
|
1041 |
parms.get(string("url"), doc.url);
|
1029 |
parms.get(string("mtype"), doc.mimetype);
|
1042 |
parms.get(string("mtype"), doc.mimetype);
|
1030 |
parms.get(string("mtime"), doc.mtime);
|
1043 |
parms.get(string("fmtime"), doc.fmtime);
|
|
|
1044 |
parms.get(string("dmtime"), doc.dmtime);
|
1031 |
parms.get(string("origcharset"), doc.origcharset);
|
1045 |
parms.get(string("origcharset"), doc.origcharset);
|
1032 |
parms.get(string("caption"), doc.title);
|
1046 |
parms.get(string("caption"), doc.title);
|
1033 |
parms.get(string("keywords"), doc.keywords);
|
1047 |
parms.get(string("keywords"), doc.keywords);
|
1034 |
parms.get(string("abstract"), doc.abstract);
|
1048 |
parms.get(string("abstract"), doc.abstract);
|
1035 |
parms.get(string("ipath"), doc.ipath);
|
1049 |
parms.get(string("ipath"), doc.ipath);
|