recoll / Code / Diff of /src/rcldb/rcldb.cpp

Diff of /src/rcldb/rcldb.cpp [09302b] .. [5ebcb0]

Switch to unified view


#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.32 2005-11-05 14:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <stdio.h>
#include <sys/stat.h>

#include <iostream>
...
    // - sample
    // - caption (title limited to 100 chars)
    // - mime type 
    string record = "url=file://" + fn;
    record += "\nmtype=" + doc.mimetype;
    record += "\nfmtime=" + doc.fmtime;
    if (!doc.dmtime.empty())
  record += "\ndmtime=" + doc.dmtime;
    record += "\norigcharset=" + doc.origcharset;
    record += "\ncaption=" + doc.title;
    record += "\nkeywords=" + doc.keywords;
    record += "\nabstract=" + doc.abstract;
    if (!doc.ipath.empty()) {
...

    record += "\n";
    LOGDEB1(("Newdocument data: %s\n", record.c_str()));
    newdocument.set_data(record);

    time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() : 
          doc.dmtime.c_str());
    struct tm *tm = localtime(&mtime);
    char buf[9];
    sprintf(buf, "%04d%02d%02d",tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
    newdocument.add_term("D" + string(buf)); // Date (YYYYMMDD)
    buf[7] = '\0';
...
    string pathterm  = "P" + filename;
    if (!ndb->wdb.term_exists(pathterm)) {
    return true;
    }

    // Look for all documents with this path. We need to look at all
    // of them to set their existence flag.
    // We check the update time on the first one only.
    Xapian::PostingIterator doc;
    try {
    Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);
    for (Xapian::PostingIterator docid = docid0;
         docid != ndb->wdb.postlist_end(pathterm); docid++) {

        Xapian::Document doc = ndb->wdb.get_document(*docid);

        // Check the date once: there is no need to look at the others
        // if the db needs updating. Note that fmtime used to be called
        // mtime; we fall back to the old name for compatibility.
        if (docid == docid0) {
        string data = doc.get_data();
        const char *cp = strstr(data.c_str(), "fmtime=");
      if (cp) {
          cp += 7;
      } else {
          cp = strstr(data.c_str(), "mtime=");
          if (cp)
         cp+= 6;
      }
        long mtime = cp ? atol(cp) : 0;
        if (mtime < stp->st_mtime) {
          LOGDEB2(("Need update: Db Doc mtime %ld file mtime %ld\n", 
               (long)mtime, (long)stp->st_mtime));
            // Db is not up to date. Let's index the file
            return true;
        } 
        }

...
    string data = xdoc.get_data();
    LOGDEB1(("Rcl::Db::getDoc: data: %s\n", data.c_str()));
    ConfSimple parms(&data);
    parms.get(string("url"), doc.url);
    parms.get(string("mtype"), doc.mimetype);
    parms.get(string("fmtime"), doc.fmtime);
    parms.get(string("dmtime"), doc.dmtime);
    parms.get(string("origcharset"), doc.origcharset);
    parms.get(string("caption"), doc.title);
    parms.get(string("keywords"), doc.keywords);
    parms.get(string("abstract"), doc.abstract);
    parms.get(string("ipath"), doc.ipath);

	a/src/rcldb/rcldb.cpp		b/src/rcldb/rcldb.cpp
1	#ifndef lint	1	#ifndef lint
2	static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.31 2005-10-20 11:33:49 dockes Exp $ (C) 2004 J.F.Dockes";	2	static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.32 2005-11-05 14:40:50 dockes Exp $ (C) 2004 J.F.Dockes";
3	#endif	3	#endif
4	#include <stdio.h>	4	#include <stdio.h>
5	#include <sys/stat.h>	5	#include <sys/stat.h>
6		6
7	#include <iostream>	7	#include <iostream>
	...		...
336	// - sample	336	// - sample
337	// - caption (title limited to 100 chars)	337	// - caption (title limited to 100 chars)
338	// - mime type	338	// - mime type
339	string record = "url=file://" + fn;	339	string record = "url=file://" + fn;
340	record += "\nmtype=" + doc.mimetype;	340	record += "\nmtype=" + doc.mimetype;
341	record += "\nmtime=" + doc.mtime;	341	record += "\nfmtime=" + doc.fmtime;
		342	if (!doc.dmtime.empty())
		343	record += "\ndmtime=" + doc.dmtime;
342	record += "\norigcharset=" + doc.origcharset;	344	record += "\norigcharset=" + doc.origcharset;
343	record += "\ncaption=" + doc.title;	345	record += "\ncaption=" + doc.title;
344	record += "\nkeywords=" + doc.keywords;	346	record += "\nkeywords=" + doc.keywords;
345	record += "\nabstract=" + doc.abstract;	347	record += "\nabstract=" + doc.abstract;
346	if (!doc.ipath.empty()) {	348	if (!doc.ipath.empty()) {
	...		...
349		351
350	record += "\n";	352	record += "\n";
351	LOGDEB1(("Newdocument data: %s\n", record.c_str()));	353	LOGDEB1(("Newdocument data: %s\n", record.c_str()));
352	newdocument.set_data(record);	354	newdocument.set_data(record);
353		355
354	time_t mtime = atol(doc.mtime.c_str());	356	time_t mtime = atol(doc.dmtime.empty() ? doc.fmtime.c_str() :
		357	doc.dmtime.c_str());
355	struct tm *tm = localtime(&mtime);	358	struct tm *tm = localtime(&mtime);
356	char buf[9];	359	char buf[9];
357	sprintf(buf, "%04d%02d%02d",tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);	360	sprintf(buf, "%04d%02d%02d",tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
358	newdocument.add_term("D" + string(buf)); // Date (YYYYMMDD)	361	newdocument.add_term("D" + string(buf)); // Date (YYYYMMDD)
359	buf[7] = '\0';	362	buf[7] = '\0';
	...		...
397	string pathterm = "P" + filename;	400	string pathterm = "P" + filename;
398	if (!ndb->wdb.term_exists(pathterm)) {	401	if (!ndb->wdb.term_exists(pathterm)) {
399	return true;	402	return true;
400	}	403	}
401		404
402	// Look for all documents with this path. Check the update time (once).	405	// Look for all documents with this path. We need to look at all
403	// If the db is up to date, set the update flags for all documents	406	// to set their existence flag.
		407	// We check the update time on the first one only.
404	Xapian::PostingIterator doc;	408	Xapian::PostingIterator doc;
405	try {	409	try {
406	Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);	410	Xapian::PostingIterator docid0 = ndb->wdb.postlist_begin(pathterm);
407	for (Xapian::PostingIterator docid = docid0;	411	for (Xapian::PostingIterator docid = docid0;
408	docid != ndb->wdb.postlist_end(pathterm); docid++) {	412	docid != ndb->wdb.postlist_end(pathterm); docid++) {
409		413
410	Xapian::Document doc = ndb->wdb.get_document(*docid);	414	Xapian::Document doc = ndb->wdb.get_document(*docid);
411		415
412	// Check the date once. no need to look at the others if the	416	// Check the date once. no need to look at the others if
413	// db needs updating.	417	// the db needs updating. Note that the fmtime used to be
		418	// called mtime, and we're keeping compat
414	if (docid == docid0) {	419	if (docid == docid0) {
415	string data = doc.get_data();	420	string data = doc.get_data();
416	const char *cp = strstr(data.c_str(), "mtime=");	421	const char *cp = strstr(data.c_str(), "fmtime=");
		422	if (cp) {
		423	cp += 7;
		424	} else {
		425	cp = strstr(data.c_str(), "mtime=");
		426	if (cp)
417	cp += 6;	427	cp+= 6;
		428	}
418	long mtime = atol(cp);	429	long mtime = cp ? atol(cp) : 0;
419	if (mtime < stp->st_mtime) {	430	if (mtime < stp->st_mtime) {
		431	LOGDEB2(("Need update: Db Doc mtime %ld file mtime %ld\n",
		432	(long)mtime, (long)stp->st_mtime));
420	// Db is not up to date. Let's index the file	433	// Db is not up to date. Let's index the file
421	return true;	434	return true;
422	}	435	}
423	}	436	}
424		437
	...		...
1025	string data = xdoc.get_data();	1038	string data = xdoc.get_data();
1026	LOGDEB1(("Rcl::Db::getDoc: data: %s\n", data.c_str()));	1039	LOGDEB1(("Rcl::Db::getDoc: data: %s\n", data.c_str()));
1027	ConfSimple parms(&data);	1040	ConfSimple parms(&data);
1028	parms.get(string("url"), doc.url);	1041	parms.get(string("url"), doc.url);
1029	parms.get(string("mtype"), doc.mimetype);	1042	parms.get(string("mtype"), doc.mimetype);
1030	parms.get(string("mtime"), doc.mtime);	1043	parms.get(string("fmtime"), doc.fmtime);
		1044	parms.get(string("dmtime"), doc.dmtime);
1031	parms.get(string("origcharset"), doc.origcharset);	1045	parms.get(string("origcharset"), doc.origcharset);
1032	parms.get(string("caption"), doc.title);	1046	parms.get(string("caption"), doc.title);
1033	parms.get(string("keywords"), doc.keywords);	1047	parms.get(string("keywords"), doc.keywords);
1034	parms.get(string("abstract"), doc.abstract);	1048	parms.get(string("abstract"), doc.abstract);
1035	parms.get(string("ipath"), doc.ipath);	1049	parms.get(string("ipath"), doc.ipath);