recoll / Code / Diff of /src/rcldb/rcldb.cpp

Diff of /src/rcldb/rcldb.cpp [8a1ea3] .. [cf2c0b]

Switch to unified view


...
    if (!splitter.text_to_words(doc.text))
        LOGDEB(("Db::addOrUpdate: split failed for main text\n"));

    ////// Special terms for other metadata. No positions for these.
    // Mime type
    newdocument.add_boolean_term(wrap_prefix(mimetype_prefix) + doc.mimetype);

    // Simple file name indexed unsplit for specific "file name"
    // searches. This is not the same as a filename: clause inside the
    // query language.
    // We also add a term for the filename extension if any.
...
        // a pathological case anyway
        if (fn.size() > 230)
        utf8truncate(fn, 230);
        string::size_type pos = fn.rfind('.');
        if (pos != string::npos && pos != fn.length() - 1) {
        newdocument.add_boolean_term(wrap_prefix(fileext_prefix) + 
                      fn.substr(pos + 1));
        }
        newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn, 0);
    }
    }

    // Udi unique term: this is used for file existence/up-to-date
    // checks, and as the unique id for the replace_document() call.
    string uniterm = make_uniterm(udi);
    newdocument.add_boolean_term(uniterm);
    // Parent term. This is used to find all descendants, mostly to delete
    // them when the parent goes away.
    if (!parent_udi.empty()) {
    newdocument.add_boolean_term(make_parentterm(parent_udi));
    }
    // Dates etc.
    time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() : 
             doc.dmtime.c_str());
    struct tm *tm = localtime(&mtime);
    char buf[9];
    snprintf(buf, 9, "%04d%02d%02d",
        tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
    // Date (YYYYMMDD)
    newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf)); 
    // Month (YYYYMM)
    buf[6] = '\0';
    newdocument.add_boolean_term(wrap_prefix(xapmonth_prefix) + string(buf));
    // Year (YYYY)
    buf[4] = '\0';
    newdocument.add_boolean_term(wrap_prefix(xapyear_prefix) + string(buf)); 


    //////////////////////////////////////////////////////////////////
    // Document data record. omindex has the following nl separated fields:
    // - url

	a/src/rcldb/rcldb.cpp		b/src/rcldb/rcldb.cpp
	...		...
973	if (!splitter.text_to_words(doc.text))	973	if (!splitter.text_to_words(doc.text))
974	LOGDEB(("Db::addOrUpdate: split failed for main text\n"));	974	LOGDEB(("Db::addOrUpdate: split failed for main text\n"));
975		975
976	////// Special terms for other metadata. No positions for these.	976	////// Special terms for other metadata. No positions for these.
977	// Mime type	977	// Mime type
978	newdocument.add_term(wrap_prefix(mimetype_prefix) + doc.mimetype);	978	newdocument.add_boolean_term(wrap_prefix(mimetype_prefix) + doc.mimetype);
979		979
980	// Simple file name indexed unsplit for specific "file name"	980	// Simple file name indexed unsplit for specific "file name"
981	// searches. This is not the same as a filename: clause inside the	981	// searches. This is not the same as a filename: clause inside the
982	// query language.	982	// query language.
983	// We also add a term for the filename extension if any.	983	// We also add a term for the filename extension if any.
	...		...
989	// a pathological case anyway	989	// a pathological case anyway
990	if (fn.size() > 230)	990	if (fn.size() > 230)
991	utf8truncate(fn, 230);	991	utf8truncate(fn, 230);
992	string::size_type pos = fn.rfind('.');	992	string::size_type pos = fn.rfind('.');
993	if (pos != string::npos && pos != fn.length() - 1) {	993	if (pos != string::npos && pos != fn.length() - 1) {
994	newdocument.add_term(wrap_prefix(fileext_prefix) +	994	newdocument.add_boolean_term(wrap_prefix(fileext_prefix) +
995	fn.substr(pos + 1));	995	fn.substr(pos + 1));
996	}	996	}
997	newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn);	997	newdocument.add_term(wrap_prefix(unsplitfilename_prefix) + fn, 0);
998	}	998	}
999	}	999	}
1000		1000
1001	// Udi unique term: this is used for file existence/uptodate	1001	// Udi unique term: this is used for file existence/uptodate
1002	// checks, and unique id for the replace_document() call.	1002	// checks, and unique id for the replace_document() call.
1003	string uniterm = make_uniterm(udi);	1003	string uniterm = make_uniterm(udi);
1004	newdocument.add_term(uniterm);	1004	newdocument.add_boolean_term(uniterm);
1005	// Parent term. This is used to find all descendents, mostly to delete them	1005	// Parent term. This is used to find all descendents, mostly to delete them
1006	// when the parent goes away	1006	// when the parent goes away
1007	if (!parent_udi.empty()) {	1007	if (!parent_udi.empty()) {
1008	newdocument.add_term(make_parentterm(parent_udi));	1008	newdocument.add_boolean_term(make_parentterm(parent_udi));
1009	}	1009	}
1010	// Dates etc.	1010	// Dates etc.
1011	time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :	1011	time_t mtime = atoll(doc.dmtime.empty() ? doc.fmtime.c_str() :
1012	doc.dmtime.c_str());	1012	doc.dmtime.c_str());
1013	struct tm *tm = localtime(&mtime);	1013	struct tm *tm = localtime(&mtime);
1014	char buf[9];	1014	char buf[9];
1015	snprintf(buf, 9, "%04d%02d%02d",	1015	snprintf(buf, 9, "%04d%02d%02d",
1016	tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);	1016	tm->tm_year+1900, tm->tm_mon + 1, tm->tm_mday);
1017	// Date (YYYYMMDD)	1017	// Date (YYYYMMDD)
1018	newdocument.add_term(wrap_prefix(xapday_prefix) + string(buf));	1018	newdocument.add_boolean_term(wrap_prefix(xapday_prefix) + string(buf));
1019	// Month (YYYYMM)	1019	// Month (YYYYMM)
1020	buf[6] = '\0';	1020	buf[6] = '\0';
1021	newdocument.add_term(wrap_prefix(xapmonth_prefix) + string(buf));	1021	newdocument.add_boolean_term(wrap_prefix(xapmonth_prefix) + string(buf));
1022	// Year (YYYY)	1022	// Year (YYYY)
1023	buf[4] = '\0';	1023	buf[4] = '\0';
1024	newdocument.add_term(wrap_prefix(xapyear_prefix) + string(buf));	1024	newdocument.add_boolean_term(wrap_prefix(xapyear_prefix) + string(buf));
1025		1025
1026		1026
1027	//////////////////////////////////////////////////////////////////	1027	//////////////////////////////////////////////////////////////////
1028	// Document data record. omindex has the following nl separated fields:	1028	// Document data record. omindex has the following nl separated fields:
1029	// - url	1029	// - url