recoll / Code / Diff of /src/rcldb/rcldb.cpp

Diff of /src/rcldb/rcldb.cpp [0e7a78] .. [022e0e]

Switch to unified view


#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.134 2008-07-01 11:51:51 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
...
    } catch (const string &s) {
        ermsg = s;
        if (ermsg.empty()) 
        ermsg = "Empty error message"; 
    } catch (const char *s) {
        ermsg = s ? s : string();
        if (ermsg.empty()) 
        ermsg = "Empty error message"; 
    } catch (...) {
        ermsg= "Unknown xapian error (not Xapian::Error or string)";
        break;
...
    list<string> iterms;
    query->getQueryTerms(iterms);

    list<string> terms = noPrefixList(iterms);
    if (terms.empty()) {
  return string();
    }

    // Retrieve db-wide frequencies for the query terms
    if (query->m_nq->termfreqs.empty()) {
    double doccnt = db.get_doccount();
...
    m_db->m_synthAbsLen /(7 * (m_db->m_synthAbsWordCtxLen+1));
    LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
    // This can't happen, but would crash us
    if (totalweight == 0.0) {
    LOGERR(("makeAbstract: 0 totalweight!\n"));
  return string();
    }

    // Let's go populate
    for (multimap<double, string>::reverse_iterator qit = byQ.rbegin(); 
     qit != byQ.rend(); qit++) {
...
        chron.millis(), qtermposs.size()));

    // This can happen if there are term occurrences in the keywords
    // etc. but not elsewhere ?
    if (qtermposs.size() == 0) 
  return string();

    // Walk all document's terms position lists and populate slots
    // around the query terms. We arbitrarily truncate the list to
    // avoid taking forever. If we do cutoff, the abstract may be
    // inconsistent (missing words, potentially altering meaning),
...
// Close and re-open the index when it is currently open.
//
// Returns true when nothing had to be done (no native handle, or the handle
// is not open) or when both the close and the subsequent open succeeded;
// returns false as soon as either step fails.
bool Db::reOpen()
{
    // Only an existing, currently open native handle needs cycling; in every
    // other case there is nothing to do and the call succeeds.
    if (!m_ndb || !m_ndb->m_isopen)
        return true;

    // Close first, then open again with the stored base directory and mode.
    // && short-circuits, so open() is not attempted when close() fails.
    return close() && open(m_basedir, string(), m_mode, true);
}
...
bool Db::fieldToPrefix(const string& fldname, string &pfx)
{
    // This is the default table
    static map<string, string> fldToPrefs;
    if (fldToPrefs.empty()) {
    fldToPrefs["abstract"] = string();
    fldToPrefs["ext"] = "XE";

    fldToPrefs["title"] = "S";
    fldToPrefs["caption"] = "S";
    fldToPrefs["subject"] = "S";
...
//
// Note that we always return true (but set out to "" on error). We don't
// want to stop indexation because of a bad string
bool dumb_string(const string &in, string &out)
{
    // The output always starts out empty; an empty input leaves it that way.
    out.clear();

    if (!in.empty()) {
        // Neutralize line breaks before conversion (presumably neutchars
        // replaces the listed characters -- confirm against its definition).
        string flattened = neutchars(in, "\n\r");

        const bool converted = unacmaybefold(flattened, out, "UTF-8", true);
        if (!converted) {
            LOGINFO(("dumb_string: unac failed for [%s]\n", in.c_str()));
            // Drop whatever the failed conversion may have left behind. The
            // failure is deliberately not reported to the caller: a bad
            // string must not stop indexing.
            out.clear();
        }
    }

    // Success is reported unconditionally, on every path.
    return true;
}

	a/src/rcldb/rcldb.cpp		b/src/rcldb/rcldb.cpp
1	#ifndef lint	1	#ifndef lint
2	static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.133 2008-06-13 18:22:46 dockes Exp $ (C) 2004 J.F.Dockes";	2	static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.134 2008-07-01 11:51:51 dockes Exp $ (C) 2004 J.F.Dockes";
3	#endif	3	#endif
4	/*	4	/*
5	* This program is free software; you can redistribute it and/or modify	5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by	6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or	7	* the Free Software Foundation; either version 2 of the License, or
	...		...
120	} catch (const string &s) {	120	} catch (const string &s) {
121	ermsg = s;	121	ermsg = s;
122	if (ermsg.empty())	122	if (ermsg.empty())
123	ermsg = "Empty error message";	123	ermsg = "Empty error message";
124	} catch (const char *s) {	124	} catch (const char *s) {
125	ermsg = s ? s : "";	125	ermsg = s ? s : string();
126	if (ermsg.empty())	126	if (ermsg.empty())
127	ermsg = "Empty error message";	127	ermsg = "Empty error message";
128	} catch (...) {	128	} catch (...) {
129	ermsg= "Unknown xapian error (not Xapian::Error or string)";	129	ermsg= "Unknown xapian error (not Xapian::Error or string)";
130	break;	130	break;
	...		...
200	list<string> iterms;	200	list<string> iterms;
201	query->getQueryTerms(iterms);	201	query->getQueryTerms(iterms);
202		202
203	list<string> terms = noPrefixList(iterms);	203	list<string> terms = noPrefixList(iterms);
204	if (terms.empty()) {	204	if (terms.empty()) {
205	return "";	205	return string();
206	}	206	}
207		207
208	// Retrieve db-wide frequencies for the query terms	208	// Retrieve db-wide frequencies for the query terms
209	if (query->m_nq->termfreqs.empty()) {	209	if (query->m_nq->termfreqs.empty()) {
210	double doccnt = db.get_doccount();	210	double doccnt = db.get_doccount();
	...		...
287	m_db->m_synthAbsLen /(7 * (m_db->m_synthAbsWordCtxLen+1));	287	m_db->m_synthAbsLen /(7 * (m_db->m_synthAbsWordCtxLen+1));
288	LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));	288	LOGABS(("makeAbstract:%d: mxttloccs %d\n", chron.ms(), maxtotaloccs));
289	// This can't happen, but would crash us	289	// This can't happen, but would crash us
290	if (totalweight == 0.0) {	290	if (totalweight == 0.0) {
291	LOGERR(("makeAbstract: 0 totalweight!\n"));	291	LOGERR(("makeAbstract: 0 totalweight!\n"));
292	return "";	292	return string();
293	}	293	}
294		294
295	// Let's go populate	295	// Let's go populate
296	for (multimap<double, string>::reverse_iterator qit = byQ.rbegin();	296	for (multimap<double, string>::reverse_iterator qit = byQ.rbegin();
297	qit != byQ.rend(); qit++) {	297	qit != byQ.rend(); qit++) {
	...		...
346	chron.millis(), qtermposs.size()));	346	chron.millis(), qtermposs.size()));
347		347
348	// This can happen if there are term occurences in the keywords	348	// This can happen if there are term occurences in the keywords
349	// etc. but not elsewhere ?	349	// etc. but not elsewhere ?
350	if (qtermposs.size() == 0)	350	if (qtermposs.size() == 0)
351	return "";	351	return string();
352		352
353	// Walk all document's terms position lists and populate slots	353	// Walk all document's terms position lists and populate slots
354	// around the query terms. We arbitrarily truncate the list to	354	// around the query terms. We arbitrarily truncate the list to
355	// avoid taking forever. If we do cutoff, the abstract may be	355	// avoid taking forever. If we do cutoff, the abstract may be
356	// inconsistant (missing words, potentially altering meaning),	356	// inconsistant (missing words, potentially altering meaning),
	...		...
591	bool Db::reOpen()	591	bool Db::reOpen()
592	{	592	{
593	if (m_ndb && m_ndb->m_isopen) {	593	if (m_ndb && m_ndb->m_isopen) {
594	if (!close())	594	if (!close())
595	return false;	595	return false;
596	if (!open(m_basedir, "", m_mode, true)) {	596	if (!open(m_basedir, string(), m_mode, true)) {
597	return false;	597	return false;
598	}	598	}
599	}	599	}
600	return true;	600	return true;
601	}	601	}
	...		...
682	bool Db::fieldToPrefix(const string& fldname, string &pfx)	682	bool Db::fieldToPrefix(const string& fldname, string &pfx)
683	{	683	{
684	// This is the default table	684	// This is the default table
685	static map<string, string> fldToPrefs;	685	static map<string, string> fldToPrefs;
686	if (fldToPrefs.empty()) {	686	if (fldToPrefs.empty()) {
687	fldToPrefs["abstract"] = "";	687	fldToPrefs["abstract"] = string();
688	fldToPrefs["ext"] = "XE";	688	fldToPrefs["ext"] = "XE";
689		689
690	fldToPrefs["title"] = "S";	690	fldToPrefs["title"] = "S";
691	fldToPrefs["caption"] = "S";	691	fldToPrefs["caption"] = "S";
692	fldToPrefs["subject"] = "S";	692	fldToPrefs["subject"] = "S";
	...		...
776	//	776	//
777	// Note that we always return true (but set out to "" on error). We don't	777	// Note that we always return true (but set out to "" on error). We don't
778	// want to stop indexation because of a bad string	778	// want to stop indexation because of a bad string
779	bool dumb_string(const string &in, string &out)	779	bool dumb_string(const string &in, string &out)
780	{	780	{
781	out.erase();	781	out.clear();
782	if (in.empty())	782	if (in.empty())
783	return true;	783	return true;
784		784
785	string s1 = neutchars(in, "\n\r");	785	string s1 = neutchars(in, "\n\r");
786	if (!unacmaybefold(s1, out, "UTF-8", true)) {	786	if (!unacmaybefold(s1, out, "UTF-8", true)) {
787	LOGINFO(("dumb_string: unac failed for [%s]\n", in.c_str()));	787	LOGINFO(("dumb_string: unac failed for [%s]\n", in.c_str()));
788	out.erase();	788	out.clear();
789	// See comment at start of func	789	// See comment at start of func
790	return true;	790	return true;
791	}	791	}
792	return true;	792	return true;
793	}	793	}