recoll / Code / Diff of /src/rcldb/rcldb.cpp

Diff of /src/rcldb/rcldb.cpp [8bde98] .. [b536c9]

Switch to unified view


// Revision identification string kept in a file-local static; it is left
// out of the translation unit when `lint` is defined.
#ifndef lint
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.76 2006-09-13 13:53:35 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
/*
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
...
// Smaller of A and B. Every use of an argument is parenthesized so that
// arguments containing operators of lower precedence than '<' or '?:'
// (e.g. '&', '|', a nested conditional) are compared as a whole.
// Caution: this is still a macro, the selected argument is evaluated twice,
// so do not pass expressions with side effects.
#define MIN(A,B) ((A) < (B) ? (A) : (B))
#endif
#ifndef NO_NAMESPACES
namespace Rcl {
#endif












// Truncate longer paths and make them unique with a hash. The goal of this
// is to avoid xapian max term length limitations, not to gain space (we
// gain very little even with very short maxlens like 30)
#define PATHHASHLEN 150
...

// Data for a xapian database. There could actually be 2 different
// ones for indexing or query as there is not much in common.
class Native {
 public:
    Db *m_db;
    bool m_isopen;
    bool m_iswritable;
    Db::OpenMode m_mode;
    string m_basedir;

...
    bool dbDataToRclDoc(std::string &data, Doc &doc, 
            int qopts,
            Xapian::docid docid,
            const list<string>& terms);

    // Create the native data for the given Db. The object starts out
    // closed, not writable, in read-only mode and without a query object.
    // db: back pointer to the owning Db, stored as is (not owned here).
    Native(Db *db)
        : m_db(db),
          m_isopen(false),
          m_iswritable(false),
          m_mode(Db::DbRO),
          enquire(0)
    {
    }
    // Free the query object held in `enquire`. The constructor sets it
    // to 0, and deleting a null pointer is a no-op, so this is safe even
    // if no query object was ever created.
    ~Native() {
    delete enquire;
    }
    bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
...
    }

};

// Default construction: no query options, default abstract parameters
// (250 for both the index-time truncation length and the synthetic
// abstract length, 4 for the context width around query terms), and a
// fresh Native object pointing back at this Db.
Db::Db()
    : m_qOpts(QO_NONE),
      m_idxAbsTruncLen(250),
      m_synthAbsLen(250),
      m_synthAbsWordCtxLen(4)
{
    // The Native part keeps a back pointer to us so that it can read the
    // abstract parameters.
    m_ndb = new Native(this);
}

Db::~Db()
{
    LOGDEB1(("Db::~Db\n"));
...
    if (m_ndb->m_iswritable == true) {
        m_ndb->wdb.flush();
        LOGDEB(("Rcl:Db: Called xapian flush\n"));
    }
    delete m_ndb;
    m_ndb = new Native(this);
    if (m_ndb)
        return true;
    } catch (const Xapian::Error &e) {
    ermsg = e.get_msg().c_str();
    } catch (const string &s) {
...
    return true;
    }
    return true;
}

// Let our user tune the abstract processing parameters. Each value is
// checked on its own: it is stored only if it lies strictly inside its
// accepted range, otherwise the current setting is silently kept.
//   idxtrunc:   length abstracts are truncated to when indexing (1..1999)
//   syntlen:    size limit for abstracts synthetized at query time (1..1999)
//   syntctxlen: number of positions kept around each query term when
//               synthetizing an abstract (1..19)
void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
{
    LOGDEB(("Db::setAbstractParams: trunc %d syntlen %d ctxlen %d\n",
	    idxtrunc, syntlen, syntctxlen));

    const bool truncOk = 0 < idxtrunc && idxtrunc < 2000;
    const bool lenOk = 0 < syntlen && syntlen < 2000;
    const bool ctxOk = 0 < syntctxlen && syntctxlen < 20;

    if (truncOk) {
	m_idxAbsTruncLen = idxtrunc;
    }
    if (lenOk) {
	m_synthAbsLen = syntlen;
    }
    if (ctxOk) {
	m_synthAbsWordCtxLen = syntctxlen;
    }
}

// Add document in internal form to the database: index the terms in
// the title abstract and body and add special terms for file name,
// date, mime type ... , create the document data record (more
// metadata), and update database
bool Db::add(const string &fn, const Doc &idoc, 
...

    Doc doc = idoc;

    // Truncate abstract, title and keywords to reasonable lengths. If
    // abstract is currently empty, we make up one with the beginning
    // of the document. This is then not indexed, but part of the doc
    // data so that we can return it to a query without having to
    // decode the original file.
    bool syntabs = false;
    if (doc.abstract.empty()) {
    syntabs = true;
    doc.abstract = rclSyntAbs + 
      truncate_to_word(doc.text, m_idxAbsTruncLen);
    } else {
    doc.abstract = truncate_to_word(doc.abstract, m_idxAbsTruncLen);
    }
    doc.abstract = neutchars(doc.abstract, "\n\r");
    doc.title = truncate_to_word(doc.title, 100);
    doc.keywords = truncate_to_word(doc.keywords, 300);

...
    return false;
    }
    splitter.text_to_words(noacc);
    splitData.basepos += splitData.curpos + 100;

    // Split and index abstract. We don't do this if it is synthetic
    // any more (this used to give a relevance boost to the beginning
    // of text, why ?)
    LOGDEB2(("Db::add: split abstract [%s]\n", doc.abstract.c_str()));
    if (!syntabs) {
  // syntabs indicator test kept here in case we want to go back
  // to indexing synthetic abstracts one day
  if (!dumb_string(syntabs ? doc.abstract.substr(rclSyntAbs.length()) : 
           doc.abstract, noacc)) {
        LOGERR(("Db::add: dumb_string failed\n"));
        return false;
  }
  splitter.text_to_words(noacc);
    }
    splitData.basepos += splitData.curpos + 100;

    ////// Special terms for metadata
    // Mime type
    newdocument.add_term("T" + doc.mimetype);
...
    parms.get(string("dmtime"), doc.dmtime);
    parms.get(string("origcharset"), doc.origcharset);
    parms.get(string("caption"), doc.title);
    parms.get(string("keywords"), doc.keywords);
    parms.get(string("abstract"), doc.abstract);
    // Possibly remove synthetic abstract indicator (if it's there, we
    // used to index the beginning of the text as abstract).
    bool syntabs = false;
    if (doc.abstract.find(rclSyntAbs) == 0) {
    doc.abstract = doc.abstract.substr(rclSyntAbs.length());
    syntabs = true;
    }
    // If the option is set and the abstract is synthetic or empty (or
    // replacement is explicitly requested), build the abstract from
    // position data.
    if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
    LOGDEB(("dbDataToRclDoc:: building abstract from position data\n"));
    if (doc.abstract.empty() || syntabs || 
        (qopts & Db::QO_REPLACE_ABSTRACT))
        doc.abstract = makeAbstract(docid, terms);
    } 
    parms.get(string("ipath"), doc.ipath);
    parms.get(string("fbytes"), doc.fbytes);
    parms.get(string("dbytes"), doc.dbytes);
    doc.xdocid = docid;
    return true;
...
    // Go through the list of query terms. For each entry in each
    // position list, populate the slot in the document buffer, and
    // remember the position and its neighbours
    vector<unsigned int> qtermposs; // The term positions
    set<unsigned int> chunkposs; // All the positions we shall populate
    int totaloccs = 0;
    for (list<string>::const_iterator qit = terms.begin(); qit != terms.end();
     qit++) {
    Xapian::PositionIterator pos;
    // There may be query terms not in this doc. This raises an
    // exception when requesting the position list, we just catch it.
...
        for (pos = db.positionlist_begin(docid, *qit); 
         pos != db.positionlist_end(docid, *qit); pos++) {
        unsigned int ipos = *pos;
        LOGDEB1(("Abstract: [%s] at %d\n", qit->c_str(), ipos));
        // Possibly extend the array. Do it in big chunks
      if (ipos + m_db->m_synthAbsWordCtxLen >= buf.size()) {
          buf.resize(ipos + m_db->m_synthAbsWordCtxLen + 1000);
        }
        buf[ipos] = *qit;
        // Remember the term position
        qtermposs.push_back(ipos);
        // Add adjacent slots to the set to populate at next step
      for (unsigned int ii = MAX(0, ipos-m_db->m_synthAbsWordCtxLen); 
           ii <= MIN(ipos+m_db->m_synthAbsWordCtxLen, buf.size()-1); ii++) {
            chunkposs.insert(ii);
        }
        // Limit the number of occurrences we keep for each
        // term. The abstract has a finite length anyway !
        if (occurrences++ > 10)
            break;
        }
    } catch (...) {
    }
  // Limit total size
  if (totaloccs++ > 100)
      break;
    }

    LOGDEB1(("Abstract:%d:chosen number of positions %d. Populating\n", 
        chron.millis(), qtermposs.size()));

...
    // Extract data around the first (in random order) term positions,
    // and store the chunks in the map
    for (vector<unsigned int>::const_iterator it = qtermposs.begin();
     it != qtermposs.end(); it++) {
    unsigned int ipos = *it;
  unsigned int start = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
  unsigned int end = MIN(ipos+m_db->m_synthAbsWordCtxLen, buf.size()-1);
    string chunk;
    for (unsigned int ii = start; ii <= end; ii++) {
        if (!buf[ii].empty()) {
        chunk += buf[ii] + " ";
        abslen += buf[ii].length();
        }
      if (int(abslen) > m_db->m_synthAbsLen)
        break;
    }
    if (end != buf.size()-1)
        chunk += "... ";
    mabs[ipos] = chunk;
  if (int(abslen) > m_db->m_synthAbsLen)
        break;
    }

    // Build the abstract by walking the map (in order of position)
    string abstract;

	a/src/rcldb/rcldb.cpp		b/src/rcldb/rcldb.cpp
1	#ifndef lint	1	#ifndef lint
2	static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.75 2006-05-09 10:15:14 dockes Exp $ (C) 2004 J.F.Dockes";	2	static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.76 2006-09-13 13:53:35 dockes Exp $ (C) 2004 J.F.Dockes";
3	#endif	3	#endif
4	/*	4	/*
5	* This program is free software; you can redistribute it and/or modify	5	* This program is free software; you can redistribute it and/or modify
6	* it under the terms of the GNU General Public License as published by	6	* it under the terms of the GNU General Public License as published by
7	* the Free Software Foundation; either version 2 of the License, or	7	* the Free Software Foundation; either version 2 of the License, or
	...		...
54	#define MIN(A,B) (A<B?A:B)	54	#define MIN(A,B) (A<B?A:B)
55	#endif	55	#endif
56	#ifndef NO_NAMESPACES	56	#ifndef NO_NAMESPACES
57	namespace Rcl {	57	namespace Rcl {
58	#endif	58	#endif
59	// This is how long an abstract we keep or build from beginning of text when
60	// indexing. It only has an influence on the size of the db as we are free
61	// to shorten it again when displaying
62	#define INDEX_ABSTRACT_SIZE 250
63
64	// This is the size of the abstract that we synthetize out of query
65	// term contexts at query time
66	#define MA_ABSTRACT_SIZE 250
67	// This is how many words (context size) we keep around query terms
68	// when building the abstract
69	#define MA_EXTRACT_WIDTH 4
70		59
71	// Truncate longer path and uniquize with hash . The goal for this is	60	// Truncate longer path and uniquize with hash . The goal for this is
72	// to avoid xapian max term length limitations, not to gain space (we	61	// to avoid xapian max term length limitations, not to gain space (we
73	// gain very little even with very short maxlens like 30)	62	// gain very little even with very short maxlens like 30)
74	#define PATHHASHLEN 150	63	#define PATHHASHLEN 150
	...		...
79		68
80	// Data for a xapian database. There could actually be 2 different	69	// Data for a xapian database. There could actually be 2 different
81	// ones for indexing or query as there is not much in common.	70	// ones for indexing or query as there is not much in common.
82	class Native {	71	class Native {
83	public:	72	public:
		73	Db *m_db;
84	bool m_isopen;	74	bool m_isopen;
85	bool m_iswritable;	75	bool m_iswritable;
86	Db::OpenMode m_mode;	76	Db::OpenMode m_mode;
87	string m_basedir;	77	string m_basedir;
88		78
	...		...
104	bool dbDataToRclDoc(std::string &data, Doc &doc,	94	bool dbDataToRclDoc(std::string &data, Doc &doc,
105	int qopts,	95	int qopts,
106	Xapian::docid docid,	96	Xapian::docid docid,
107	const list<string>& terms);	97	const list<string>& terms);
108		98
109	Native()	99	Native(Db *db)
		100	: m_db(db),
110	: m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)	101	m_isopen(false), m_iswritable(false), m_mode(Db::DbRO), enquire(0)
111	{ }	102	{ }
112	~Native() {	103	~Native() {
113	delete enquire;	104	delete enquire;
114	}	105	}
115	bool filterMatch(Db *rdb, Xapian::Document &xdoc) {	106	bool filterMatch(Db *rdb, Xapian::Document &xdoc) {
	...		...
147	}	138	}
148		139
149	};	140	};
150		141
151	Db::Db()	142	Db::Db()
152	: m_qOpts(QO_NONE)	143	: m_qOpts(QO_NONE), m_idxAbsTruncLen(250), m_synthAbsLen(250),
		144	m_synthAbsWordCtxLen(4)
153	{	145	{
154	m_ndb = new Native;	146	m_ndb = new Native(this);
155	}	147	}
156		148
157	Db::~Db()	149	Db::~Db()
158	{	150	{
159	LOGDEB1(("Db::~Db\n"));	151	LOGDEB1(("Db::~Db\n"));
	...		...
280	if (m_ndb->m_iswritable == true) {	272	if (m_ndb->m_iswritable == true) {
281	m_ndb->wdb.flush();	273	m_ndb->wdb.flush();
282	LOGDEB(("Rcl:Db: Called xapian flush\n"));	274	LOGDEB(("Rcl:Db: Called xapian flush\n"));
283	}	275	}
284	delete m_ndb;	276	delete m_ndb;
285	m_ndb = new Native;	277	m_ndb = new Native(this);
286	if (m_ndb)	278	if (m_ndb)
287	return true;	279	return true;
288	} catch (const Xapian::Error &e) {	280	} catch (const Xapian::Error &e) {
289	ermsg = e.get_msg().c_str();	281	ermsg = e.get_msg().c_str();
290	} catch (const string &s) {	282	} catch (const string &s) {
	...		...
440	return true;	432	return true;
441	}	433	}
442	return true;	434	return true;
443	}	435	}
444		436
		437	// Let our user set the parameters for abstract processing
		438	void Db::setAbstractParams(int idxtrunc, int syntlen, int syntctxlen)
		439	{
		440	LOGDEB(("Db::setAbstractParams: trunc %d syntlen %d ctxlen %d\n",
		441	idxtrunc, syntlen, syntctxlen));
		442	if (idxtrunc > 0 && idxtrunc < 2000)
		443	m_idxAbsTruncLen = idxtrunc;
		444	if (syntlen > 0 && syntlen < 2000)
		445	m_synthAbsLen = syntlen;
		446	if (syntctxlen > 0 && syntctxlen < 20)
		447	m_synthAbsWordCtxLen = syntctxlen;
		448	}
		449
445	// Add document in internal form to the database: index the terms in	450	// Add document in internal form to the database: index the terms in
446	// the title abstract and body and add special terms for file name,	451	// the title abstract and body and add special terms for file name,
447	// date, mime type ... , create the document data record (more	452	// date, mime type ... , create the document data record (more
448	// metadata), and update database	453	// metadata), and update database
449	bool Db::add(const string &fn, const Doc &idoc,	454	bool Db::add(const string &fn, const Doc &idoc,
	...		...
455		460
456	Doc doc = idoc;	461	Doc doc = idoc;
457		462
458	// Truncate abstract, title and keywords to reasonable lengths. If	463	// Truncate abstract, title and keywords to reasonable lengths. If
459	// abstract is currently empty, we make up one with the beginning	464	// abstract is currently empty, we make up one with the beginning
460	// of the document.	465	// of the document. This is then not indexed, but part of the doc
		466	// data so that we can return it to a query without having to
		467	// decode the original file.
461	bool syntabs = false;	468	bool syntabs = false;
462	if (doc.abstract.empty()) {	469	if (doc.abstract.empty()) {
463	syntabs = true;	470	syntabs = true;
464	doc.abstract = rclSyntAbs +	471	doc.abstract = rclSyntAbs +
465	truncate_to_word(doc.text, INDEX_ABSTRACT_SIZE);	472	truncate_to_word(doc.text, m_idxAbsTruncLen);
466	} else {	473	} else {
467	doc.abstract = truncate_to_word(doc.abstract, INDEX_ABSTRACT_SIZE);	474	doc.abstract = truncate_to_word(doc.abstract, m_idxAbsTruncLen);
468	}	475	}
469	doc.abstract = neutchars(doc.abstract, "\n\r");	476	doc.abstract = neutchars(doc.abstract, "\n\r");
470	doc.title = truncate_to_word(doc.title, 100);	477	doc.title = truncate_to_word(doc.title, 100);
471	doc.keywords = truncate_to_word(doc.keywords, 300);	478	doc.keywords = truncate_to_word(doc.keywords, 300);
472		479
	...		...
511	return false;	518	return false;
512	}	519	}
513	splitter.text_to_words(noacc);	520	splitter.text_to_words(noacc);
514	splitData.basepos += splitData.curpos + 100;	521	splitData.basepos += splitData.curpos + 100;
515		522
516	// Split and index abstract	523	// Split and index abstract. We don't do this if it is synthetic
		524	// any more (this used to give a relevance boost to the beginning
		525	// of text, why ?)
517	LOGDEB2(("Db::add: split abstract [%s]\n", doc.abstract.c_str()));	526	LOGDEB2(("Db::add: split abstract [%s]\n", doc.abstract.c_str()));
		527	if (!syntabs) {
		528	// syntabs indicator test kept here in case we want to go back
		529	// to indexing synthetic abstracts one day
518	if (!dumb_string(syntabs ? doc.abstract.substr(rclSyntAbs.length()) :	530	if (!dumb_string(syntabs ? doc.abstract.substr(rclSyntAbs.length()) :
519	doc.abstract, noacc)) {	531	doc.abstract, noacc)) {
520	LOGERR(("Db::add: dumb_string failed\n"));	532	LOGERR(("Db::add: dumb_string failed\n"));
521	return false;	533	return false;
522	}	534	}
523	splitter.text_to_words(noacc);	535	splitter.text_to_words(noacc);
		536	}
524	splitData.basepos += splitData.curpos + 100;	537	splitData.basepos += splitData.curpos + 100;
525		538
526	////// Special terms for metadata	539	////// Special terms for metadata
527	// Mime type	540	// Mime type
528	newdocument.add_term("T" + doc.mimetype);	541	newdocument.add_term("T" + doc.mimetype);
	...		...
1180	parms.get(string("dmtime"), doc.dmtime);	1193	parms.get(string("dmtime"), doc.dmtime);
1181	parms.get(string("origcharset"), doc.origcharset);	1194	parms.get(string("origcharset"), doc.origcharset);
1182	parms.get(string("caption"), doc.title);	1195	parms.get(string("caption"), doc.title);
1183	parms.get(string("keywords"), doc.keywords);	1196	parms.get(string("keywords"), doc.keywords);
1184	parms.get(string("abstract"), doc.abstract);	1197	parms.get(string("abstract"), doc.abstract);
		1198	// Possibly remove synthetic abstract indicator (if it's there, we
		1199	// used to index the beginning of the text as abstract).
1185	bool syntabs = false;	1200	bool syntabs = false;
1186	if (doc.abstract.find(rclSyntAbs) == 0) {	1201	if (doc.abstract.find(rclSyntAbs) == 0) {
1187	doc.abstract = doc.abstract.substr(rclSyntAbs.length());	1202	doc.abstract = doc.abstract.substr(rclSyntAbs.length());
1188	syntabs = true;	1203	syntabs = true;
1189	}	1204	}
		1205	// If the option is set and the abstract is synthetic or empty , build
		1206	// abstract from position data.
1190	if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {	1207	if ((qopts & Db::QO_BUILD_ABSTRACT) && !terms.empty()) {
1191	LOGDEB1(("dbDataToRclDoc:: building abstract from position data\n"));	1208	LOGDEB(("dbDataToRclDoc:: building abstract from position data\n"));
1192	if (doc.abstract.empty() \|\| syntabs \|\|	1209	if (doc.abstract.empty() \|\| syntabs \|\|
1193	(qopts & Db::QO_REPLACE_ABSTRACT))	1210	(qopts & Db::QO_REPLACE_ABSTRACT))
1194	doc.abstract = makeAbstract(docid, terms);	1211	doc.abstract = makeAbstract(docid, terms);
1195	}	1212	}
1196	parms.get(string("ipath"), doc.ipath);	1213	parms.get(string("ipath"), doc.ipath);
1197	parms.get(string("fbytes"), doc.fbytes);	1214	parms.get(string("fbytes"), doc.fbytes);
1198	parms.get(string("dbytes"), doc.dbytes);	1215	parms.get(string("dbytes"), doc.dbytes);
1199	doc.xdocid = docid;	1216	doc.xdocid = docid;
1200	return true;	1217	return true;
	...		...
1395	// Go through the list of query terms. For each entry in each	1412	// Go through the list of query terms. For each entry in each
1396	// position list, populate the slot in the document buffer, and	1413	// position list, populate the slot in the document buffer, and
1397	// remember the position and its neigbours	1414	// remember the position and its neigbours
1398	vector<unsigned int> qtermposs; // The term positions	1415	vector<unsigned int> qtermposs; // The term positions
1399	set<unsigned int> chunkposs; // All the positions we shall populate	1416	set<unsigned int> chunkposs; // All the positions we shall populate
		1417	int totaloccs = 0;
1400	for (list<string>::const_iterator qit = terms.begin(); qit != terms.end();	1418	for (list<string>::const_iterator qit = terms.begin(); qit != terms.end();
1401	qit++) {	1419	qit++) {
1402	Xapian::PositionIterator pos;	1420	Xapian::PositionIterator pos;
1403	// There may be query terms not in this doc. This raises an	1421	// There may be query terms not in this doc. This raises an
1404	// exception when requesting the position list, we just catch it.	1422	// exception when requesting the position list, we just catch it.
	...		...
1407	for (pos = db.positionlist_begin(docid, *qit);	1425	for (pos = db.positionlist_begin(docid, *qit);
1408	pos != db.positionlist_end(docid, *qit); pos++) {	1426	pos != db.positionlist_end(docid, *qit); pos++) {
1409	unsigned int ipos = *pos;	1427	unsigned int ipos = *pos;
1410	LOGDEB1(("Abstract: [%s] at %d\n", qit->c_str(), ipos));	1428	LOGDEB1(("Abstract: [%s] at %d\n", qit->c_str(), ipos));
1411	// Possibly extend the array. Do it in big chunks	1429	// Possibly extend the array. Do it in big chunks
1412	if (ipos + MA_EXTRACT_WIDTH >= buf.size()) {	1430	if (ipos + m_db->m_synthAbsWordCtxLen >= buf.size()) {
1413	buf.resize(ipos + MA_EXTRACT_WIDTH + 1000);	1431	buf.resize(ipos + m_db->m_synthAbsWordCtxLen + 1000);
1414	}	1432	}
1415	buf[ipos] = *qit;	1433	buf[ipos] = *qit;
1416	// Remember the term position	1434	// Remember the term position
1417	qtermposs.push_back(ipos);	1435	qtermposs.push_back(ipos);
1418	// Add adjacent slots to the set to populate at next step	1436	// Add adjacent slots to the set to populate at next step
1419	for (unsigned int ii = MAX(0, ipos-MA_EXTRACT_WIDTH);	1437	for (unsigned int ii = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
1420	ii <= MIN(ipos+MA_EXTRACT_WIDTH, buf.size()-1); ii++) {	1438	ii <= MIN(ipos+m_db->m_synthAbsWordCtxLen, buf.size()-1); ii++) {
1421	chunkposs.insert(ii);	1439	chunkposs.insert(ii);
1422	}	1440	}
1423	// Limit the number of occurences we keep for each	1441	// Limit the number of occurences we keep for each
1424	// term. The abstract has a finite length anyway !	1442	// term. The abstract has a finite length anyway !
1425	if (occurrences++ > 10)	1443	if (occurrences++ > 10)
1426	break;	1444	break;
1427	}	1445	}
1428	} catch (...) {	1446	} catch (...) {
1429	}	1447	}
		1448	// Limit total size
		1449	if (totaloccs++ > 100)
		1450	break;
1430	}	1451	}
1431		1452
1432	LOGDEB1(("Abstract:%d:chosen number of positions %d. Populating\n",	1453	LOGDEB1(("Abstract:%d:chosen number of positions %d. Populating\n",
1433	chron.millis(), qtermposs.size()));	1454	chron.millis(), qtermposs.size()));
1434		1455
	...		...
1468	// Extract data around the first (in random order) term positions,	1489	// Extract data around the first (in random order) term positions,
1469	// and store the chunks in the map	1490	// and store the chunks in the map
1470	for (vector<unsigned int>::const_iterator it = qtermposs.begin();	1491	for (vector<unsigned int>::const_iterator it = qtermposs.begin();
1471	it != qtermposs.end(); it++) {	1492	it != qtermposs.end(); it++) {
1472	unsigned int ipos = *it;	1493	unsigned int ipos = *it;
1473	unsigned int start = MAX(0, ipos-MA_EXTRACT_WIDTH);	1494	unsigned int start = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
1474	unsigned int end = MIN(ipos+MA_EXTRACT_WIDTH, buf.size()-1);	1495	unsigned int end = MIN(ipos+m_db->m_synthAbsWordCtxLen, buf.size()-1);
1475	string chunk;	1496	string chunk;
1476	for (unsigned int ii = start; ii <= end; ii++) {	1497	for (unsigned int ii = start; ii <= end; ii++) {
1477	if (!buf[ii].empty()) {	1498	if (!buf[ii].empty()) {
1478	chunk += buf[ii] + " ";	1499	chunk += buf[ii] + " ";
1479	abslen += buf[ii].length();	1500	abslen += buf[ii].length();
1480	}	1501	}
1481	if (abslen > MA_ABSTRACT_SIZE)	1502	if (int(abslen) > m_db->m_synthAbsLen)
1482	break;	1503	break;
1483	}	1504	}
1484	if (end != buf.size()-1)	1505	if (end != buf.size()-1)
1485	chunk += "... ";	1506	chunk += "... ";
1486	mabs[ipos] = chunk;	1507	mabs[ipos] = chunk;
1487	if (abslen > MA_ABSTRACT_SIZE)	1508	if (int(abslen) > m_db->m_synthAbsLen)
1488	break;	1509	break;
1489	}	1510	}
1490		1511
1491	// Build the abstract by walking the map (in order of position)	1512	// Build the abstract by walking the map (in order of position)
1492	string abstract;	1513	string abstract;