|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.148 2008-10-07 06:44:23 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: rcldb.cpp,v 1.149 2008-12-04 11:49:59 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
331 |
string emptys;
|
331 |
string emptys;
|
332 |
try {
|
332 |
try {
|
333 |
unsigned int occurrences = 0;
|
333 |
unsigned int occurrences = 0;
|
334 |
for (pos = db.positionlist_begin(docid, qterm);
|
334 |
for (pos = db.positionlist_begin(docid, qterm);
|
335 |
pos != db.positionlist_end(docid, qterm); pos++) {
|
335 |
pos != db.positionlist_end(docid, qterm); pos++) {
|
336 |
unsigned int ipos = *pos;
|
336 |
int ipos = *pos;
|
337 |
if (ipos < baseTextPosition) // Not in text body
|
337 |
if (ipos < int(baseTextPosition)) // Not in text body
|
338 |
continue;
|
338 |
continue;
|
339 |
LOGABS(("makeAbstract: [%s] at %d occurrences %d maxoccs %d\n",
|
339 |
LOGABS(("makeAbstract: [%s] at %d occurrences %d maxoccs %d\n",
|
340 |
qterm.c_str(), ipos, occurrences, maxoccs));
|
340 |
qterm.c_str(), ipos, occurrences, maxoccs));
|
341 |
// Remember the term position
|
341 |
// Remember the term position
|
342 |
qtermposs.push_back(ipos);
|
342 |
qtermposs.push_back(ipos);
|
343 |
// Add adjacent slots to the set to populate at next step
|
343 |
// Add adjacent slots to the set to populate at next step
|
344 |
unsigned int sta = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
|
344 |
unsigned int sta = MAX(0, ipos-m_db->m_synthAbsWordCtxLen);
|
345 |
unsigned int sto = ipos+m_db->m_synthAbsWordCtxLen;
|
345 |
unsigned int sto = ipos+m_db->m_synthAbsWordCtxLen;
|
346 |
for (unsigned int ii = sta; ii <= sto; ii++) {
|
346 |
for (unsigned int ii = sta; ii <= sto; ii++) {
|
347 |
if (ii == ipos)
|
347 |
if (ii == (unsigned int)ipos)
|
348 |
sparseDoc[ii] = qterm;
|
348 |
sparseDoc[ii] = qterm;
|
349 |
else
|
349 |
else
|
350 |
sparseDoc[ii] = emptys;
|
350 |
sparseDoc[ii] = emptys;
|
351 |
}
|
351 |
}
|
352 |
// Limit to allocated occurrences and total size
|
352 |
// Limit to allocated occurrences and total size
|
|
... |
|
... |
1364 |
list<string> more;
|
1364 |
list<string> more;
|
1365 |
StemDb::stemExpand(*it, lang, term, more);
|
1365 |
StemDb::stemExpand(*it, lang, term, more);
|
1366 |
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
|
1366 |
LOGDEB1(("Db::stemExpand: Got %d from %s\n",
|
1367 |
more.size(), it->c_str()));
|
1367 |
more.size(), it->c_str()));
|
1368 |
result.insert(result.end(), more.begin(), more.end());
|
1368 |
result.insert(result.end(), more.begin(), more.end());
|
|
|
1369 |
if (result.size() >= (unsigned int)max)
|
|
|
1370 |
break;
|
1369 |
}
|
1371 |
}
|
1370 |
LOGDEB1(("Db:::stemExpand: final count %d \n", result.size()));
|
1372 |
LOGDEB1(("Db:::stemExpand: final count %d \n", result.size()));
|
1371 |
return true;
|
1373 |
return true;
|
1372 |
}
|
1374 |
}
|
1373 |
|
1375 |
|