|
a/src/rcldb/stemdb.cpp |
|
b/src/rcldb/stemdb.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.7 2007-01-19 15:19:51 dockes Exp $ (C) 2005 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
|
4 |
|
5 |
/**
|
5 |
/**
|
6 |
* Management of the auxiliary databases listing stems and their expansion
|
6 |
* Management of the auxiliary databases listing stems and their expansion
|
7 |
* terms
|
7 |
* terms
|
|
... |
|
... |
226 |
{
|
226 |
{
|
227 |
try {
|
227 |
try {
|
228 |
Xapian::Stem stemmer(lang);
|
228 |
Xapian::Stem stemmer(lang);
|
229 |
string stem = stemmer.stem_word(term);
|
229 |
string stem = stemmer.stem_word(term);
|
230 |
LOGDEB(("stemExpand: [%s] stem-> [%s]\n", term.c_str(), stem.c_str()));
|
230 |
LOGDEB(("stemExpand: [%s] stem-> [%s]\n", term.c_str(), stem.c_str()));
|
231 |
// Try to fetch the doc from the stem db
|
231 |
|
|
|
232 |
// Open stem database
|
232 |
string stemdbdir = stemdbname(dbdir, lang);
|
233 |
string stemdbdir = stemdbname(dbdir, lang);
|
233 |
Xapian::Database sdb(stemdbdir);
|
234 |
Xapian::Database sdb(stemdbdir);
|
234 |
LOGDEB1(("stemExpand: %s lastdocid: %d\n",
|
235 |
LOGDEB0(("stemExpand: %s lastdocid: %d\n",
|
235 |
stemdbdir.c_str(), sdb.get_lastdocid()));
|
236 |
stemdbdir.c_str(), sdb.get_lastdocid()));
|
|
|
237 |
|
|
|
238 |
// Try to fetch the doc from the stem db
|
236 |
if (!sdb.term_exists(stem)) {
|
239 |
if (!sdb.term_exists(stem)) {
|
237 |
LOGDEB1(("Db::stemExpand: no term for %s\n", stem.c_str()));
|
240 |
LOGDEB0(("Db::stemExpand: no term for %s\n", stem.c_str()));
|
238 |
result.push_back(term);
|
241 |
} else {
|
239 |
return true;
|
|
|
240 |
}
|
|
|
241 |
Xapian::PostingIterator did = sdb.postlist_begin(stem);
|
242 |
Xapian::PostingIterator did = sdb.postlist_begin(stem);
|
242 |
if (did == sdb.postlist_end(stem)) {
|
243 |
if (did == sdb.postlist_end(stem)) {
|
243 |
LOGDEB1(("stemExpand: no term(1) for %s\n",stem.c_str()));
|
244 |
LOGDEB0(("stemExpand: no term(1) for %s\n",stem.c_str()));
|
244 |
result.push_back(term);
|
245 |
} else {
|
245 |
return true;
|
|
|
246 |
}
|
|
|
247 |
Xapian::Document doc = sdb.get_document(*did);
|
246 |
Xapian::Document doc = sdb.get_document(*did);
|
248 |
string data = doc.get_data();
|
247 |
string data = doc.get_data();
|
249 |
|
248 |
|
250 |
// Build expansion list from database data
|
249 |
// Build expansion list from database data No need for
|
251 |
// No need for a conftree, but we need to massage the data a little
|
250 |
// a conftree, but we need to massage the data a
|
|
|
251 |
// little
|
252 |
string::size_type pos = data.find_first_of("=");
|
252 |
string::size_type pos = data.find_first_of("=");
|
253 |
++pos;
|
253 |
++pos;
|
254 |
string::size_type pos1 = data.find_last_of("\n");
|
254 |
string::size_type pos1 = data.find_last_of("\n");
|
255 |
if (pos == string::npos || pos1 == string::npos ||pos1 <= pos) { // ??
|
255 |
if (pos == string::npos || pos1 == string::npos ||
|
256 |
result.push_back(term);
|
256 |
pos1 <= pos) {
|
257 |
return true;
|
257 |
// ??
|
258 |
}
|
258 |
} else {
|
259 |
stringToStrings(data.substr(pos, pos1-pos), result);
|
259 |
stringToStrings(data.substr(pos, pos1-pos), result);
|
|
|
260 |
}
|
|
|
261 |
}
|
|
|
262 |
}
|
260 |
|
263 |
|
261 |
// If the user term itself is not in the list, add it.
|
264 |
// If the user term or stem are not in the list, add them
|
262 |
if (find(result.begin(), result.end(), term) == result.end()) {
|
265 |
if (find(result.begin(), result.end(), term) == result.end()) {
|
263 |
result.push_back(term);
|
266 |
result.push_back(term);
|
264 |
}
|
267 |
}
|
|
|
268 |
if (find(result.begin(), result.end(), stem) == result.end()) {
|
|
|
269 |
result.push_back(stem);
|
|
|
270 |
}
|
265 |
LOGDEB(("stemExpand: %s -> %s\n", stem.c_str(),
|
271 |
LOGDEB0(("stemExpand: %s -> %s\n", stem.c_str(),
|
266 |
stringlistdisp(result).c_str()));
|
272 |
stringlistdisp(result).c_str()));
|
|
|
273 |
|
267 |
} catch (...) {
|
274 |
} catch (...) {
|
268 |
LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n",
|
275 |
LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n",
|
269 |
dbdir.c_str(), lang.c_str()));
|
276 |
dbdir.c_str(), lang.c_str()));
|
270 |
result.push_back(term);
|
277 |
result.push_back(term);
|
271 |
return false;
|
278 |
return false;
|
272 |
}
|
279 |
}
|
|
|
280 |
|
273 |
return true;
|
281 |
return true;
|
274 |
}
|
282 |
}
|
275 |
|
283 |
|
276 |
}
|
284 |
}
|
277 |
}
|
285 |
}
|