|
a/src/rcldb/stemdb.cpp |
|
b/src/rcldb/stemdb.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.10 2007-08-01 10:04:53 dockes Exp $ (C) 2005 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.11 2007-11-08 09:34:17 dockes Exp $ (C) 2005 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
|
4 |
|
5 |
/**
|
5 |
/**
|
6 |
* Management of the auxiliary databases listing stems and their expansion
|
6 |
* Management of the auxiliary databases listing stems and their expansion
|
7 |
* terms
|
7 |
* terms
|
|
... |
|
... |
82 |
* parent terms in the document data.
|
82 |
* parent terms in the document data.
|
83 |
*/
|
83 |
*/
|
84 |
bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang)
|
84 |
bool createDb(Xapian::Database& xdb, const string& dbdir, const string& lang)
|
85 |
{
|
85 |
{
|
86 |
LOGDEB(("StemDb::createDb(%s)\n", lang.c_str()));
|
86 |
LOGDEB(("StemDb::createDb(%s)\n", lang.c_str()));
|
|
|
87 |
Chrono cron;
|
87 |
|
88 |
|
88 |
// First build the in-memory stem database:
|
89 |
// First build the in-memory stem database:
|
89 |
// We walk the list of all terms, and stem each.
|
90 |
// We walk the list of all terms, and stem each.
|
90 |
// If the stem is identical to the term, no need to create an entry
|
91 |
// If the stem is identical to the term, no need to create an entry
|
91 |
// Else, we add an entry to the multimap.
|
92 |
// Else, we add an entry to the multimap.
|
|
... |
|
... |
130 |
} catch (...) {
|
131 |
} catch (...) {
|
131 |
LOGERR(("Db::createStemDb: build failed: no stemmer for %s ? \n",
|
132 |
LOGERR(("Db::createStemDb: build failed: no stemmer for %s ? \n",
|
132 |
lang.c_str()));
|
133 |
lang.c_str()));
|
133 |
return false;
|
134 |
return false;
|
134 |
}
|
135 |
}
|
|
|
136 |
LOGDEB1(("StemDb::createDb(%s): in memory map built: %.2f S\n",
|
|
|
137 |
lang.c_str(), cron.secs()));
|
135 |
|
138 |
|
136 |
// Create xapian database for stem relations
|
139 |
// Create xapian database for stem relations
|
137 |
string stemdbdir = stemdbname(dbdir, lang);
|
140 |
string stemdbdir = stemdbname(dbdir, lang);
|
138 |
// We want to get rid of the db dir in case of error. This gets disarmed
|
141 |
// We want to get rid of the db dir in case of error. This gets disarmed
|
139 |
// just before success return.
|
142 |
// just before success return.
|
|
... |
|
... |
188 |
record += "\n";
|
191 |
record += "\n";
|
189 |
LOGDEB1(("stemdocument data: %s\n", record.c_str()));
|
192 |
LOGDEB1(("stemdocument data: %s\n", record.c_str()));
|
190 |
newdocument.set_data(record);
|
193 |
newdocument.set_data(record);
|
191 |
try {
|
194 |
try {
|
192 |
sdb.replace_document(stem, newdocument);
|
195 |
sdb.replace_document(stem, newdocument);
|
|
|
196 |
//sdb.add_document(newdocument);
|
193 |
} catch (...) {
|
197 |
} catch (...) {
|
194 |
LOGERR(("Db::createstemdb: replace failed\n"));
|
198 |
LOGERR(("Db::createstemdb: replace failed\n"));
|
195 |
return false;
|
199 |
return false;
|
196 |
}
|
200 |
}
|
197 |
}
|
201 |
}
|
|
... |
|
... |
199 |
stem = it->first;
|
203 |
stem = it->first;
|
200 |
derivs.push_back(it->second);
|
204 |
derivs.push_back(it->second);
|
201 |
// cerr << "\n" << stem << " " << it->second;
|
205 |
// cerr << "\n" << stem << " " << it->second;
|
202 |
}
|
206 |
}
|
203 |
}
|
207 |
}
|
|
|
208 |
LOGDEB1(("StemDb::createDb(%s): done: %.2f S\n",
|
|
|
209 |
lang.c_str(), cron.secs()));
|
204 |
LOGDEB(("Stem map size: %d stems %d mult %d no %d const %d\n",
|
210 |
LOGDEB(("Stem map size: %d stems %d mult %d no %d const %d\n",
|
205 |
assocs.size(), stemdiff, stemmultiple, nostem, stemconst));
|
211 |
assocs.size(), stemdiff, stemmultiple, nostem, stemconst));
|
206 |
wiper.do_it = false;
|
212 |
wiper.do_it = false;
|
207 |
return true;
|
213 |
return true;
|
208 |
}
|
214 |
}
|