Switch to unified view

a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.5 2006-10-09 16:37:08 dockes Exp $ (C) 2005 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.6 2006-12-19 12:11:21 dockes Exp $ (C) 2005 J.F.Dockes";
3
#endif
3
#endif
4
4
5
/**
5
/**
6
 * Management of the auxiliary databases listing stems and their expansion 
6
 * Management of the auxiliary databases listing stems and their expansion 
7
 * terms
7
 * terms
...
...
204
        assocs.size(), stemdiff, stemmultiple, nostem, stemconst));
204
        assocs.size(), stemdiff, stemmultiple, nostem, stemconst));
205
    wiper.do_it = false;
205
    wiper.do_it = false;
206
    return true;
206
    return true;
207
}
207
}
208
208
209
/**
 * Render a list of strings on one line, for use in log messages.
 *
 * Every element is wrapped in square brackets and elements are separated by
 * a single space, e.g. {"a", "b"} gives "[a] [b]". An empty list gives an
 * empty string.
 *
 * @param sl the strings to display
 * @return the bracketed, space-separated rendering of sl
 */
static std::string stringlistdisp(const std::list<std::string>& sl)
{
    std::string s;
    for (std::list<std::string>::const_iterator it = sl.begin();
         it != sl.end(); ++it) {
        // Append piecewise: no temporary strings are built per element.
        s += '[';
        s += *it;
        s += "] ";
    }
    // Every element leaves a trailing separator; drop the last one.
    if (!s.empty())
        s.erase(s.length() - 1);
    return s;
}
218
209
/**
219
/**
210
 * Expand term to list of all terms which stem to the same term.
220
 * Expand term to list of all terms which stem to the same term.
211
 */
221
 */
212
// NOTE(review): this span interleaves the pre-change and post-change
// versions of stemExpand() as rendered by a side-by-side diff viewer; the
// bare numbers are viewer line numbers and "..." marks elided lines.
// The older version returns the expansion list by value. The newer version
// appends the expansion to `result`, returns false only from the catch-all
// handler and true on every other path.
list<string> stemExpand(const string& dbdir, const string& lang,
222
bool stemExpand(const std::string& dbdir, 
223
      const std::string& lang,
213
         const string& term)
224
        const std::string& term,
225
      list<string>& result)
214
{
226
{
215
    list<string> explist;
216
    try {
227
    try {
217
    Xapian::Stem stemmer(lang);
228
    Xapian::Stem stemmer(lang);
218
    string stem = stemmer.stem_word(term);
229
    string stem = stemmer.stem_word(term);
219
    LOGDEB(("stemExpand: [%s] stem-> [%s]\n", term.c_str(), stem.c_str()));
230
    LOGDEB(("stemExpand: [%s] stem-> [%s]\n", term.c_str(), stem.c_str()));
220
    // Try to fetch the doc from the stem db
231
    // Try to fetch the doc from the stem db
...
...
222
    Xapian::Database sdb(stemdbdir);
233
    Xapian::Database sdb(stemdbdir);
223
    LOGDEB1(("stemExpand: %s lastdocid: %d\n", 
234
    LOGDEB1(("stemExpand: %s lastdocid: %d\n", 
224
        stemdbdir.c_str(), sdb.get_lastdocid()));
235
        stemdbdir.c_str(), sdb.get_lastdocid()));
    // Stem is not a term of the stem db: the expansion is the user term alone.
225
    if (!sdb.term_exists(stem)) {
236
    if (!sdb.term_exists(stem)) {
226
        LOGDEB1(("Db::stemExpand: no term for %s\n", stem.c_str()));
237
        LOGDEB1(("Db::stemExpand: no term for %s\n", stem.c_str()));
227
        explist.push_back(term);
238
        result.push_back(term);
228
        return explist;
239
        return true;
229
    }
240
    }
    // Only the first posting for the stem is used; an empty posting list is
    // handled like an unknown stem.
230
    Xapian::PostingIterator did = sdb.postlist_begin(stem);
241
    Xapian::PostingIterator did = sdb.postlist_begin(stem);
231
    if (did == sdb.postlist_end(stem)) {
242
    if (did == sdb.postlist_end(stem)) {
232
        LOGDEB1(("stemExpand: no term(1) for %s\n",stem.c_str()));
243
        LOGDEB1(("stemExpand: no term(1) for %s\n",stem.c_str()));
233
        explist.push_back(term);
244
        result.push_back(term);
234
        return explist;
245
        return true;
235
    }
246
    }
236
    Xapian::Document doc = sdb.get_document(*did);
247
    Xapian::Document doc = sdb.get_document(*did);
237
    string data = doc.get_data();
248
    string data = doc.get_data();
238
249
239
    // Build expansion list from database data
250
    // Build expansion list from database data
240
    // No need for a conftree, but we need to massage the data a little
251
    // No need for a conftree, but we need to massage the data a little
    // The text used is what lies between the first '=' and the last newline
    // of the document data.
241
    string::size_type pos = data.find_first_of("=");
252
    string::size_type pos = data.find_first_of("=");
    // NOTE(review): pos is incremented before it is compared with npos
    // below. If '=' is absent, find_first_of() returns npos and the
    // increment wraps it to 0, so the `pos == string::npos` test can never
    // be true. Confirm whether the check should precede the increment.
242
    ++pos;
253
    ++pos;
243
    string::size_type pos1 = data.find_last_of("\n");
254
    string::size_type pos1 = data.find_last_of("\n");
244
    if (pos == string::npos || pos1 == string::npos ||pos1 <= pos) { // ??
255
    if (pos == string::npos || pos1 == string::npos ||pos1 <= pos) { // ??
245
        explist.push_back(term);
256
        result.push_back(term);
246
        return explist;
257
        return true;
247
    }
258
    }
    // stringToStrings() is defined elsewhere; presumably it splits the
    // extracted text into list elements and appends them -- TODO confirm.
248
    stringToStrings(data.substr(pos, pos1-pos), explist);
259
    stringToStrings(data.substr(pos, pos1-pos), result);
249
260
250
    // If the user term itself is not in the list, add it.
261
    // If the user term itself is not in the list, add it.
251
    if (find(explist.begin(), explist.end(), term) == explist.end()) {
262
    if (find(result.begin(), result.end(), term) == result.end()) {
252
        explist.push_back(term);
263
        result.push_back(term);
253
    }
264
    }
254
    LOGDEB(("stemExpand: %s ->  %s\n", stem.c_str(),
265
    LOGDEB(("stemExpand: %s ->  %s\n", stem.c_str(),
255
        stringlistdisp(explist).c_str()));
266
        stringlistdisp(result).c_str()));
    // Any exception raised inside the try block is logged and the user term
    // alone is used as the expansion; the newer version also reports the
    // failure by returning false.
256
    } catch (...) {
267
    } catch (...) {
257
    LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n",
268
    LOGERR(("stemExpand: error accessing stem db. dbdir [%s] lang [%s]\n",
258
        dbdir.c_str(), lang.c_str()));
269
        dbdir.c_str(), lang.c_str()));
259
    explist.push_back(term);
270
    result.push_back(term);
260
  return explist;
271
  return false;
261
    }
272
    }
262
    return explist;
273
    return true;
263
}
274
}
264
275
265
}
276
}
266
}
277
}