|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
889 |
int res = -1;
|
889 |
int res = -1;
|
890 |
if (!m_ndb || !m_ndb->m_isopen)
|
890 |
if (!m_ndb || !m_ndb->m_isopen)
|
891 |
return -1;
|
891 |
return -1;
|
892 |
|
892 |
|
893 |
string term;
|
893 |
string term;
|
894 |
if (!unacmaybefold(_term, term, "UTF-8", true)) {
|
894 |
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
895 |
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
|
895 |
LOGINFO(("Db::termDocCnt: unac failed for [%s]\n", _term.c_str()));
|
896 |
return 0;
|
896 |
return 0;
|
897 |
}
|
897 |
}
|
898 |
|
898 |
|
899 |
if (m_stops.isStop(term)) {
|
899 |
if (m_stops.isStop(term)) {
|
|
... |
|
... |
1115 |
string Db::getSpellingSuggestion(const string& word)
|
1115 |
string Db::getSpellingSuggestion(const string& word)
|
1116 |
{
|
1116 |
{
|
1117 |
if (m_ndb == 0)
|
1117 |
if (m_ndb == 0)
|
1118 |
return string();
|
1118 |
return string();
|
1119 |
string term;
|
1119 |
string term;
|
1120 |
if (!unacmaybefold(word, term, "UTF-8", true)) {
|
1120 |
if (!unacmaybefold(word, term, "UTF-8", UNACOP_UNACFOLD)) {
|
1121 |
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
|
1121 |
LOGINFO(("Db::getSpelling: unac failed for [%s]\n", word.c_str()));
|
1122 |
return string();
|
1122 |
return string();
|
1123 |
}
|
1123 |
}
|
1124 |
if (!isSpellingCandidate(term))
|
1124 |
if (!isSpellingCandidate(term))
|
1125 |
return string();
|
1125 |
return string();
|
|
... |
|
... |
1314 |
// term prefix We also add a term for the filename extension if
|
1314 |
// term prefix We also add a term for the filename extension if
|
1315 |
// any.
|
1315 |
// any.
|
1316 |
string utf8fn;
|
1316 |
string utf8fn;
|
1317 |
if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
|
1317 |
if (doc.getmeta(Doc::keyfn, &utf8fn) && !utf8fn.empty()) {
|
1318 |
string fn;
|
1318 |
string fn;
|
1319 |
if (unacmaybefold(utf8fn, fn, "UTF-8", true)) {
|
1319 |
if (unacmaybefold(utf8fn, fn, "UTF-8", UNACOP_UNACFOLD)) {
|
1320 |
// We should truncate after extracting the extension, but this is
|
1320 |
// We should truncate after extracting the extension, but this is
|
1321 |
// a pathological case anyway
|
1321 |
// a pathological case anyway
|
1322 |
if (fn.size() > 230)
|
1322 |
if (fn.size() > 230)
|
1323 |
utf8truncate(fn, 230);
|
1323 |
utf8truncate(fn, 230);
|
1324 |
string::size_type pos = fn.rfind('.');
|
1324 |
string::size_type pos = fn.rfind('.');
|
|
... |
|
... |
1608 |
{
|
1608 |
{
|
1609 |
LOGDEB(("Db::getStemLang\n"));
|
1609 |
LOGDEB(("Db::getStemLang\n"));
|
1610 |
vector<string> langs;
|
1610 |
vector<string> langs;
|
1611 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1611 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1612 |
return langs;
|
1612 |
return langs;
|
1613 |
langs = StemDb::getLangs(m_ndb->xrdb);
|
1613 |
StemDb db(m_ndb->xrdb);
|
|
|
1614 |
db.getMembers(langs);
|
1614 |
return langs;
|
1615 |
return langs;
|
1615 |
}
|
1616 |
}
|
1616 |
|
1617 |
|
1617 |
/**
|
1618 |
/**
|
1618 |
* Delete stem db for given language
|
1619 |
* Delete stem db for given language
|
|
... |
|
... |
1620 |
bool Db::deleteStemDb(const string& lang)
|
1621 |
bool Db::deleteStemDb(const string& lang)
|
1621 |
{
|
1622 |
{
|
1622 |
LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
|
1623 |
LOGDEB(("Db::deleteStemDb(%s)\n", lang.c_str()));
|
1623 |
if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable)
|
1624 |
if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable)
|
1624 |
return false;
|
1625 |
return false;
|
1625 |
return StemDb::deleteDb(m_ndb->xwdb, lang);
|
1626 |
WritableStemDb db(m_ndb->xwdb);
|
|
|
1627 |
return db.deleteMember(lang);
|
1626 |
}
|
1628 |
}
|
1627 |
|
1629 |
|
1628 |
/**
|
1630 |
/**
|
1629 |
* Create database of stem to parents associations for a given language.
|
1631 |
* Create database of stem to parents associations for a given language.
|
1630 |
* We walk the list of all terms, stem them, and create another Xapian db
|
1632 |
* We walk the list of all terms, stem them, and create another Xapian db
|
|
... |
|
... |
1637 |
if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable) {
|
1639 |
if (m_ndb == 0 || m_ndb->m_isopen == false || !m_ndb->m_iswritable) {
|
1638 |
LOGERR(("createStemDb: db not open or not writable\n"));
|
1640 |
LOGERR(("createStemDb: db not open or not writable\n"));
|
1639 |
return false;
|
1641 |
return false;
|
1640 |
}
|
1642 |
}
|
1641 |
|
1643 |
|
1642 |
return StemDb::createDb(m_ndb->xwdb, lang);
|
1644 |
WritableStemDb db(m_ndb->xwdb);
|
|
|
1645 |
return db.createDb(lang);
|
1643 |
}
|
1646 |
}
|
1644 |
|
1647 |
|
1645 |
/**
|
1648 |
/**
|
1646 |
* This is called at the end of an indexing session, to delete the
|
1649 |
* This is called at the end of an indexing session, to delete the
|
1647 |
* documents for files that are no longer there. This can ONLY be called
|
1650 |
* documents for files that are no longer there. This can ONLY be called
|
|
... |
|
... |
1848 |
TermMatchResult& result, int max)
|
1851 |
TermMatchResult& result, int max)
|
1849 |
{
|
1852 |
{
|
1850 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1853 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1851 |
return false;
|
1854 |
return false;
|
1852 |
vector<string> exp;
|
1855 |
vector<string> exp;
|
|
|
1856 |
StemDb db(m_ndb->xrdb);
|
1853 |
if (!StemDb::stemExpand(m_ndb->xrdb, langs, term, exp))
|
1857 |
if (!db.stemExpand(langs, term, exp))
|
1854 |
return false;
|
1858 |
return false;
|
1855 |
result.entries.insert(result.entries.end(), exp.begin(), exp.end());
|
1859 |
result.entries.insert(result.entries.end(), exp.begin(), exp.end());
|
1856 |
return true;
|
1860 |
return true;
|
1857 |
}
|
1861 |
}
|
1858 |
|
1862 |
|
|
... |
|
... |
1891 |
if (!m_reason.empty())
|
1895 |
if (!m_reason.empty())
|
1892 |
return false;
|
1896 |
return false;
|
1893 |
|
1897 |
|
1894 |
// Get rid of capitals and accents
|
1898 |
// Get rid of capitals and accents
|
1895 |
string droot;
|
1899 |
string droot;
|
1896 |
if (!unacmaybefold(root, droot, "UTF-8", true)) {
|
1900 |
if (!unacmaybefold(root, droot, "UTF-8", UNACOP_UNACFOLD)) {
|
1897 |
LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
|
1901 |
LOGERR(("Db::termMatch: unac failed for [%s]\n", root.c_str()));
|
1898 |
return false;
|
1902 |
return false;
|
1899 |
}
|
1903 |
}
|
1900 |
string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;
|
1904 |
string nochars = typ == ET_WILD ? cstr_wildSpecChars : cstr_regSpecChars;
|
1901 |
|
1905 |
|