|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
1666 |
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
1666 |
int operator()(const TermMatchEntry& l, const TermMatchEntry& r) {
|
1667 |
return !l.term.compare(r.term);
|
1667 |
return !l.term.compare(r.term);
|
1668 |
}
|
1668 |
}
|
1669 |
};
|
1669 |
};
|
1670 |
|
1670 |
|
|
|
1671 |
#ifdef RCL_INDEX_STRIPCHARS
|
1671 |
bool Db::stemExpand(const string &langs, const string &term,
|
1672 |
bool Db::stemExpand(const string &langs, const string &term,
|
1672 |
TermMatchResult& result)
|
1673 |
TermMatchResult& result)
|
1673 |
{
|
1674 |
{
|
1674 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1675 |
if (m_ndb == 0 || m_ndb->m_isopen == false)
|
1675 |
return false;
|
1676 |
return false;
|
|
... |
|
... |
1678 |
if (!db.stemExpand(langs, term, exp))
|
1679 |
if (!db.stemExpand(langs, term, exp))
|
1679 |
return false;
|
1680 |
return false;
|
1680 |
result.entries.insert(result.entries.end(), exp.begin(), exp.end());
|
1681 |
result.entries.insert(result.entries.end(), exp.begin(), exp.end());
|
1681 |
return true;
|
1682 |
return true;
|
1682 |
}
|
1683 |
}
|
|
|
1684 |
#endif
|
1683 |
|
1685 |
|
1684 |
/** Add prefix to all strings in list.
|
1686 |
/** Add prefix to all strings in list.
|
1685 |
* @param prefix already wrapped prefix
|
1687 |
* @param prefix already wrapped prefix
|
1686 |
*/
|
1688 |
*/
|
1687 |
static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
|
1689 |
static void addPrefix(vector<TermMatchEntry>& terms, const string& prefix)
|
|
... |
|
... |
1691 |
for (vector<TermMatchEntry>::iterator it = terms.begin();
|
1693 |
for (vector<TermMatchEntry>::iterator it = terms.begin();
|
1692 |
it != terms.end(); it++)
|
1694 |
it != terms.end(); it++)
|
1693 |
it->term.insert(0, prefix);
|
1695 |
it->term.insert(0, prefix);
|
1694 |
}
|
1696 |
}
|
1695 |
|
1697 |
|
1696 |
// Find all index terms that match a wildcard or regular expression
|
1698 |
bool Db::dbStats(DbStats& res)
|
1697 |
// If field is set, we return a list of appropriately prefixed terms (which
|
|
|
1698 |
// are going to be used to build a Xapian query).
|
|
|
1699 |
bool Db::termMatch(MatchType typ, const string &lang,
|
|
|
1700 |
const string &_root,
|
|
|
1701 |
TermMatchResult& res,
|
|
|
1702 |
int max,
|
|
|
1703 |
const string& field)
|
|
|
1704 |
{
|
1699 |
{
|
1705 |
if (!m_ndb || !m_ndb->m_isopen)
|
1700 |
if (!m_ndb || !m_ndb->m_isopen)
|
1706 |
return false;
|
1701 |
return false;
|
1707 |
Xapian::Database xdb = m_ndb->xrdb;
|
1702 |
Xapian::Database xdb = m_ndb->xrdb;
|
1708 |
|
1703 |
|
|
... |
|
... |
1711 |
res.mindoclen = xdb.get_doclength_lower_bound();
|
1706 |
res.mindoclen = xdb.get_doclength_lower_bound();
|
1712 |
res.maxdoclen = xdb.get_doclength_upper_bound();
|
1707 |
res.maxdoclen = xdb.get_doclength_upper_bound();
|
1713 |
, xdb, m_reason);
|
1708 |
, xdb, m_reason);
|
1714 |
if (!m_reason.empty())
|
1709 |
if (!m_reason.empty())
|
1715 |
return false;
|
1710 |
return false;
|
|
|
1711 |
return true;
|
|
|
1712 |
}
|
1716 |
|
1713 |
|
1717 |
string droot = _root;
|
1714 |
// Find all index terms that match a wildcard or regular expression. If
|
|
|
1715 |
// field is set, we return a list of appropriately prefixed terms
|
|
|
1716 |
// (which are going to be used to build a Xapian query). This routine
|
|
|
1717 |
// performs case/diacritics/stemming expansion and possibly calls
|
|
|
1718 |
// idxTermMatch for wildcard/regexp expansion and filtering against
|
|
|
1719 |
// the main index terms.
|
|
|
1720 |
bool Db::termMatch(int typ_sens, const string &lang,
|
|
|
1721 |
const string &_term,
|
|
|
1722 |
TermMatchResult& res,
|
|
|
1723 |
int max,
|
|
|
1724 |
const string& field)
|
|
|
1725 |
{
|
|
|
1726 |
int matchtyp = matchTypeTp(typ_sens);
|
|
|
1727 |
if (!m_ndb || !m_ndb->m_isopen)
|
|
|
1728 |
return false;
|
|
|
1729 |
Xapian::Database xrdb = m_ndb->xrdb;
|
1718 |
|
1730 |
|
1719 |
// If index is stripped, get rid of capitals and accents
|
1731 |
bool diac_sensitive = (typ_sens & ET_DIACSENS) != 0;
|
|
|
1732 |
bool case_sensitive = (typ_sens & ET_CASESENS) != 0;
|
|
|
1733 |
|
|
|
1734 |
bool stripped = false;
|
|
|
1735 |
#ifdef RCL_INDEX_STRIPCHARS
|
|
|
1736 |
stripped = true;
|
|
|
1737 |
#else
|
|
|
1738 |
stripped = o_index_stripchars;
|
|
|
1739 |
#endif
|
|
|
1740 |
|
|
|
1741 |
LOGDEB(("Db::TermMatch: typ %d diacsens %d casesens %d lang [%s] term [%s] "
|
|
|
1742 |
"max %d field [%s] stripped %d\n",
|
|
|
1743 |
matchtyp, diac_sensitive, case_sensitive, lang.c_str(),
|
|
|
1744 |
_term.c_str(), max, field.c_str(), stripped));
|
|
|
1745 |
|
|
|
1746 |
// If index is stripped, no case or diac expansion can be needed:
|
|
|
1747 |
// for the processing inside this routine, everything looks like
|
|
|
1748 |
// we're all-sensitive: no use of expansion db.
|
|
|
1749 |
// Also, convert input to lowercase and strip its accents.
|
|
|
1750 |
string term = _term;
|
|
|
1751 |
if (stripped) {
|
|
|
1752 |
diac_sensitive = case_sensitive = true;
|
|
|
1753 |
if (!unacmaybefold(_term, term, "UTF-8", UNACOP_UNACFOLD)) {
|
|
|
1754 |
LOGERR(("Db::termMatch: unac failed for [%s]\n", _term.c_str()));
|
|
|
1755 |
return false;
|
|
|
1756 |
}
|
|
|
1757 |
}
|
|
|
1758 |
|
1720 |
#ifndef RCL_INDEX_STRIPCHARS
|
1759 |
#ifndef RCL_INDEX_STRIPCHARS
|
1721 |
if (o_index_stripchars)
|
1760 |
// The case/diac expansion db
|
|
|
1761 |
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
|
|
1762 |
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
|
|
1763 |
#endif // RCL_INDEX_STRIPCHARS
|
|
|
1764 |
|
|
|
1765 |
|
|
|
1766 |
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
|
|
1767 |
#ifdef RCL_INDEX_STRIPCHARS
|
|
|
1768 |
idxTermMatch(typ_sens, lang, term, res, max, field);
|
|
|
1769 |
#else
|
|
|
1770 |
RefCntr<StrMatcher> matcher;
|
|
|
1771 |
if (matchtyp == ET_WILD) {
|
|
|
1772 |
matcher = RefCntr<StrMatcher>(new StrWildMatcher(term));
|
|
|
1773 |
} else {
|
|
|
1774 |
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(term));
|
|
|
1775 |
}
|
|
|
1776 |
if (!diac_sensitive || !case_sensitive) {
|
|
|
1777 |
// Perform case/diac expansion on the exp as appropriate and
|
|
|
1778 |
// expand the result.
|
|
|
1779 |
vector<string> exp;
|
|
|
1780 |
if (diac_sensitive) {
|
|
|
1781 |
// Expand for diacritics and case, filtering for same diacritics
|
|
|
1782 |
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
|
|
1783 |
synac.synKeyExpand(matcher.getptr(), exp, &foldtrans);
|
|
|
1784 |
} else if (case_sensitive) {
|
|
|
1785 |
// Expand for diacritics and case, filtering for same case
|
|
|
1786 |
SynTermTransUnac unactrans(UNACOP_UNAC);
|
|
|
1787 |
synac.synKeyExpand(matcher.getptr(), exp, &unactrans);
|
|
|
1788 |
} else {
|
|
|
1789 |
// Expand for diacritics and case, no filtering
|
|
|
1790 |
synac.synKeyExpand(matcher.getptr(), exp);
|
|
|
1791 |
}
|
|
|
1792 |
// Retrieve additional info and filter against the index itself
|
|
|
1793 |
for (vector<string>::const_iterator it = exp.begin();
|
|
|
1794 |
it != exp.end(); it++) {
|
|
|
1795 |
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
|
|
1796 |
}
|
|
|
1797 |
} else {
|
|
|
1798 |
idxTermMatch(typ_sens, lang, term, res, max, field);
|
|
|
1799 |
}
|
|
|
1800 |
|
|
|
1801 |
#endif // RCL_INDEX_STRIPCHARS
|
|
|
1802 |
|
|
|
1803 |
} else {
|
|
|
1804 |
// Expansion is STEM or NONE (which may still need case/diac exp)
|
|
|
1805 |
|
|
|
1806 |
#ifdef RCL_INDEX_STRIPCHARS
|
|
|
1807 |
|
|
|
1808 |
idxTermMatch(Rcl::Db::ET_STEM, lang, term, res, max, field);
|
|
|
1809 |
|
|
|
1810 |
#else
|
|
|
1811 |
vector<string> lexp;
|
|
|
1812 |
if (diac_sensitive && case_sensitive) {
|
|
|
1813 |
// No case/diac expansion
|
|
|
1814 |
lexp.push_back(term);
|
|
|
1815 |
} else if (diac_sensitive) {
|
|
|
1816 |
// Expand for accents and case, filtering for same accents,
|
|
|
1817 |
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
|
|
1818 |
synac.synExpand(term, lexp, &foldtrans);
|
|
|
1819 |
} else if (case_sensitive) {
|
|
|
1820 |
// Expand for accents and case, filtering for same case
|
|
|
1821 |
SynTermTransUnac unactrans(UNACOP_UNAC);
|
|
|
1822 |
synac.synExpand(term, lexp, &unactrans);
|
|
|
1823 |
} else {
|
|
|
1824 |
// We are neither accent- nor case- sensitive and may need stem
|
|
|
1825 |
// expansion or not. Expand for accents and case
|
|
|
1826 |
synac.synExpand(term, lexp);
|
|
|
1827 |
}
|
|
|
1828 |
|
|
|
1829 |
if (matchTypeTp(typ_sens) == ET_STEM) {
|
|
|
1830 |
// Need stem expansion. Lowercase the result of accent and case
|
|
|
1831 |
// expansion for input to stemdb.
|
|
|
1832 |
for (unsigned int i = 0; i < lexp.size(); i++) {
|
|
|
1833 |
string lower;
|
|
|
1834 |
unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
|
|
|
1835 |
lexp[i] = lower;
|
|
|
1836 |
}
|
|
|
1837 |
sort(lexp.begin(), lexp.end());
|
|
|
1838 |
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
|
|
1839 |
StemDb sdb(xrdb);
|
|
|
1840 |
vector<string> exp1;
|
|
|
1841 |
for (vector<string>::const_iterator it = lexp.begin();
|
|
|
1842 |
it != lexp.end(); it++) {
|
|
|
1843 |
sdb.stemExpand(lang, *it, exp1);
|
|
|
1844 |
}
|
|
|
1845 |
LOGDEB(("ExpTerm: stem exp-> %s\n", stringsToString(exp1).c_str()));
|
|
|
1846 |
|
|
|
1847 |
// Expand the resulting list for case (all stemdb content
|
|
|
1848 |
// is lowercase)
|
|
|
1849 |
lexp.clear();
|
|
|
1850 |
for (vector<string>::const_iterator it = exp1.begin();
|
|
|
1851 |
it != exp1.end(); it++) {
|
|
|
1852 |
synac.synExpand(*it, lexp);
|
|
|
1853 |
}
|
|
|
1854 |
sort(lexp.begin(), lexp.end());
|
|
|
1855 |
lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
|
|
|
1856 |
}
|
|
|
1857 |
|
|
|
1858 |
// Filter the result and get the stats, possibly add prefixes.
|
|
|
1859 |
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
|
|
|
1860 |
for (vector<string>::const_iterator it = lexp.begin();
|
|
|
1861 |
it != lexp.end(); it++) {
|
|
|
1862 |
idxTermMatch(Rcl::Db::ET_WILD, "", *it, res, max, field);
|
|
|
1863 |
}
|
|
|
1864 |
}
|
1722 |
#endif
|
1865 |
#endif
|
1723 |
if (!unacmaybefold(_root, droot, "UTF-8", UNACOP_UNACFOLD)) {
|
1866 |
|
1724 |
LOGERR(("Db::termMatch: unac failed for [%s]\n", _root.c_str()));
|
1867 |
TermMatchCmpByTerm tcmp;
|
|
|
1868 |
sort(res.entries.begin(), res.entries.end(), tcmp);
|
|
|
1869 |
TermMatchTermEqual teq;
|
|
|
1870 |
vector<TermMatchEntry>::iterator uit =
|
|
|
1871 |
unique(res.entries.begin(), res.entries.end(), teq);
|
|
|
1872 |
res.entries.resize(uit - res.entries.begin());
|
|
|
1873 |
TermMatchCmpByWcf wcmp;
|
|
|
1874 |
sort(res.entries.begin(), res.entries.end(), wcmp);
|
|
|
1875 |
if (max > 0) {
|
|
|
1876 |
// Would need a small max and big stem expansion...
|
|
|
1877 |
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
|
|
|
1878 |
}
|
|
|
1879 |
return true;
|
|
|
1880 |
}
|
|
|
1881 |
|
|
|
1882 |
// Second phase of wildcard/regexp term expansion after case/diac
|
|
|
1883 |
// expansion: expand against main index terms
|
|
|
1884 |
bool Db::idxTermMatch(int typ_sens, const string &lang,
|
|
|
1885 |
const string &root,
|
|
|
1886 |
TermMatchResult& res,
|
|
|
1887 |
int max,
|
|
|
1888 |
const string& field)
|
|
|
1889 |
{
|
|
|
1890 |
int typ = matchTypeTp(typ_sens);
|
|
|
1891 |
|
|
|
1892 |
#ifndef RCL_INDEX_STRIPCHARS
|
|
|
1893 |
if (typ == ET_STEM) {
|
|
|
1894 |
LOGFATAL(("RCLDB: internal error: idxTermMatch called with ET_STEM\n"));
|
|
|
1895 |
abort();
|
|
|
1896 |
}
|
|
|
1897 |
#endif
|
|
|
1898 |
|
|
|
1899 |
if (!m_ndb || !m_ndb->m_isopen)
|
1725 |
return false;
|
1900 |
return false;
|
1726 |
}
|
1901 |
Xapian::Database xdb = m_ndb->xrdb;
|
1727 |
|
|
|
1728 |
string nochars = typ == ET_WILD ? cstr_wildSpecStChars :
|
|
|
1729 |
cstr_regSpecStChars;
|
|
|
1730 |
|
1902 |
|
1731 |
string prefix;
|
1903 |
string prefix;
|
1732 |
if (!field.empty()) {
|
1904 |
if (!field.empty()) {
|
1733 |
const FieldTraits *ftp = 0;
|
1905 |
const FieldTraits *ftp = 0;
|
1734 |
if (!fieldToTraits(field, &ftp) || ftp->pfx.empty()) {
|
1906 |
if (!fieldToTraits(field, &ftp) || ftp->pfx.empty()) {
|
|
... |
|
... |
1738 |
prefix = wrap_prefix(ftp->pfx);
|
1910 |
prefix = wrap_prefix(ftp->pfx);
|
1739 |
}
|
1911 |
}
|
1740 |
}
|
1912 |
}
|
1741 |
res.prefix = prefix;
|
1913 |
res.prefix = prefix;
|
1742 |
|
1914 |
|
|
|
1915 |
#ifdef RCL_INDEX_STRIPCHARS
|
1743 |
if (typ == ET_STEM) {
|
1916 |
if (typ == ET_STEM) {
|
1744 |
if (!stemExpand(lang, droot, res))
|
1917 |
if (!stemExpand(lang, root, res))
|
1745 |
return false;
|
1918 |
return false;
|
1746 |
for (vector<TermMatchEntry>::iterator it = res.entries.begin();
|
1919 |
for (vector<TermMatchEntry>::iterator it = res.entries.begin();
|
1747 |
it != res.entries.end(); it++) {
|
1920 |
it != res.entries.end(); it++) {
|
1748 |
XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
|
1921 |
XAPTRY(it->wcf = xdb.get_collection_freq(it->term);
|
1749 |
it->docs = xdb.get_termfreq(it->term),
|
1922 |
it->docs = xdb.get_termfreq(it->term),
|
|
... |
|
... |
1752 |
return false;
|
1925 |
return false;
|
1753 |
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
|
1926 |
LOGDEB1(("termMatch: %d [%s]\n", it->wcf, it->term.c_str()));
|
1754 |
}
|
1927 |
}
|
1755 |
if (!prefix.empty())
|
1928 |
if (!prefix.empty())
|
1756 |
addPrefix(res.entries, prefix);
|
1929 |
addPrefix(res.entries, prefix);
|
1757 |
} else {
|
1930 |
} else
|
1758 |
regex_t reg;
|
1931 |
#endif
|
1759 |
int errcode;
|
1932 |
{
|
|
|
1933 |
RefCntr<StrMatcher> matcher;
|
1760 |
if (typ == ET_REGEXP) {
|
1934 |
if (typ == ET_REGEXP) {
|
1761 |
if ((errcode = regcomp(&reg, droot.c_str(),
|
1935 |
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root));
|
1762 |
REG_EXTENDED|REG_NOSUB))) {
|
1936 |
if (!matcher->ok()) {
|
1763 |
char errbuf[200];
|
|
|
1764 |
regerror(errcode, &reg, errbuf, 199);
|
|
|
1765 |
LOGERR(("termMatch: regcomp failed: %s\n", errbuf));
|
1937 |
LOGERR(("termMatch: regcomp failed: %s\n",
|
1766 |
res.entries.push_back(string(errbuf));
|
1938 |
matcher->getreason().c_str()))
|
1767 |
regfree(&reg);
|
|
|
1768 |
return false;
|
1939 |
return false;
|
1769 |
}
|
1940 |
}
|
|
|
1941 |
} else if (typ == ET_WILD) {
|
|
|
1942 |
matcher = RefCntr<StrMatcher>(new StrWildMatcher(root));
|
1770 |
}
|
1943 |
}
|
1771 |
|
1944 |
|
1772 |
// Find the initial section before any special char
|
1945 |
// Find the initial section before any special char
|
1773 |
string::size_type es = droot.find_first_of(nochars);
|
1946 |
string::size_type es = string::npos;
|
|
|
1947 |
if (matcher.isNotNull()) {
|
|
|
1948 |
es = matcher->baseprefixlen();
|
|
|
1949 |
}
|
1774 |
string is;
|
1950 |
string is;
|
1775 |
switch (es) {
|
1951 |
switch (es) {
|
1776 |
case string::npos: is = prefix + droot; break;
|
1952 |
case string::npos: is = prefix + root; break;
|
1777 |
case 0: is = prefix; break;
|
1953 |
case 0: is = prefix; break;
|
1778 |
default: is = prefix + droot.substr(0, es); break;
|
1954 |
default: is = prefix + root.substr(0, es); break;
|
1779 |
}
|
1955 |
}
|
1780 |
LOGDEB1(("termMatch: initsec: [%s]\n", is.c_str()));
|
1956 |
LOGDEB2(("termMatch: initsec: [%s]\n", is.c_str()));
|
1781 |
|
1957 |
|
1782 |
for (int tries = 0; tries < 2; tries++) {
|
1958 |
for (int tries = 0; tries < 2; tries++) {
|
1783 |
try {
|
1959 |
try {
|
1784 |
Xapian::TermIterator it = xdb.allterms_begin();
|
1960 |
Xapian::TermIterator it = xdb.allterms_begin();
|
1785 |
if (!is.empty())
|
1961 |
if (!is.empty())
|
|
... |
|
... |
1792 |
string term;
|
1968 |
string term;
|
1793 |
if (!prefix.empty())
|
1969 |
if (!prefix.empty())
|
1794 |
term = (*it).substr(prefix.length());
|
1970 |
term = (*it).substr(prefix.length());
|
1795 |
else
|
1971 |
else
|
1796 |
term = *it;
|
1972 |
term = *it;
|
1797 |
if (typ == ET_WILD) {
|
1973 |
|
1798 |
if (fnmatch(droot.c_str(), term.c_str(), 0) ==
|
1974 |
if (matcher.isNotNull() && !matcher->match(term))
|
1799 |
FNM_NOMATCH)
|
1975 |
continue;
|
1800 |
continue;
|
1976 |
|
1801 |
} else {
|
|
|
1802 |
if (regexec(&reg, term.c_str(), 0, 0, 0))
|
|
|
1803 |
continue;
|
|
|
1804 |
}
|
|
|
1805 |
// Do we want stem expansion here? We don't do it for now
|
|
|
1806 |
res.entries.push_back(TermMatchEntry(*it,
|
1977 |
res.entries.push_back(
|
1807 |
xdb.get_collection_freq(*it),
|
1978 |
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
1808 |
it.get_termfreq()));
|
1979 |
it.get_termfreq()));
|
1809 |
|
1980 |
|
1810 |
// The problem with truncating here is that this is done
|
1981 |
// The problem with truncating here is that this is done
|
1811 |
// alphabetically and we may not keep the most frequent
|
1982 |
// alphabetically and we may not keep the most frequent
|
1812 |
// terms. OTOH, not doing it may stall the program if
|
1983 |
// terms. OTOH, not doing it may stall the program if
|
1813 |
// we are walking the whole term list. We compromise
|
1984 |
// we are walking the whole term list. We compromise
|
|
... |
|
... |
1826 |
}
|
1997 |
}
|
1827 |
if (!m_reason.empty()) {
|
1998 |
if (!m_reason.empty()) {
|
1828 |
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
1999 |
LOGERR(("termMatch: %s\n", m_reason.c_str()));
|
1829 |
return false;
|
2000 |
return false;
|
1830 |
}
|
2001 |
}
|
1831 |
|
|
|
1832 |
if (typ == ET_REGEXP) {
|
|
|
1833 |
regfree(&reg);
|
|
|
1834 |
}
|
|
|
1835 |
|
|
|
1836 |
}
|
2002 |
}
|
1837 |
|
2003 |
|
1838 |
TermMatchCmpByTerm tcmp;
|
|
|
1839 |
sort(res.entries.begin(), res.entries.end(), tcmp);
|
|
|
1840 |
TermMatchTermEqual teq;
|
|
|
1841 |
vector<TermMatchEntry>::iterator uit =
|
|
|
1842 |
unique(res.entries.begin(), res.entries.end(), teq);
|
|
|
1843 |
res.entries.resize(uit - res.entries.begin());
|
|
|
1844 |
TermMatchCmpByWcf wcmp;
|
|
|
1845 |
sort(res.entries.begin(), res.entries.end(), wcmp);
|
|
|
1846 |
if (max > 0) {
|
|
|
1847 |
// Would need a small max and big stem expansion...
|
|
|
1848 |
res.entries.resize(MIN(res.entries.size(), (unsigned int)max));
|
|
|
1849 |
}
|
|
|
1850 |
return true;
|
2004 |
return true;
|
1851 |
}
|
2005 |
}
|
1852 |
|
2006 |
|
1853 |
/** Term list walking. */
|
2007 |
/** Term list walking. */
|
1854 |
class TermIter {
|
2008 |
class TermIter {
|