|
a/src/rcldb/searchdata.cpp |
|
b/src/rcldb/searchdata.cpp |
|
... |
|
... |
199 |
return true;
|
199 |
return true;
|
200 |
}
|
200 |
}
|
201 |
|
201 |
|
202 |
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
202 |
bool SearchData::clausesToQuery(Rcl::Db &db, SClType tp,
|
203 |
vector<SearchDataClause*>& query,
|
203 |
vector<SearchDataClause*>& query,
|
204 |
string& reason, void *d)
|
204 |
string& reason, void *d,
|
|
|
205 |
int maxexp, int maxcl)
|
205 |
{
|
206 |
{
|
206 |
Xapian::Query xq;
|
207 |
Xapian::Query xq;
|
207 |
for (qlist_it_t it = query.begin(); it != query.end(); it++) {
|
208 |
for (qlist_it_t it = query.begin(); it != query.end(); it++) {
|
208 |
Xapian::Query nq;
|
209 |
Xapian::Query nq;
|
209 |
if (!(*it)->toNativeQuery(db, &nq)) {
|
210 |
if (!(*it)->toNativeQuery(db, &nq, maxexp, maxcl)) {
|
210 |
LOGERR(("SearchData::clausesToQuery: toNativeQuery failed\n"));
|
211 |
LOGERR(("SearchData::clausesToQuery: toNativeQuery failed: %s\n",
|
|
|
212 |
(*it)->getReason().c_str()));
|
211 |
reason = (*it)->getReason();
|
213 |
reason += (*it)->getReason() + " ";
|
212 |
return false;
|
214 |
return false;
|
213 |
}
|
215 |
}
|
214 |
if (nq.empty()) {
|
216 |
if (nq.empty()) {
|
215 |
LOGDEB(("SearchData::clausesToQuery: skipping empty clause\n"));
|
217 |
LOGDEB(("SearchData::clausesToQuery: skipping empty clause\n"));
|
216 |
continue;
|
218 |
continue;
|
|
... |
|
... |
234 |
else
|
236 |
else
|
235 |
xq = nq;
|
237 |
xq = nq;
|
236 |
} else {
|
238 |
} else {
|
237 |
xq = Xapian::Query(op, xq, nq);
|
239 |
xq = Xapian::Query(op, xq, nq);
|
238 |
}
|
240 |
}
|
|
|
241 |
if (int(xq.get_length()) >= maxcl) {
|
|
|
242 |
LOGERR(("Maximum Xapian query size exceeded."
|
|
|
243 |
" Maybe increase maxXapianClauses."));
|
|
|
244 |
m_reason += "Maximum Xapian query size exceeded."
|
|
|
245 |
" Maybe increase maxXapianClauses.";
|
|
|
246 |
return false;
|
|
|
247 |
}
|
239 |
}
|
248 |
}
|
240 |
if (xq.empty())
|
249 |
if (xq.empty())
|
241 |
xq = Xapian::Query::MatchAll;
|
250 |
xq = Xapian::Query::MatchAll;
|
242 |
|
251 |
|
243 |
*((Xapian::Query *)d) = xq;
|
252 |
*((Xapian::Query *)d) = xq;
|
244 |
return true;
|
253 |
return true;
|
245 |
}
|
254 |
}
|
246 |
|
255 |
|
247 |
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
|
256 |
bool SearchData::toNativeQuery(Rcl::Db &db, void *d, int maxexp, int maxcl)
|
248 |
{
|
257 |
{
|
249 |
LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
|
258 |
LOGDEB(("SearchData::toNativeQuery: stemlang [%s]\n", m_stemlang.c_str()));
|
250 |
m_reason.erase();
|
259 |
m_reason.erase();
|
251 |
|
260 |
|
252 |
// Walk the clause list translating each in turn and building the
|
261 |
// Walk the clause list translating each in turn and building the
|
253 |
// Xapian query tree
|
262 |
// Xapian query tree
|
254 |
Xapian::Query xq;
|
263 |
Xapian::Query xq;
|
255 |
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
|
264 |
if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq, maxexp, maxcl)) {
|
256 |
LOGERR(("SearchData::toNativeQuery: clausesToQuery failed\n"));
|
265 |
LOGERR(("SearchData::toNativeQuery: clausesToQuery failed. reason: %s\n",
|
|
|
266 |
m_reason.c_str()));
|
257 |
return false;
|
267 |
return false;
|
258 |
}
|
268 |
}
|
259 |
|
269 |
|
260 |
if (m_haveDates) {
|
270 |
if (m_haveDates) {
|
261 |
// If one of the extremities is unset, compute db extrema
|
271 |
// If one of the extremities is unset, compute db extrema
|
|
... |
|
... |
618 |
// The object keeps track of the query terms and term groups while
|
628 |
// The object keeps track of the query terms and term groups while
|
619 |
// translating.
|
629 |
// translating.
|
620 |
class StringToXapianQ {
|
630 |
class StringToXapianQ {
|
621 |
public:
|
631 |
public:
|
622 |
StringToXapianQ(Db& db, HighlightData& hld, const string& field,
|
632 |
StringToXapianQ(Db& db, HighlightData& hld, const string& field,
|
623 |
const string &stmlng, bool boostUser)
|
633 |
const string &stmlng, bool boostUser, int maxexp, int maxcl)
|
624 |
: m_db(db), m_field(field), m_stemlang(stmlng),
|
634 |
: m_db(db), m_field(field), m_stemlang(stmlng),
|
625 |
m_doBoostUserTerms(boostUser), m_hld(hld), m_autodiacsens(false),
|
635 |
m_doBoostUserTerms(boostUser), m_hld(hld), m_autodiacsens(false),
|
626 |
m_autocasesens(true)
|
636 |
m_autocasesens(true), m_maxexp(maxexp), m_maxcl(maxcl), m_curcl(0)
|
627 |
{
|
637 |
{
|
628 |
m_db.getConf()->getConfParam("autodiacsens", &m_autodiacsens);
|
638 |
m_db.getConf()->getConfParam("autodiacsens", &m_autodiacsens);
|
629 |
m_db.getConf()->getConfParam("autocasesens", &m_autocasesens);
|
639 |
m_db.getConf()->getConfParam("autocasesens", &m_autocasesens);
|
630 |
}
|
640 |
}
|
631 |
|
641 |
|
|
... |
|
... |
633 |
int mods,
|
643 |
int mods,
|
634 |
string &ermsg,
|
644 |
string &ermsg,
|
635 |
vector<Xapian::Query> &pqueries,
|
645 |
vector<Xapian::Query> &pqueries,
|
636 |
int slack = 0, bool useNear = false);
|
646 |
int slack = 0, bool useNear = false);
|
637 |
private:
|
647 |
private:
|
638 |
void expandTerm(int mods,
|
648 |
bool expandTerm(string& ermsg, int mods,
|
639 |
const string& term, vector<string>& exp,
|
649 |
const string& term, vector<string>& exp,
|
640 |
string& sterm, const string& prefix);
|
650 |
string& sterm, const string& prefix);
|
641 |
// After splitting entry on whitespace: process non-phrase element
|
651 |
// After splitting entry on whitespace: process non-phrase element
|
642 |
void processSimpleSpan(const string& span,
|
652 |
void processSimpleSpan(string& ermsg, const string& span,
|
643 |
int mods,
|
653 |
int mods,
|
644 |
vector<Xapian::Query> &pqueries);
|
654 |
vector<Xapian::Query> &pqueries);
|
645 |
// Process phrase/near element
|
655 |
// Process phrase/near element
|
646 |
void processPhraseOrNear(TextSplitQ *splitData,
|
656 |
void processPhraseOrNear(string& ermsg, TextSplitQ *splitData,
|
647 |
int mods,
|
657 |
int mods,
|
648 |
vector<Xapian::Query> &pqueries,
|
658 |
vector<Xapian::Query> &pqueries,
|
649 |
bool useNear, int slack);
|
659 |
bool useNear, int slack);
|
650 |
|
660 |
|
651 |
Db& m_db;
|
661 |
Db& m_db;
|
|
... |
|
... |
653 |
const string& m_stemlang;
|
663 |
const string& m_stemlang;
|
654 |
const bool m_doBoostUserTerms;
|
664 |
const bool m_doBoostUserTerms;
|
655 |
HighlightData& m_hld;
|
665 |
HighlightData& m_hld;
|
656 |
bool m_autodiacsens;
|
666 |
bool m_autodiacsens;
|
657 |
bool m_autocasesens;
|
667 |
bool m_autocasesens;
|
|
|
668 |
int m_maxexp;
|
|
|
669 |
int m_maxcl;
|
|
|
670 |
int m_curcl;
|
658 |
};
|
671 |
};
|
659 |
|
672 |
|
660 |
#if 1
|
673 |
#if 1
|
661 |
static void listVector(const string& what, const vector<string>&l)
|
674 |
static void listVector(const string& what, const vector<string>&l)
|
662 |
{
|
675 |
{
|
|
... |
|
... |
677 |
* @param sterm output original input term if there were no wildcards
|
690 |
* @param sterm output original input term if there were no wildcards
|
678 |
* @param prefix field prefix in index. We could recompute it, but the caller
|
691 |
* @param prefix field prefix in index. We could recompute it, but the caller
|
679 |
* has it already. Used in the simple case where there is nothing to expand,
|
692 |
* has it already. Used in the simple case where there is nothing to expand,
|
680 |
* and we just return the prefixed term (else Db::termMatch deals with it).
|
693 |
* and we just return the prefixed term (else Db::termMatch deals with it).
|
681 |
*/
|
694 |
*/
|
682 |
void StringToXapianQ::expandTerm(int mods,
|
695 |
bool StringToXapianQ::expandTerm(string& ermsg, int mods,
|
683 |
const string& term,
|
696 |
const string& term,
|
684 |
vector<string>& oexp, string &sterm,
|
697 |
vector<string>& oexp, string &sterm,
|
685 |
const string& prefix)
|
698 |
const string& prefix)
|
686 |
{
|
699 |
{
|
687 |
LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
|
700 |
LOGDEB0(("expandTerm: mods 0x%x fld [%s] trm [%s] lang [%s]\n",
|
688 |
mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
|
701 |
mods, m_field.c_str(), term.c_str(), m_stemlang.c_str()));
|
689 |
sterm.clear();
|
702 |
sterm.clear();
|
690 |
oexp.clear();
|
703 |
oexp.clear();
|
691 |
if (term.empty())
|
704 |
if (term.empty())
|
692 |
return;
|
705 |
return true;
|
693 |
|
706 |
|
694 |
bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
|
707 |
bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
|
695 |
|
708 |
|
696 |
// If there are no wildcards, add term to the list of user-entered terms
|
709 |
// If there are no wildcards, add term to the list of user-entered terms
|
697 |
if (!haswild)
|
710 |
if (!haswild)
|
|
... |
|
... |
751 |
if (noexpansion) {
|
764 |
if (noexpansion) {
|
752 |
sterm = term;
|
765 |
sterm = term;
|
753 |
oexp.push_back(prefix + term);
|
766 |
oexp.push_back(prefix + term);
|
754 |
m_hld.terms[term] = m_hld.uterms.size() - 1;
|
767 |
m_hld.terms[term] = m_hld.uterms.size() - 1;
|
755 |
LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
|
768 |
LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
|
756 |
return;
|
769 |
return true;
|
757 |
}
|
770 |
}
|
758 |
|
771 |
|
759 |
// Make objects before the goto jungle to avoid compiler complaints
|
772 |
// Make objects before the goto jungle to avoid compiler complaints
|
760 |
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
773 |
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
761 |
XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all",
|
774 |
XapComputableSynFamMember synac(m_db.m_ndb->xrdb, synFamDiCa, "all",
|
|
... |
|
... |
768 |
if (haswild) {
|
781 |
if (haswild) {
|
769 |
// Note that if there are wildcards, we do a direct from-index
|
782 |
// Note that if there are wildcards, we do a direct from-index
|
770 |
// expansion, which means that we are casediac-sensitive. There
|
783 |
// expansion, which means that we are casediac-sensitive. There
|
771 |
// would be nothing to prevent us to expand from the casediac
|
784 |
// would be nothing to prevent us to expand from the casediac
|
772 |
// synonyms first. To be done later
|
785 |
// synonyms first. To be done later
|
773 |
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, term, res, -1, m_field);
|
786 |
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang,term,res,m_maxexp,m_field);
|
774 |
goto termmatchtoresult;
|
787 |
goto termmatchtoresult;
|
775 |
}
|
788 |
}
|
776 |
|
789 |
|
777 |
sterm = term;
|
790 |
sterm = term;
|
778 |
|
791 |
|
779 |
#ifdef RCL_INDEX_STRIPCHARS
|
792 |
#ifdef RCL_INDEX_STRIPCHARS
|
780 |
|
793 |
|
781 |
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
|
794 |
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, m_maxexp, m_field);
|
782 |
|
795 |
|
783 |
#else
|
796 |
#else
|
784 |
|
797 |
|
785 |
if (o_index_stripchars) {
|
798 |
if (o_index_stripchars) {
|
786 |
// If the index is raw, we can only come here if nostemexp is unset
|
799 |
// If the index is raw, we can only come here if nostemexp is unset
|
787 |
// and we just need stem expansion.
|
800 |
// and we just need stem expansion.
|
788 |
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang, term, res, -1, m_field);
|
801 |
m_db.termMatch(Rcl::Db::ET_STEM, m_stemlang,term,res,m_maxexp,m_field);
|
789 |
goto termmatchtoresult;
|
802 |
goto termmatchtoresult;
|
790 |
}
|
803 |
}
|
791 |
|
804 |
|
792 |
// No stem expansion when diacritic or case sensitivity is set, it
|
805 |
// No stem expansion when diacritic or case sensitivity is set, it
|
793 |
// makes no sense (it would mess with the diacritics anyway if
|
806 |
// makes no sense (it would mess with the diacritics anyway if
|
|
... |
|
... |
852 |
// Bogus wildcard expand to generate the result (possibly add prefixes)
|
865 |
// Bogus wildcard expand to generate the result (possibly add prefixes)
|
853 |
exptotermatch:
|
866 |
exptotermatch:
|
854 |
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
|
867 |
LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
|
855 |
for (vector<string>::const_iterator it = lexp.begin();
|
868 |
for (vector<string>::const_iterator it = lexp.begin();
|
856 |
it != lexp.end(); it++) {
|
869 |
it != lexp.end(); it++) {
|
857 |
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res, -1, m_field);
|
870 |
m_db.termMatch(Rcl::Db::ET_WILD, m_stemlang, *it, res,m_maxexp,m_field);
|
858 |
}
|
871 |
}
|
859 |
#endif
|
872 |
#endif
|
860 |
|
873 |
|
861 |
// Term match entries to vector of terms
|
874 |
// Term match entries to vector of terms
|
862 |
termmatchtoresult:
|
875 |
termmatchtoresult:
|
|
|
876 |
if (int(res.entries.size()) >= m_maxexp) {
|
|
|
877 |
ermsg = "Maximum term expansion size exceeded."
|
|
|
878 |
" Maybe increase maxTermExpand.";
|
|
|
879 |
return false;
|
|
|
880 |
}
|
863 |
for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();
|
881 |
for (vector<TermMatchEntry>::const_iterator it = res.entries.begin();
|
864 |
it != res.entries.end(); it++) {
|
882 |
it != res.entries.end(); it++) {
|
865 |
oexp.push_back(it->term);
|
883 |
oexp.push_back(it->term);
|
866 |
}
|
884 |
}
|
867 |
// If the term does not exist at all in the db, the return from
|
885 |
// If the term does not exist at all in the db, the return from
|
|
... |
|
... |
874 |
for (vector<string>::const_iterator it = oexp.begin();
|
892 |
for (vector<string>::const_iterator it = oexp.begin();
|
875 |
it != oexp.end(); it++) {
|
893 |
it != oexp.end(); it++) {
|
876 |
m_hld.terms[strip_prefix(*it)] = term;
|
894 |
m_hld.terms[strip_prefix(*it)] = term;
|
877 |
}
|
895 |
}
|
878 |
LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
|
896 |
LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
|
|
|
897 |
return true;
|
879 |
}
|
898 |
}
|
880 |
|
899 |
|
881 |
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
|
900 |
// Do distribution of string vectors: a,b c,d -> a,c a,d b,c b,d
|
882 |
void multiply_groups(vector<vector<string> >::const_iterator vvit,
|
901 |
void multiply_groups(vector<vector<string> >::const_iterator vvit,
|
883 |
vector<vector<string> >::const_iterator vvend,
|
902 |
vector<vector<string> >::const_iterator vvend,
|
|
... |
|
... |
910 |
// vector)
|
929 |
// vector)
|
911 |
comb.pop_back();
|
930 |
comb.pop_back();
|
912 |
}
|
931 |
}
|
913 |
}
|
932 |
}
|
914 |
|
933 |
|
915 |
void StringToXapianQ::processSimpleSpan(const string& span,
|
934 |
void StringToXapianQ::processSimpleSpan(string& ermsg, const string& span,
|
916 |
int mods,
|
935 |
int mods,
|
917 |
vector<Xapian::Query> &pqueries)
|
936 |
vector<Xapian::Query> &pqueries)
|
918 |
{
|
937 |
{
|
919 |
LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
|
938 |
LOGDEB0(("StringToXapianQ::processSimpleSpan: [%s] mods 0x%x\n",
|
920 |
span.c_str(), (unsigned int)mods));
|
939 |
span.c_str(), (unsigned int)mods));
|
|
... |
|
... |
925 |
const FieldTraits *ftp;
|
944 |
const FieldTraits *ftp;
|
926 |
if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
|
945 |
if (!m_field.empty() && m_db.fieldToTraits(m_field, &ftp)) {
|
927 |
prefix = wrap_prefix(ftp->pfx);
|
946 |
prefix = wrap_prefix(ftp->pfx);
|
928 |
}
|
947 |
}
|
929 |
|
948 |
|
930 |
expandTerm(mods, span, exp, sterm, prefix);
|
949 |
if (!expandTerm(ermsg, mods, span, exp, sterm, prefix))
|
|
|
950 |
return;
|
931 |
|
951 |
|
932 |
// Set up the highlight data. No prefix should go in there
|
952 |
// Set up the highlight data. No prefix should go in there
|
933 |
for (vector<string>::const_iterator it = exp.begin();
|
953 |
for (vector<string>::const_iterator it = exp.begin();
|
934 |
it != exp.end(); it++) {
|
954 |
it != exp.end(); it++) {
|
935 |
m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
|
955 |
m_hld.groups.push_back(vector<string>(1, it->substr(prefix.size())));
|
|
... |
|
... |
937 |
m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
|
957 |
m_hld.grpsugidx.push_back(m_hld.ugroups.size() - 1);
|
938 |
}
|
958 |
}
|
939 |
|
959 |
|
940 |
// Push either term or OR of stem-expanded set
|
960 |
// Push either term or OR of stem-expanded set
|
941 |
Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
|
961 |
Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
|
|
|
962 |
m_curcl += exp.size();
|
942 |
|
963 |
|
943 |
// If sterm (simplified original user term) is not null, give it a
|
964 |
// If sterm (simplified original user term) is not null, give it a
|
944 |
// relevance boost. We do this even if no expansion occurred (else
|
965 |
// relevance boost. We do this even if no expansion occurred (else
|
945 |
// the non-expanded terms in a term list would end-up with even
|
966 |
// the non-expanded terms in a term list would end-up with even
|
946 |
// less wqf). This does not happen if there are wildcards anywhere
|
967 |
// less wqf). This does not happen if there are wildcards anywhere
|
|
... |
|
... |
955 |
|
976 |
|
956 |
// User entry element had several terms: transform into a PHRASE or
|
977 |
// User entry element had several terms: transform into a PHRASE or
|
957 |
// NEAR xapian query, the elements of which can themselves be OR
|
978 |
// NEAR xapian query, the elements of which can themselves be OR
|
958 |
// queries if the terms get expanded by stemming or wildcards (we
|
979 |
// queries if the terms get expanded by stemming or wildcards (we
|
959 |
// don't do stemming for PHRASE though)
|
980 |
// don't do stemming for PHRASE though)
|
960 |
void StringToXapianQ::processPhraseOrNear(TextSplitQ *splitData,
|
981 |
void StringToXapianQ::processPhraseOrNear(string& ermsg, TextSplitQ *splitData,
|
961 |
int mods,
|
982 |
int mods,
|
962 |
vector<Xapian::Query> &pqueries,
|
983 |
vector<Xapian::Query> &pqueries,
|
963 |
bool useNear, int slack)
|
984 |
bool useNear, int slack)
|
964 |
{
|
985 |
{
|
965 |
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
|
986 |
Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR :
|
|
... |
|
... |
997 |
int lmods = mods;
|
1018 |
int lmods = mods;
|
998 |
if (nostemexp)
|
1019 |
if (nostemexp)
|
999 |
lmods |= SearchDataClause::SDCM_NOSTEMMING;
|
1020 |
lmods |= SearchDataClause::SDCM_NOSTEMMING;
|
1000 |
string sterm;
|
1021 |
string sterm;
|
1001 |
vector<string> exp;
|
1022 |
vector<string> exp;
|
1002 |
expandTerm(lmods, *it, exp, sterm, prefix);
|
1023 |
if (!expandTerm(ermsg, lmods, *it, exp, sterm, prefix))
|
|
|
1024 |
return;
|
1003 |
LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
|
1025 |
LOGDEB0(("ProcessPhraseOrNear: exp size %d\n", exp.size()));
|
1004 |
listVector("", exp);
|
1026 |
listVector("", exp);
|
1005 |
// groups is used for highlighting, we don't want prefixes in there.
|
1027 |
// groups is used for highlighting, we don't want prefixes in there.
|
1006 |
vector<string> noprefs;
|
1028 |
vector<string> noprefs;
|
1007 |
for (vector<string>::const_iterator it = exp.begin();
|
1029 |
for (vector<string>::const_iterator it = exp.begin();
|
|
... |
|
... |
1009 |
noprefs.push_back(it->substr(prefix.size()));
|
1031 |
noprefs.push_back(it->substr(prefix.size()));
|
1010 |
}
|
1032 |
}
|
1011 |
groups.push_back(noprefs);
|
1033 |
groups.push_back(noprefs);
|
1012 |
orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
|
1034 |
orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR,
|
1013 |
exp.begin(), exp.end()));
|
1035 |
exp.begin(), exp.end()));
|
|
|
1036 |
m_curcl += exp.size();
|
|
|
1037 |
if (m_curcl >= m_maxcl)
|
|
|
1038 |
return;
|
1014 |
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
1039 |
#ifdef XAPIAN_NEAR_EXPAND_SINGLE_BUF
|
1015 |
if (exp.size() > 1)
|
1040 |
if (exp.size() > 1)
|
1016 |
hadmultiple = true;
|
1041 |
hadmultiple = true;
|
1017 |
#endif
|
1042 |
#endif
|
1018 |
}
|
1043 |
}
|
|
... |
|
... |
1097 |
{
|
1122 |
{
|
1098 |
LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "
|
1123 |
LOGDEB(("StringToXapianQ:pUS:: qstr [%s] fld [%s] mods 0x%x "
|
1099 |
"slack %d near %d\n",
|
1124 |
"slack %d near %d\n",
|
1100 |
iq.c_str(), m_field.c_str(), mods, slack, useNear));
|
1125 |
iq.c_str(), m_field.c_str(), mods, slack, useNear));
|
1101 |
ermsg.erase();
|
1126 |
ermsg.erase();
|
1102 |
|
1127 |
m_curcl = 0;
|
1103 |
const StopList stops = m_db.getStopList();
|
1128 |
const StopList stops = m_db.getStopList();
|
1104 |
|
1129 |
|
1105 |
// Simple whitespace-split input into user-level words and
|
1130 |
// Simple whitespace-split input into user-level words and
|
1106 |
// double-quoted phrases: word1 word2 "this is a phrase".
|
1131 |
// double-quoted phrases: word1 word2 "this is a phrase".
|
1107 |
//
|
1132 |
//
|
|
... |
|
... |
1163 |
case 1: {
|
1188 |
case 1: {
|
1164 |
int lmods = mods;
|
1189 |
int lmods = mods;
|
1165 |
if (splitter.nostemexps.front())
|
1190 |
if (splitter.nostemexps.front())
|
1166 |
lmods |= SearchDataClause::SDCM_NOSTEMMING;
|
1191 |
lmods |= SearchDataClause::SDCM_NOSTEMMING;
|
1167 |
m_hld.ugroups.push_back(vector<string>(1, *it));
|
1192 |
m_hld.ugroups.push_back(vector<string>(1, *it));
|
1168 |
processSimpleSpan(splitter.terms.front(), lmods, pqueries);
|
1193 |
processSimpleSpan(ermsg,splitter.terms.front(),lmods, pqueries);
|
1169 |
}
|
1194 |
}
|
1170 |
break;
|
1195 |
break;
|
1171 |
default:
|
1196 |
default:
|
1172 |
m_hld.ugroups.push_back(vector<string>(1, *it));
|
1197 |
m_hld.ugroups.push_back(vector<string>(1, *it));
|
1173 |
processPhraseOrNear(&splitter, mods, pqueries, useNear, slack);
|
1198 |
processPhraseOrNear(ermsg, &splitter, mods, pqueries,
|
|
|
1199 |
useNear, slack);
|
|
|
1200 |
}
|
|
|
1201 |
if (m_curcl >= m_maxcl) {
|
|
|
1202 |
ermsg = "Maximum Xapian query size exceeded."
|
|
|
1203 |
" Maybe increase maxXapianClauses.";
|
|
|
1204 |
break;
|
1174 |
}
|
1205 |
}
|
1175 |
}
|
1206 |
}
|
1176 |
} catch (const Xapian::Error &e) {
|
1207 |
} catch (const Xapian::Error &e) {
|
1177 |
ermsg = e.get_msg();
|
1208 |
ermsg = e.get_msg();
|
1178 |
} catch (const string &s) {
|
1209 |
} catch (const string &s) {
|
|
... |
|
... |
1188 |
}
|
1219 |
}
|
1189 |
return true;
|
1220 |
return true;
|
1190 |
}
|
1221 |
}
|
1191 |
|
1222 |
|
1192 |
// Translate a simple OR, AND, or EXCL search clause.
|
1223 |
// Translate a simple OR, AND, or EXCL search clause.
|
1193 |
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
|
1224 |
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p,
|
|
|
1225 |
int maxexp, int maxcl)
|
1194 |
{
|
1226 |
{
|
1195 |
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
|
1227 |
LOGDEB2(("SearchDataClauseSimple::toNativeQuery: stemlang [%s]\n",
|
1196 |
getStemLang().c_str()));
|
1228 |
getStemLang().c_str()));
|
1197 |
|
1229 |
|
1198 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1230 |
Xapian::Query *qp = (Xapian::Query *)p;
|
|
... |
|
... |
1214 |
// do it if there are wildcards anywhere, this would skew the results.
|
1246 |
// do it if there are wildcards anywhere, this would skew the results.
|
1215 |
bool doBoostUserTerm =
|
1247 |
bool doBoostUserTerm =
|
1216 |
(m_parentSearch && !m_parentSearch->haveWildCards()) ||
|
1248 |
(m_parentSearch && !m_parentSearch->haveWildCards()) ||
|
1217 |
(m_parentSearch == 0 && !m_haveWildCards);
|
1249 |
(m_parentSearch == 0 && !m_haveWildCards);
|
1218 |
|
1250 |
|
1219 |
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm);
|
1251 |
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm,
|
|
|
1252 |
maxexp, maxcl);
|
1220 |
if (!tr.processUserString(m_text, getModifiers(), m_reason, pqueries))
|
1253 |
if (!tr.processUserString(m_text, getModifiers(), m_reason, pqueries))
|
1221 |
return false;
|
1254 |
return false;
|
1222 |
if (pqueries.empty()) {
|
1255 |
if (pqueries.empty()) {
|
1223 |
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
1256 |
LOGERR(("SearchDataClauseSimple: resolved to null query\n"));
|
1224 |
return true;
|
1257 |
return true;
|
|
... |
|
... |
1238 |
//
|
1271 |
//
|
1239 |
// We do not split the entry any more (used to do some crazy thing
|
1272 |
// We do not split the entry any more (used to do some crazy thing
|
1240 |
// about expanding multiple fragments in the past). We just take the
|
1273 |
// about expanding multiple fragments in the past). We just take the
|
1241 |
// value blanks and all and expand this against the indexed unsplit
|
1274 |
// value blanks and all and expand this against the indexed unsplit
|
1242 |
// file names
|
1275 |
// file names
|
1243 |
// Build the native (Xapian) query for a file name clause.
//
// Both sides of the diff are shown row by row: the first copy of each
// statement is the old version, the second is the new one.
//
// @param db      index handle; db.filenameWildExp() expands m_text into
//                the term list `names`.
// @param p       really a Xapian::Query*; it is first reset to an empty
//                query, then set to the OR of every expanded name.
// @param maxexp  (new version only) forwarded to filenameWildExp().
// @param (int)   (new version only) unnamed and unused in this function.
// @return always true in the code visible here.
//
// NOTE(review): nothing in this function reports that the expansion limit
// was reached, whereas expandTerm() sets an error message and returns
// false in that case -- confirm whether filenameWildExp() signals it.
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
|
1276 |
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p,
|
|
|
1277 |
int maxexp, int)
|
1244 |
{
|
1278 |
{
|
1245 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1279 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1246 |
*qp = Xapian::Query();
|
1280 |
*qp = Xapian::Query();
|
1247 |

1281 |

1248 |
vector<string> names;
|
1282 |
vector<string> names;
|
1249 |
db.filenameWildExp(m_text, names);
|
1283 |
// New version: pass the expansion limit down to the wildcard expansion.
db.filenameWildExp(m_text, names, maxexp);
|
1250 |
*qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
|
1284 |
*qp = Xapian::Query(Xapian::Query::OP_OR, names.begin(), names.end());
|
1251 |

1285 |

1252 |
// Only wrap in a weight-scaling node when the weight differs from 1.0.
if (m_weight != 1.0) {
|
1286 |
if (m_weight != 1.0) {
|
1253 |
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
1287 |
*qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
|
1254 |
}
|
1288 |
}
|
1255 |
return true;
|
1289 |
return true;
|
1256 |
}
|
1290 |
}
|
1257 |
|
1291 |
|
1258 |
// Translate NEAR or PHRASE clause.
|
1292 |
// Translate NEAR or PHRASE clause.
|
1259 |
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
|
1293 |
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p,
|
|
|
1294 |
int maxexp, int maxcl)
|
1260 |
{
|
1295 |
{
|
1261 |
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
1296 |
LOGDEB(("SearchDataClauseDist::toNativeQuery\n"));
|
1262 |
|
1297 |
|
1263 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1298 |
Xapian::Query *qp = (Xapian::Query *)p;
|
1264 |
*qp = Xapian::Query();
|
1299 |
*qp = Xapian::Query();
|
|
... |
|
... |
1279 |
if (m_text.find('\"') != string::npos) {
|
1314 |
if (m_text.find('\"') != string::npos) {
|
1280 |
m_text = neutchars(m_text, "\"");
|
1315 |
m_text = neutchars(m_text, "\"");
|
1281 |
}
|
1316 |
}
|
1282 |
string s = cstr_dquote + m_text + cstr_dquote;
|
1317 |
string s = cstr_dquote + m_text + cstr_dquote;
|
1283 |
bool useNear = (m_tp == SCLT_NEAR);
|
1318 |
bool useNear = (m_tp == SCLT_NEAR);
|
1284 |
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm);
|
1319 |
StringToXapianQ tr(db, m_hldata, m_field, getStemLang(), doBoostUserTerm,
|
|
|
1320 |
maxexp, maxcl);
|
1285 |
if (!tr.processUserString(s, getModifiers(), m_reason, pqueries,
|
1321 |
if (!tr.processUserString(s, getModifiers(), m_reason, pqueries,
|
1286 |
m_slack, useNear))
|
1322 |
m_slack, useNear))
|
1287 |
return false;
|
1323 |
return false;
|
1288 |
if (pqueries.empty()) {
|
1324 |
if (pqueries.empty()) {
|
1289 |
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
|
1325 |
LOGERR(("SearchDataClauseDist: resolved to null query\n"));
|