|
a/src/rcldb/rclterms.cpp |
|
b/src/rcldb/rclterms.cpp |
|
... |
|
... |
196 |
// The case/diac expansion db
|
196 |
// The case/diac expansion db
|
197 |
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
197 |
SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
|
198 |
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
198 |
XapComputableSynFamMember synac(xrdb, synFamDiCa, "all", &unacfoldtrans);
|
199 |
|
199 |
|
200 |
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
200 |
if (matchtyp == ET_WILD || matchtyp == ET_REGEXP) {
|
201 |
RefCntr<StrMatcher> matcher;
|
201 |
STD_SHARED_PTR<StrMatcher> matcher;
|
202 |
if (matchtyp == ET_WILD) {
|
202 |
if (matchtyp == ET_WILD) {
|
203 |
matcher = RefCntr<StrMatcher>(new StrWildMatcher(term));
|
203 |
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(term));
|
204 |
} else {
|
204 |
} else {
|
205 |
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(term));
|
205 |
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(term));
|
206 |
}
|
206 |
}
|
207 |
if (!diac_sensitive || !case_sensitive) {
|
207 |
if (!diac_sensitive || !case_sensitive) {
|
208 |
// Perform case/diac expansion on the exp as appropriate and
|
208 |
// Perform case/diac expansion on the exp as appropriate and
|
209 |
// expand the result.
|
209 |
// expand the result.
|
210 |
vector<string> exp;
|
210 |
vector<string> exp;
|
211 |
if (diac_sensitive) {
|
211 |
if (diac_sensitive) {
|
212 |
// Expand for diacritics and case, filtering for same diacritics
|
212 |
// Expand for diacritics and case, filtering for same diacritics
|
213 |
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
213 |
SynTermTransUnac foldtrans(UNACOP_FOLD);
|
214 |
synac.synKeyExpand(matcher.getptr(), exp, &foldtrans);
|
214 |
synac.synKeyExpand(matcher.get(), exp, &foldtrans);
|
215 |
} else if (case_sensitive) {
|
215 |
} else if (case_sensitive) {
|
216 |
// Expand for diacritics and case, filtering for same case
|
216 |
// Expand for diacritics and case, filtering for same case
|
217 |
SynTermTransUnac unactrans(UNACOP_UNAC);
|
217 |
SynTermTransUnac unactrans(UNACOP_UNAC);
|
218 |
synac.synKeyExpand(matcher.getptr(), exp, &unactrans);
|
218 |
synac.synKeyExpand(matcher.get(), exp, &unactrans);
|
219 |
} else {
|
219 |
} else {
|
220 |
// Expand for diacritics and case, no filtering
|
220 |
// Expand for diacritics and case, no filtering
|
221 |
synac.synKeyExpand(matcher.getptr(), exp);
|
221 |
synac.synKeyExpand(matcher.get(), exp);
|
222 |
}
|
222 |
}
|
223 |
// Retrieve additional info and filter against the index itself
|
223 |
// Retrieve additional info and filter against the index itself
|
224 |
for (vector<string>::const_iterator it = exp.begin();
|
224 |
for (vector<string>::const_iterator it = exp.begin();
|
225 |
it != exp.end(); it++) {
|
225 |
it != exp.end(); it++) {
|
226 |
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
226 |
idxTermMatch(ET_NONE, "", *it, res, max, field);
|
|
... |
|
... |
335 |
prefix = wrap_prefix(ftp->pfx);
|
335 |
prefix = wrap_prefix(ftp->pfx);
|
336 |
}
|
336 |
}
|
337 |
}
|
337 |
}
|
338 |
res.prefix = prefix;
|
338 |
res.prefix = prefix;
|
339 |
|
339 |
|
340 |
RefCntr<StrMatcher> matcher;
|
340 |
STD_SHARED_PTR<StrMatcher> matcher;
|
341 |
if (typ == ET_REGEXP) {
|
341 |
if (typ == ET_REGEXP) {
|
342 |
matcher = RefCntr<StrMatcher>(new StrRegexpMatcher(root));
|
342 |
matcher = STD_SHARED_PTR<StrMatcher>(new StrRegexpMatcher(root));
|
343 |
if (!matcher->ok()) {
|
343 |
if (!matcher->ok()) {
|
344 |
LOGERR(("termMatch: regcomp failed: %s\n",
|
344 |
LOGERR(("termMatch: regcomp failed: %s\n",
|
345 |
matcher->getreason().c_str()))
|
345 |
matcher->getreason().c_str()))
|
346 |
return false;
|
346 |
return false;
|
347 |
}
|
347 |
}
|
348 |
} else if (typ == ET_WILD) {
|
348 |
} else if (typ == ET_WILD) {
|
349 |
matcher = RefCntr<StrMatcher>(new StrWildMatcher(root));
|
349 |
matcher = STD_SHARED_PTR<StrMatcher>(new StrWildMatcher(root));
|
350 |
}
|
350 |
}
|
351 |
|
351 |
|
352 |
// Find the initial section before any special char
|
352 |
// Find the initial section before any special char
|
353 |
string::size_type es = string::npos;
|
353 |
string::size_type es = string::npos;
|
354 |
if (matcher.isNotNull()) {
|
354 |
if (matcher) {
|
355 |
es = matcher->baseprefixlen();
|
355 |
es = matcher->baseprefixlen();
|
356 |
}
|
356 |
}
|
357 |
|
357 |
|
358 |
// Initial section: the part of the prefix+expr before the
|
358 |
// Initial section: the part of the prefix+expr before the
|
359 |
// first wildcard character. We only scan the part of the
|
359 |
// first wildcard character. We only scan the part of the
|
|
... |
|
... |
389 |
continue;
|
389 |
continue;
|
390 |
}
|
390 |
}
|
391 |
term = *it;
|
391 |
term = *it;
|
392 |
}
|
392 |
}
|
393 |
|
393 |
|
394 |
if (matcher.isNotNull() && !matcher->match(term))
|
394 |
if (matcher && !matcher->match(term))
|
395 |
continue;
|
395 |
continue;
|
396 |
|
396 |
|
397 |
res.entries.push_back(
|
397 |
res.entries.push_back(
|
398 |
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
398 |
TermMatchEntry(*it, xdb.get_collection_freq(*it),
|
399 |
it.get_termfreq()));
|
399 |
it.get_termfreq()));
|