|
a/src/query/plaintorich.cpp |
|
b/src/query/plaintorich.cpp |
|
... |
|
... |
13 |
* along with this program; if not, write to the
|
13 |
* along with this program; if not, write to the
|
14 |
* Free Software Foundation, Inc.,
|
14 |
* Free Software Foundation, Inc.,
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
16 |
*/
|
16 |
*/
|
17 |
|
17 |
|
18 |
|
18 |
#include <limits.h>
|
19 |
#include <string>
|
19 |
#include <string>
|
20 |
#include <utility>
|
20 |
#include <utility>
|
21 |
#include <list>
|
21 |
#include <list>
|
22 |
#include <set>
|
22 |
#include <set>
|
23 |
#include <vector>
|
23 |
#include <vector>
|
|
... |
|
... |
52 |
struct MatchEntry {
|
52 |
struct MatchEntry {
|
53 |
// Start/End byte offsets in the document text
|
53 |
// Start/End byte offsets in the document text
|
54 |
pair<int, int> offs;
|
54 |
pair<int, int> offs;
|
55 |
// Index of the search group this comes from: this is to relate a
|
55 |
// Index of the search group this comes from: this is to relate a
|
56 |
// match to the original user input.
|
56 |
// match to the original user input.
|
57 |
unsigned int grpidx;
|
57 |
size_t grpidx;
|
58 |
MatchEntry(int sta, int sto, unsigned int idx)
|
58 |
MatchEntry(int sta, int sto, size_t idx)
|
59 |
: offs(sta, sto), grpidx(idx)
|
59 |
: offs(sta, sto), grpidx(idx)
|
60 |
{
|
60 |
{
|
61 |
}
|
61 |
}
|
62 |
};
|
62 |
};
|
63 |
|
63 |
|
|
... |
|
... |
103 |
|
103 |
|
104 |
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
|
104 |
//LOGDEB2(("Input dumbbed term: '%s' %d %d %d\n", dumb.c_str(),
|
105 |
// pos, bts, bte));
|
105 |
// pos, bts, bte));
|
106 |
|
106 |
|
107 |
// If this word is a search term, remember its byte-offset span.
|
107 |
// If this word is a search term, remember its byte-offset span.
|
108 |
map<string, unsigned int>::const_iterator it = m_terms.find(dumb);
|
108 |
map<string, size_t>::const_iterator it = m_terms.find(dumb);
|
109 |
if (it != m_terms.end()) {
|
109 |
if (it != m_terms.end()) {
|
110 |
tboffs.push_back(MatchEntry(bts, bte, (*it).second));
|
110 |
tboffs.push_back(MatchEntry(bts, bte, (*it).second));
|
111 |
}
|
111 |
}
|
112 |
|
112 |
|
113 |
// If word is part of a search group, update its positions list
|
113 |
// If word is part of a search group, update its positions list
|
|
... |
|
... |
133 |
|
133 |
|
134 |
// Word count. Used to call checkCancel from time to time.
|
134 |
// Word count. Used to call checkCancel from time to time.
|
135 |
int m_wcount;
|
135 |
int m_wcount;
|
136 |
|
136 |
|
137 |
// In: user query terms
|
137 |
// In: user query terms
|
138 |
map<string, unsigned int> m_terms;
|
138 |
map<string, size_t> m_terms;
|
139 |
|
139 |
|
140 |
// m_gterms holds all the terms in m_groups, as a set for quick lookup
|
140 |
// m_gterms holds all the terms in m_groups, as a set for quick lookup
|
141 |
set<string> m_gterms;
|
141 |
set<string> m_gterms;
|
142 |
|
142 |
|
143 |
const HighlightData& m_hdata;
|
143 |
const HighlightData& m_hdata;
|
|
... |
|
... |
212 |
|
212 |
|
213 |
// Find NEAR matches for one group of terms, update highlight map
|
213 |
// Find NEAR matches for one group of terms, update highlight map
|
214 |
bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
214 |
bool TextSplitPTR::matchGroup(unsigned int grpidx)
|
215 |
{
|
215 |
{
|
216 |
const vector<string>& terms = m_hdata.groups[grpidx];
|
216 |
const vector<string>& terms = m_hdata.groups[grpidx];
|
217 |
int window = m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx];
|
217 |
int window = int(m_hdata.groups[grpidx].size() + m_hdata.slacks[grpidx]);
|
218 |
|
218 |
|
219 |
LOGDEB1(("TextSplitPTR::matchGroup:d %d: %s\n", window,
|
219 |
LOGDEB1(("TextSplitPTR::matchGroup:d %d: %s\n", window,
|
220 |
vecStringToString(terms).c_str()));
|
220 |
vecStringToString(terms).c_str()));
|
221 |
|
221 |
|
222 |
// The position lists we are going to work with. We extract them from the
|
222 |
// The position lists we are going to work with. We extract them from the
|
|
... |
|
... |
268 |
int minpos = 0;
|
268 |
int minpos = 0;
|
269 |
// Walk the shortest plist and look for matches
|
269 |
// Walk the shortest plist and look for matches
|
270 |
for (vector<int>::iterator it = plists[0]->begin();
|
270 |
for (vector<int>::iterator it = plists[0]->begin();
|
271 |
it != plists[0]->end(); it++) {
|
271 |
it != plists[0]->end(); it++) {
|
272 |
int pos = *it;
|
272 |
int pos = *it;
|
273 |
int sta = int(10E9), sto = 0;
|
273 |
int sta = INT_MAX, sto = 0;
|
274 |
LOGDEB2(("MatchGroup: Testing at pos %d\n", pos));
|
274 |
LOGDEB2(("MatchGroup: Testing at pos %d\n", pos));
|
275 |
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
275 |
if (do_proximity_test(window,plists, 1, pos, pos, &sta, &sto, minpos)) {
|
276 |
LOGDEB1(("TextSplitPTR::matchGroup: MATCH termpos [%d,%d]\n",
|
276 |
LOGDEB1(("TextSplitPTR::matchGroup: MATCH termpos [%d,%d]\n",
|
277 |
sta, sto));
|
277 |
sta, sto));
|
278 |
// Maybe extend the window by 1st term position, this was not
|
278 |
// Maybe extend the window by 1st term position, this was not
|
|
... |
|
... |
415 |
}
|
415 |
}
|
416 |
|
416 |
|
417 |
// If we still have terms positions, check (byte) position. If
|
417 |
// If we still have terms positions, check (byte) position. If
|
418 |
// we are at or after a term match, mark.
|
418 |
// we are at or after a term match, mark.
|
419 |
if (tPosIt != tPosEnd) {
|
419 |
if (tPosIt != tPosEnd) {
|
420 |
int ibyteidx = chariter.getBpos();
|
420 |
int ibyteidx = int(chariter.getBpos());
|
421 |
if (ibyteidx == tPosIt->offs.first) {
|
421 |
if (ibyteidx == tPosIt->offs.first) {
|
422 |
if (!intag && ibyteidx >= (int)headend) {
|
422 |
if (!intag && ibyteidx >= (int)headend) {
|
423 |
*olit += startMatch(tPosIt->grpidx);
|
423 |
*olit += startMatch((unsigned int)(tPosIt->grpidx));
|
424 |
}
|
424 |
}
|
425 |
inrcltag = 1;
|
425 |
inrcltag = 1;
|
426 |
} else if (ibyteidx == tPosIt->offs.second) {
|
426 |
} else if (ibyteidx == tPosIt->offs.second) {
|
427 |
// Output end of match region tags
|
427 |
// Output end of match region tags
|
428 |
if (!intag && ibyteidx > (int)headend) {
|
428 |
if (!intag && ibyteidx > (int)headend) {
|