|
a/src/common/textsplit.cpp |
|
b/src/common/textsplit.cpp |
|
... |
|
... |
276 |
}
|
276 |
}
|
277 |
|
277 |
|
278 |
|
278 |
|
279 |
// Generate terms from span. Have to take into account the
|
279 |
// Generate terms from span. Have to take into account the
|
280 |
// flags: ONLYSPANS, NOSPANS, noNumbers
|
280 |
// flags: ONLYSPANS, NOSPANS, noNumbers
|
281 |
bool TextSplit::words_from_span()
|
281 |
bool TextSplit::words_from_span(int bp)
|
282 |
{
|
282 |
{
|
283 |
#if 0
|
283 |
#if 0
|
284 |
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
284 |
cerr << "Span: [" << m_span << "] " << " w_i_s size: " <<
|
285 |
m_words_in_span.size() << " : ";
|
285 |
m_words_in_span.size() << " : ";
|
286 |
for (unsigned int i = 0; i < m_words_in_span.size(); i++) {
|
286 |
for (unsigned int i = 0; i < m_words_in_span.size(); i++) {
|
|
... |
|
... |
290 |
}
|
290 |
}
|
291 |
cerr << endl;
|
291 |
cerr << endl;
|
292 |
#endif
|
292 |
#endif
|
293 |
unsigned int spanwords = m_words_in_span.size();
|
293 |
unsigned int spanwords = m_words_in_span.size();
|
294 |
int pos = m_spanpos;
|
294 |
int pos = m_spanpos;
|
|
|
295 |
// Byte position of the span start
|
|
|
296 |
int spboffs = bp - m_span.size();
|
295 |
|
297 |
|
296 |
for (unsigned int i = 0;
|
298 |
for (unsigned int i = 0;
|
297 |
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
299 |
i < ((m_flags&TXTS_ONLYSPANS) ? 1 : spanwords);
|
298 |
i++, pos++) {
|
300 |
i++, pos++) {
|
299 |
|
301 |
|
|
... |
|
... |
307 |
//cerr << "i " << i << " j " << j << " deb " << deb <<
|
309 |
//cerr << "i " << i << " j " << j << " deb " << deb <<
|
308 |
// " fin " << fin << endl;
|
310 |
// " fin " << fin << endl;
|
309 |
if (fin - deb > int(m_span.size()))
|
311 |
if (fin - deb > int(m_span.size()))
|
310 |
break;
|
312 |
break;
|
311 |
string word(m_span.substr(deb, fin-deb));
|
313 |
string word(m_span.substr(deb, fin-deb));
|
312 |
if (!emitterm(j != i+1, word, pos, deb, fin))
|
314 |
if (!emitterm(j != i+1, word, pos, spboffs+deb, spboffs+fin))
|
313 |
return false;
|
315 |
return false;
|
314 |
}
|
316 |
}
|
315 |
}
|
317 |
}
|
316 |
return true;
|
318 |
return true;
|
317 |
}
|
319 |
}
|
|
... |
|
... |
383 |
goto breaktrimloop;
|
385 |
goto breaktrimloop;
|
384 |
}
|
386 |
}
|
385 |
}
|
387 |
}
|
386 |
breaktrimloop:
|
388 |
breaktrimloop:
|
387 |
|
389 |
|
388 |
if (!words_from_span()) {
|
390 |
if (!words_from_span(bp)) {
|
389 |
return false;
|
391 |
return false;
|
390 |
}
|
392 |
}
|
391 |
discardspan();
|
393 |
discardspan();
|
392 |
|
394 |
|
393 |
} else {
|
395 |
} else {
|