|
a/src/common/textsplit.cpp |
|
b/src/common/textsplit.cpp |
|
... |
|
... |
466 |
return true;
|
466 |
return true;
|
467 |
}
|
467 |
}
|
468 |
|
468 |
|
469 |
void TextSplit::discardspan()
|
469 |
void TextSplit::discardspan()
|
470 |
{
|
470 |
{
|
|
|
471 |
m_span.clear();
|
471 |
m_words_in_span.clear();
|
472 |
m_words_in_span.clear();
|
472 |
m_span.erase();
|
|
|
473 |
m_spanpos = m_wordpos;
|
473 |
m_spanpos = m_wordpos;
|
474 |
m_wordStart = 0;
|
474 |
m_wordStart = 0;
|
475 |
m_wordLen = m_wordChars = 0;
|
475 |
m_wordLen = m_wordChars = 0;
|
476 |
}
|
476 |
}
|
477 |
|
477 |
|
|
... |
|
... |
511 |
" [" << in.substr(0,50) << "]\n");
|
511 |
" [" << in.substr(0,50) << "]\n");
|
512 |
|
512 |
|
513 |
if (in.empty())
|
513 |
if (in.empty())
|
514 |
return true;
|
514 |
return true;
|
515 |
|
515 |
|
516 |
m_span.erase();
|
516 |
// Reset the data members relative to splitting state
|
517 |
m_inNumber = false;
|
517 |
clearsplitstate();
|
518 |
m_wordStart = m_wordLen = m_wordChars = m_prevpos = m_prevlen = m_wordpos
|
518 |
|
519 |
= m_spanpos = 0;
|
|
|
520 |
bool pagepending = false;
|
519 |
bool pagepending = false;
|
521 |
bool softhyphenpending = false;
|
520 |
bool softhyphenpending = false;
|
522 |
|
521 |
|
523 |
// Running count of non-alphanum chars. Reset when we see one;
|
522 |
// Running count of non-alphanum chars. Reset when we see one;
|
524 |
int nonalnumcnt = 0;
|
523 |
int nonalnumcnt = 0;
|
|
... |
|
... |
933 |
boffs[0], btend)) {
|
932 |
boffs[0], btend)) {
|
934 |
return false;
|
933 |
return false;
|
935 |
}
|
934 |
}
|
936 |
}
|
935 |
}
|
937 |
|
936 |
|
938 |
m_span.erase();
|
937 |
// Reset state, saving term position, and return the found non-cjk
|
939 |
m_inNumber = false;
|
938 |
// unicode character value. The current input byte offset is kept
|
940 |
m_wordStart = m_wordLen = m_wordChars = m_prevpos = m_prevlen = 0;
|
939 |
// in the utf8Iter
|
|
|
940 |
int pos = m_wordpos;
|
|
|
941 |
clearsplitstate();
|
941 |
m_spanpos = m_wordpos;
|
942 |
m_spanpos = m_wordpos = pos;
|
942 |
*cp = c;
|
943 |
*cp = c;
|
943 |
return true;
|
944 |
return true;
|
944 |
}
|
945 |
}
|
945 |
|
946 |
|
946 |
// Specialization for countWords
|
947 |
// Specialization for countWords
|