|
a/src/common/textsplit.cpp |
|
b/src/common/textsplit.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.21 2006-04-11 06:49:45 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.22 2006-04-25 08:17:36 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
201 |
* handled specially so that special cases, e.g. c++ and dockes@okyz.com,
|
201 |
* handled specially so that special cases, e.g. c++ and dockes@okyz.com,
|
202 |
* are handled properly,
|
202 |
* are handled properly,
|
203 |
*/
|
203 |
*/
|
204 |
bool TextSplit::text_to_words(const string &in)
|
204 |
bool TextSplit::text_to_words(const string &in)
|
205 |
{
|
205 |
{
|
206 |
LOGDEB2(("TextSplit::text_to_words: cb %p\n", cb));
|
206 |
LOGDEB2(("TextSplit::text_to_words: cb %p in [%s]\n", cb,
|
|
|
207 |
in.substr(0,50).c_str()));
|
207 |
|
208 |
|
208 |
setcharclasses();
|
209 |
setcharclasses();
|
209 |
|
210 |
|
210 |
span.erase();
|
211 |
span.erase();
|
211 |
word.erase(); // Current word: no punctuation at all in there
|
212 |
word.erase(); // Current word: no punctuation at all in there
|
212 |
number = false;
|
213 |
number = false;
|
213 |
wordpos = spanpos = charpos = 0;
|
214 |
prevpos = wordpos = spanpos = charpos = 0;
|
|
|
215 |
prevterm.erase();
|
214 |
|
216 |
|
215 |
Utf8Iter it(in);
|
217 |
Utf8Iter it(in);
|
216 |
|
218 |
|
217 |
for (; !it.eof(); it++, charpos++) {
|
219 |
for (; !it.eof(); it++, charpos++) {
|
218 |
unsigned int c = *it;
|
220 |
unsigned int c = *it;
|