Switch to unified view

a/src/common/textsplit.cpp b/src/common/textsplit.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.21 2006-04-11 06:49:45 dockes Exp $ (C) 2004 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: textsplit.cpp,v 1.22 2006-04-25 08:17:36 dockes Exp $ (C) 2004 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
201
 * handled specially so that special cases (e.g. c++ or dockes@okyz.com)
201
 * handled specially so that special cases (e.g. c++ or dockes@okyz.com)
202
 * are handled properly.
202
 * are handled properly.
203
 */
203
 */
204
bool TextSplit::text_to_words(const string &in)
204
bool TextSplit::text_to_words(const string &in)
205
{
205
{
206
    LOGDEB2(("TextSplit::text_to_words: cb %p\n", cb));
206
    LOGDEB2(("TextSplit::text_to_words: cb %p in [%s]\n", cb, 
207
      in.substr(0,50).c_str()));
207
208
208
    setcharclasses();
209
    setcharclasses();
209
210
210
    span.erase();
211
    span.erase();
211
    word.erase(); // Current word: no punctuation at all in there
212
    word.erase(); // Current word: no punctuation at all in there
212
    number = false;
213
    number = false;
213
    wordpos = spanpos = charpos = 0;
214
    prevpos = wordpos = spanpos = charpos = 0;
215
    prevterm.erase();
214
216
215
    Utf8Iter it(in);
217
    Utf8Iter it(in);
216
218
217
    for (; !it.eof(); it++, charpos++) {
219
    for (; !it.eof(); it++, charpos++) {
218
    unsigned int c = *it;
220
    unsigned int c = *it;