Switch to unified view

a/src/common/textsplit.h b/src/common/textsplit.h
...
...
14
 *   Free Software Foundation, Inc.,
14
 *   Free Software Foundation, Inc.,
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16
 */
16
 */
17
#ifndef _TEXTSPLIT_H_INCLUDED_
17
#ifndef _TEXTSPLIT_H_INCLUDED_
18
#define _TEXTSPLIT_H_INCLUDED_
18
#define _TEXTSPLIT_H_INCLUDED_
19
/* @(#$Id: textsplit.h,v 1.19 2007-10-02 11:39:08 dockes Exp $  (C) 2004 J.F.Dockes */
19
/* @(#$Id: textsplit.h,v 1.20 2007-10-04 12:21:52 dockes Exp $  (C) 2004 J.F.Dockes */
20
20
21
#include <string>
21
#include <string>
22
#ifndef NO_NAMESPACES
22
#ifndef NO_NAMESPACES
23
using std::string;
23
using std::string;
24
#endif
24
#endif
...
...
36
              ) = 0; 
36
              ) = 0; 
37
};
37
};
38
38
39
class Utf8Iter;
39
class Utf8Iter;
40
40
41
41
/** 
42
/** 
42
 * Split text into words. 
43
 * Split text into words. 
43
 * See comments at top of .cpp for more explanations.
44
 * See comments at top of .cpp for more explanations.
44
 * This uses a callback function. It could be done with an iterator instead,
45
 * This uses a callback function. It could be done with an iterator instead,
45
 * but it's much simpler this way...
46
 * but it's much simpler this way...
46
 */
47
 */
47
class TextSplit {
48
class TextSplit {
48
public:
49
public:
49
    // Should we activate special processing of Chinese characters ? This
50
    // Should we activate special processing of Chinese characters ? This
50
    // needs a little more cpu, so it can be turned off globally.
51
    // needs a little more cpu, so it can be turned off globally.
51
    static bool t_processCJK;
52
    static bool o_processCJK;
52
    // Globally switch the special processing of CJK characters on or off
    // by storing the requested state in t_processCJK.
    static void cjkProcessing(bool onoff)
    {
        t_processCJK = onoff;
    }
53
    static unsigned int  o_CJKNgramLen;
54
    static const unsigned int o_CJKMaxNgramLen =  5;
55
    static void cjkProcessing(bool onoff, unsigned int ngramlen = 2) 
56
    {
57
  o_processCJK = onoff;
58
  o_CJKNgramLen = ngramlen <= o_CJKMaxNgramLen ? 
59
      ngramlen : o_CJKMaxNgramLen;
60
    }
53
61
54
    enum Flags {TXTS_NONE = 0, 
62
    enum Flags {TXTS_NONE = 0, 
55
        TXTS_ONLYSPANS = 1,  // Only return maximum spans (a@b.com) 
63
        TXTS_ONLYSPANS = 1,  // Only return maximum spans (a@b.com) 
56
        TXTS_NOSPANS = 2,  // Only return atomic words (a, b, com)
64
        TXTS_NOSPANS = 2,  // Only return atomic words (a, b, com)
57
        TXTS_KEEPWILD = 4 // Handle wildcards as letters
65
        TXTS_KEEPWILD = 4 // Handle wildcards as letters