Switch to unified view

a/src/common/textsplit.h b/src/common/textsplit.h
...
...
14
 *   Free Software Foundation, Inc.,
14
 *   Free Software Foundation, Inc.,
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16
 */
16
 */
17
#ifndef _TEXTSPLIT_H_INCLUDED_
17
#ifndef _TEXTSPLIT_H_INCLUDED_
18
#define _TEXTSPLIT_H_INCLUDED_
18
#define _TEXTSPLIT_H_INCLUDED_
19
/* @(#$Id: textsplit.h,v 1.18 2007-09-20 08:45:05 dockes Exp $  (C) 2004 J.F.Dockes */
19
/* @(#$Id: textsplit.h,v 1.19 2007-10-02 11:39:08 dockes Exp $  (C) 2004 J.F.Dockes */
20
20
21
#include <string>
21
#include <string>
22
#ifndef NO_NAMESPACES
22
#ifndef NO_NAMESPACES
23
using std::string;
23
using std::string;
24
#endif
24
#endif
...
...
44
 * This uses a callback function. It could be done with an iterator instead,
44
 * This uses a callback function. It could be done with an iterator instead,
45
 * but it's much simpler this way...
45
 * but it's much simpler this way...
46
 */
46
 */
47
class TextSplit {
47
class TextSplit {
48
public:
48
public:
49
    // Should we activate special processing of Chinese characters ? This
50
    // needs a little more cpu, so it can be turned off globally.
51
    static bool t_processCJK;
52
    static void cjkProcessing(bool onoff) {t_processCJK = onoff;}
53
49
    enum Flags {TXTS_NONE = 0, 
54
    enum Flags {TXTS_NONE = 0, 
50
        TXTS_ONLYSPANS = 1,  // Only return maximum spans (a@b.com) 
55
        TXTS_ONLYSPANS = 1,  // Only return maximum spans (a@b.com) 
51
        TXTS_NOSPANS = 2,  // Only return atomic words (a, b, com)
56
        TXTS_NOSPANS = 2,  // Only return atomic words (a, b, com)
52
        TXTS_KEEPWILD = 4, // Handle wildcards as letters
57
        TXTS_KEEPWILD = 4 // Handle wildcards as letters
53
      TXTS_NOCJK = 8     // CJK special processing
54
    };
58
    };
55
59
56
    /**
60
    /**
57
     * Constructor: just store callback object
61
     * Constructor: just store callback object
58
     */
62
     */
59
    TextSplit(TextSplitCB *t, Flags flags = Flags(TXTS_NONE))
63
    TextSplit(TextSplitCB *t, Flags flags = Flags(TXTS_NONE))
60
    : m_flags(flags), m_cb(t), m_maxWordLength(40), 
64
    : m_flags(flags), m_cb(t), m_maxWordLength(40), 
61
    m_nocjk((m_flags & TXTS_NOCJK) != 0),
62
      m_prevpos(-1)
65
      m_prevpos(-1)
63
    {
66
    {
64
    }
67
    }
65
68
66
    /**
69
    /**
...
...
74
77
75
private:
78
private:
76
    Flags         m_flags;
79
    Flags         m_flags;
77
    TextSplitCB  *m_cb;
80
    TextSplitCB  *m_cb;
78
    int           m_maxWordLength;
81
    int           m_maxWordLength;
79
    int           m_nocjk;
80
82
81
    // Current span. Might be jf.dockes@wanadoo.f
83
    // Current span. Might be jf.dockes@wanadoo.f
82
    string        m_span; 
84
    string        m_span; 
83
85
84
    // Current word: no punctuation at all in there. Byte offset
86
    // Current word: no punctuation at all in there. Byte offset