Switch to unified view

a/src/common/textsplit.h b/src/common/textsplit.h
...
...
14
 *   Free Software Foundation, Inc.,
14
 *   Free Software Foundation, Inc.,
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16
 */
16
 */
17
#ifndef _TEXTSPLIT_H_INCLUDED_
17
#ifndef _TEXTSPLIT_H_INCLUDED_
18
#define _TEXTSPLIT_H_INCLUDED_
18
#define _TEXTSPLIT_H_INCLUDED_
19
/* @(#$Id: textsplit.h,v 1.14 2006-11-20 11:17:53 dockes Exp $  (C) 2004 J.F.Dockes */
19
/* @(#$Id: textsplit.h,v 1.15 2006-12-08 07:11:17 dockes Exp $  (C) 2004 J.F.Dockes */
20
20
21
#include <string>
21
#include <string>
22
#ifndef NO_NAMESPACES
22
#ifndef NO_NAMESPACES
23
using std::string;
23
using std::string;
24
#endif
24
#endif
25
25
26
/**
26
/**
27
 * Function class whose takeword method is called for every detected word while splitting text.
27
 * Function class whose takeword method is called for every detected word while splitting text.
28
 */
28
 */
29
class TextSplitCB {
29
class TextSplitCB {
30
 public:
30
public:
31
    virtual ~TextSplitCB() {}
31
    virtual ~TextSplitCB() {}
32
    virtual bool takeword(const std::string& term, 
32
    virtual bool takeword(const string& term, 
33
              int pos,  // term pos
33
              int pos,  // term pos
34
              int bts,  // byte offset of first char in term
34
              int bts,  // byte offset of first char in term
35
              int bte   // byte offset of first char after term
35
              int bte   // byte offset of first char after term
36
              ) = 0; 
36
              ) = 0; 
37
};
37
};
...
...
41
 * See comments at top of .cpp for more explanations.
41
 * See comments at top of .cpp for more explanations.
42
 * This uses a callback function. It could be done with an iterator instead,
42
 * This uses a callback function. It could be done with an iterator instead,
43
 * but it's much simpler this way...
43
 * but it's much simpler this way...
44
 */
44
 */
45
class TextSplit {
45
class TextSplit {
46
 public:
46
public:
47
    enum Flags {TXTS_NONE = 0, TXTS_ONLYSPANS = 1, TXTS_NOSPANS = 2};
47
    enum Flags {TXTS_NONE = 0, TXTS_ONLYSPANS = 1, TXTS_NOSPANS = 2};
48
    /**
48
    /**
49
     * Constructor: just store callback object
49
     * Constructor: just store callback object
50
     */
50
     */
51
    TextSplit(TextSplitCB *t, Flags flags = TXTS_NONE) 
51
    TextSplit(TextSplitCB *t, Flags flags = TXTS_NONE) 
52
    : m_flags(flags), cb(t), maxWordLength(40), prevpos(-1) {}
52
    : m_flags(flags), cb(t), maxWordLength(40), prevpos(-1) {}
53
    /**
53
    /**
54
     * Split text, emit words and positions.
54
     * Split text, emit words and positions.
55
     */
55
     */
56
    bool text_to_words(const std::string &in);
56
    bool text_to_words(const string &in);
57
57
58
    // Utility functions: these do not need the user to set up a callback 
59
    // etc.
60
    static int countWords(const string &in, Flags flgs = TXTS_ONLYSPANS);
61
58
 private:
62
private:
59
    Flags m_flags;
63
    Flags m_flags;
60
    TextSplitCB *cb;
64
    TextSplitCB *cb;
61
    int maxWordLength;
65
    int maxWordLength;
62
66
63
    string span; // Current span. Might be jf.dockes@wanadoo.f
67
    string span; // Current span. Might be jf.dockes@wanadoo.f
...
...
70
    // It may happen that our cleanup would result in emitting the
74
    // It may happen that our cleanup would result in emitting the
71
    // same term twice. We try to avoid this
75
    // same term twice. We try to avoid this
72
    int prevpos;
76
    int prevpos;
73
    unsigned int prevlen;
77
    unsigned int prevlen;
74
78
75
    bool emitterm(bool isspan, std::string &term, int pos, int bs, int be);
79
    bool emitterm(bool isspan, string &term, int pos, int bs, int be);
76
    bool doemit(bool spanerase, int bp);
80
    bool doemit(bool spanerase, int bp);
81
77
};
82
};
78
83
84
79
#endif /* _TEXTSPLIT_H_INCLUDED_ */
85
#endif /* _TEXTSPLIT_H_INCLUDED_ */