Switch to unified view

a/src/common/textsplit.h b/src/common/textsplit.h
1
#ifndef _TEXTSPLIT_H_INCLUDED_
1
#ifndef _TEXTSPLIT_H_INCLUDED_
2
#define _TEXTSPLIT_H_INCLUDED_
2
#define _TEXTSPLIT_H_INCLUDED_
3
/* @(#$Id: textsplit.h,v 1.3 2005-01-24 13:17:58 dockes Exp $  (C) 2004 J.F.Dockes */
3
/* @(#$Id: textsplit.h,v 1.4 2005-02-07 13:17:47 dockes Exp $  (C) 2004 J.F.Dockes */
4
4
5
#include <string>
5
#include <string>
6
7
// Function class which is called for every detected word.
// Implementations derive from this interface and override takeword().
// NOTE(review): the meaning of takeword()'s bool return value is not visible
// in this header -- confirm against the .cpp before relying on it.
8
class TextSplitCB {
9
 public:
10
    // Virtual destructor: this class is used as a polymorphic base.
    virtual ~TextSplitCB() {}
11
    // Pure virtual: receives one term together with its position and the
    // byte range it occupies.
    virtual bool takeword(const std::string& term, 
12
            int pos,  // term position
13
            int bts,      // byte offset of first char in term
14
            int bte      // byte offset of first char after term
15
            ) = 0; 
16
};
6
17
7
/** 
18
/** 
8
 * Split text into words. 
19
 * Split text into words. 
9
 * See comments at top of .cpp for more explanations.
20
 * See comments at top of .cpp for more explanations.
10
 * This uses a callback function. It could be done with an iterator instead,
21
 * This uses a callback function. It could be done with an iterator instead,
11
 * but it's much simpler this way...
22
 * but it's much simpler this way...
12
 */
23
 */
13
class TextSplit {
24
class TextSplit {
14
 public:
25
    // Callback object; set from the constructor argument.
    TextSplitCB *cb;
15
    typedef bool (*TermSink)(void *cdata, const std::string & term, int pos);
16
 private:
17
    TermSink termsink;
18
    void *cdata;
19
    int maxWordLength;
26
    // Initialized to 40 by the constructor.
    int maxWordLength;
20
    bool emitterm(std::string &term, int pos, bool doerase);
27
    // NOTE(review): the two unnamed int parameters presumably carry the byte
    // offsets (bts, bte) handed to TextSplitCB::takeword -- confirm in the .cpp.
    bool emitterm(std::string &term, int pos, bool doerase, int, int);
21
 public:
28
 public:
22
    /**
29
    /**
23
     * Constructor: just store callback and client data
30
     * Constructor: just store the callback object
24
     */
31
     */
25
    TextSplit(TermSink t, void *c) : termsink(t), cdata(c), maxWordLength(40)
32
    TextSplit(TextSplitCB *t) : cb(t), maxWordLength(40) {}
26
    {}
27
    /**
33
    /**
28
     * Split text, emit words and positions.
34
     * Split text, emit words and positions.
29
     */
35
     */
30
    bool text_to_words(const std::string &in);
36
    bool text_to_words(const std::string &in);
31
};
37
};