Parent: [869b57] (diff)

Child: [2a0204] (diff)

Download this file

textsplit.h    34 lines (30 with data), 924 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#ifndef _TEXTSPLIT_H_INCLUDED_
#define _TEXTSPLIT_H_INCLUDED_
/* @(#$Id: textsplit.h,v 1.3 2005-01-24 13:17:58 dockes Exp $ (C) 2004 J.F.Dockes */
#include <string>
/**
* Split text into words.
* See comments at top of .cpp for more explanations.
* This uses a callback function. It could be done with an iterator instead,
* but 'ts much simpler this way...
*/
class TextSplit {
public:
typedef bool (*TermSink)(void *cdata, const std::string & term, int pos);
private:
TermSink termsink;
void *cdata;
int maxWordLength;
bool emitterm(std::string &term, int pos, bool doerase);
public:
/**
* Constructor: just store callback and client data
*/
TextSplit(TermSink t, void *c) : termsink(t), cdata(c), maxWordLength(40)
{}
/**
* Split text, emit words and positions.
*/
bool text_to_words(const std::string &in);
};
#endif /* _TEXTSPLIT_H_INCLUDED_ */