|
a/src/common/textsplit.h |
|
b/src/common/textsplit.h |
|
... |
|
... |
67 |
{
|
67 |
{
|
68 |
}
|
68 |
}
|
69 |
virtual ~TextSplit() {}
|
69 |
virtual ~TextSplit() {}
|
70 |
|
70 |
|
71 |
/** Split text, emit words and positions. */
|
71 |
/** Split text, emit words and positions. */
|
72 |
bool text_to_words(const string &in);
|
72 |
virtual bool text_to_words(const string &in);
|
73 |
|
73 |
|
74 |
/** Process one output word: to be implemented by the actual user class */
|
74 |
/** Process one output word: to be implemented by the actual user class */
|
75 |
virtual bool takeword(const string& term,
|
75 |
virtual bool takeword(const string& term,
|
76 |
int pos, // term pos
|
76 |
int pos, // term pos
|
77 |
int bts, // byte offset of first char in term
|
77 |
int bts, // byte offset of first char in term
|
|
... |
|
... |
126 |
// This processes cjk text:
|
126 |
// This processes cjk text:
|
127 |
bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
|
127 |
bool cjk_to_words(Utf8Iter *it, unsigned int *cp);
|
128 |
|
128 |
|
129 |
bool emitterm(bool isspan, string &term, int pos, int bs, int be);
|
129 |
bool emitterm(bool isspan, string &term, int pos, int bs, int be);
|
130 |
bool doemit(bool spanerase, int bp, bool spanemit=false);
|
130 |
bool doemit(bool spanerase, int bp, bool spanemit=false);
|
|
|
131 |
void discardspan();
|
131 |
};
|
132 |
};
|
132 |
|
133 |
|
133 |
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|
134 |
#endif /* _TEXTSPLIT_H_INCLUDED_ */
|