|
a/src/common/textsplit.h |
|
b/src/common/textsplit.h |
|
... |
|
... |
35 |
* but it's much simpler this way...
|
35 |
* but it's much simpler this way...
|
36 |
*/
|
36 |
*/
|
37 |
class TextSplit {
|
37 |
class TextSplit {
|
38 |
public:
|
38 |
public:
|
39 |
// Should we activate special processing of Chinese characters ? This
|
39 |
// Should we activate special processing of Chinese characters ? This
|
40 |
// needs a little more cpu, so it can be turned off globally.
|
40 |
// needs a little more cpu, so it can be turned off globally. This is set
|
|
|
41 |
// by rclconfig; changing it requires reindexing.
|
41 |
static bool o_processCJK;
|
42 |
static bool o_processCJK;
|
42 |
static unsigned int o_CJKNgramLen;
|
43 |
static unsigned int o_CJKNgramLen;
|
43 |
static const unsigned int o_CJKMaxNgramLen = 5;
|
44 |
static const unsigned int o_CJKMaxNgramLen = 5;
|
44 |
static void cjkProcessing(bool onoff, unsigned int ngramlen = 2)
|
45 |
static void cjkProcessing(bool onoff, unsigned int ngramlen = 2)
|
45 |
{
|
46 |
{
|
46 |
o_processCJK = onoff;
|
47 |
o_processCJK = onoff;
|
47 |
o_CJKNgramLen = ngramlen <= o_CJKMaxNgramLen ?
|
48 |
o_CJKNgramLen = ngramlen <= o_CJKMaxNgramLen ?
|
48 |
ngramlen : o_CJKMaxNgramLen;
|
49 |
ngramlen : o_CJKMaxNgramLen;
|
|
|
50 |
}
|
|
|
51 |
|
|
|
52 |
// Are we indexing numbers ? Set by rclconfig. Changing it requires reindexing.
|
|
|
53 |
static bool o_noNumbers;
|
|
|
54 |
static void noNumbers()
|
|
|
55 |
{
|
|
|
56 |
o_noNumbers = true;
|
49 |
}
|
57 |
}
|
50 |
|
58 |
|
51 |
enum Flags {TXTS_NONE = 0,
|
59 |
enum Flags {TXTS_NONE = 0,
|
52 |
TXTS_ONLYSPANS = 1, // Only return maximum spans (a@b.com)
|
60 |
TXTS_ONLYSPANS = 1, // Only return maximum spans (a@b.com)
|
53 |
TXTS_NOSPANS = 2, // Only return atomic words (a, b, com)
|
61 |
TXTS_NOSPANS = 2, // Only return atomic words (a, b, com)
|