|
a/src/common/textsplit.cpp |
|
b/src/common/textsplit.cpp |
|
... |
|
... |
19 |
|
19 |
|
20 |
#include <assert.h>
|
20 |
#include <assert.h>
|
21 |
|
21 |
|
22 |
#include <iostream>
|
22 |
#include <iostream>
|
23 |
#include <string>
|
23 |
#include <string>
|
24 |
#include <set>
|
24 |
//#include <set>
|
|
|
25 |
#include <tr1/unordered_set>
|
|
|
26 |
using std::tr1::unordered_set;
|
|
|
27 |
|
25 |
#include <cstring>
|
28 |
#include <cstring>
|
26 |
|
29 |
|
27 |
#include "textsplit.h"
|
30 |
#include "textsplit.h"
|
28 |
#include "debuglog.h"
|
31 |
#include "debuglog.h"
|
29 |
//#define UTF8ITER_CHECK
|
32 |
//#define UTF8ITER_CHECK
|
|
... |
|
... |
55 |
|
58 |
|
56 |
// Real UTF-8 characters are handled with sets holding all characters
|
59 |
// Real UTF-8 characters are handled with sets holding all characters
|
57 |
// with interesting properties. This is far from full-blown management
|
60 |
// with interesting properties. This is far from full-blown management
|
58 |
// of Unicode properties, but seems to do the job well enough in most
|
61 |
// of Unicode properties, but seems to do the job well enough in most
|
59 |
// common cases
|
62 |
// common cases
|
60 |
static set<unsigned int> unicign;
|
63 |
static unordered_set<unsigned int> unicign;
|
61 |
static set<unsigned int> visiblewhite;
|
64 |
static unordered_set<unsigned int> visiblewhite;
|
62 |
|
65 |
|
63 |
class CharClassInit {
|
66 |
class CharClassInit {
|
64 |
public:
|
67 |
public:
|
65 |
CharClassInit()
|
68 |
CharClassInit()
|
66 |
{
|
69 |
{
|