|
a/src/common/textsplit.cpp |
|
b/src/common/textsplit.cpp |
|
... |
|
... |
22 |
|
22 |
|
23 |
#include <iostream>
|
23 |
#include <iostream>
|
24 |
#include <string>
|
24 |
#include <string>
|
25 |
#include <algorithm>
|
25 |
#include <algorithm>
|
26 |
#include <cstring>
|
26 |
#include <cstring>
|
27 |
#include UNORDERED_SET_INCLUDE
|
27 |
#include <unordered_set>
|
28 |
|
28 |
|
29 |
#include "textsplit.h"
|
29 |
#include "textsplit.h"
|
30 |
#include "log.h"
|
30 |
#include "log.h"
|
31 |
//#define UTF8ITER_CHECK
|
31 |
//#define UTF8ITER_CHECK
|
32 |
#include "utf8iter.h"
|
32 |
#include "utf8iter.h"
|
|
... |
|
... |
60 |
// Non-ascii UTF-8 characters are handled with sets holding all
|
60 |
// Non-ascii UTF-8 characters are handled with sets holding all
|
61 |
// characters with interesting properties. This is far from full-blown
|
61 |
// characters with interesting properties. This is far from full-blown
|
62 |
// management of Unicode properties, but seems to do the job well
|
62 |
// management of Unicode properties, but seems to do the job well
|
63 |
// enough in most common cases
|
63 |
// enough in most common cases
|
64 |
static vector<unsigned int> vpuncblocks;
|
64 |
static vector<unsigned int> vpuncblocks;
|
65 |
static STD_UNORDERED_SET<unsigned int> spunc;
|
65 |
static std::unordered_set<unsigned int> spunc;
|
66 |
static STD_UNORDERED_SET<unsigned int> visiblewhite;
|
66 |
static std::unordered_set<unsigned int> visiblewhite;
|
67 |
static STD_UNORDERED_SET<unsigned int> sskip;
|
67 |
static std::unordered_set<unsigned int> sskip;
|
68 |
|
68 |
|
69 |
class CharClassInit {
|
69 |
class CharClassInit {
|
70 |
public:
|
70 |
public:
|
71 |
CharClassInit()
|
71 |
CharClassInit()
|
72 |
{
|
72 |
{
|