Switch to unified view

a/src/common/textsplit.cpp b/src/common/textsplit.cpp
...
...
86
86
87
    char wild[] = "*?[]";
87
    char wild[] = "*?[]";
88
    for (i = 0; i  < strlen(wild); i++)
88
    for (i = 0; i  < strlen(wild); i++)
89
    charclasses[int(wild[i])] = WILD;
89
    charclasses[int(wild[i])] = WILD;
90
90
91
    char special[] = ".@+-,#'\n\r";
91
    char special[] = ".@+-,#'_\n\r";
92
    for (i = 0; i  < strlen(special); i++)
92
    for (i = 0; i  < strlen(special); i++)
93
    charclasses[int(special[i])] = special[i];
93
    charclasses[int(special[i])] = special[i];
94
94
95
    for (i = 0; i < sizeof(uniign) / sizeof(int); i++) {
95
    for (i = 0; i < sizeof(uniign) / sizeof(int); i++) {
96
    unicign.insert(uniign[i]);
96
    unicign.insert(uniign[i]);
...
...
136
// F900..FAFF; CJK Compatibility Ideographs
136
// F900..FAFF; CJK Compatibility Ideographs
137
// FE30..FE4F; CJK Compatibility Forms
137
// FE30..FE4F; CJK Compatibility Forms
138
// FF00..FFEF; Halfwidth and Fullwidth Forms
138
// FF00..FFEF; Halfwidth and Fullwidth Forms
139
// 20000..2A6DF; CJK Unified Ideographs Extension B
139
// 20000..2A6DF; CJK Unified Ideographs Extension B
140
// 2F800..2FA1F; CJK Compatibility Ideographs Supplement
140
// 2F800..2FA1F; CJK Compatibility Ideographs Supplement
141
// Note: the p > 127 test is not necessary, but optimizes away the ascii case
141
#define UNICODE_IS_CJK(p)                       \
142
#define UNICODE_IS_CJK(p)                       \
142
    (((p) >= 0x2E80 && (p) <= 0x2EFF)                                   \
143
    ((p) > 127 &&                         \
143
     || ((p) >= 0x3000 && (p) <= 0x9FFF)                                \
144
     (((p) >= 0x2E80 && (p) <= 0x2EFF) ||             \
144
     || ((p) >= 0xA700 && (p) <= 0xA71F)                                \
145
      ((p) >= 0x3000 && (p) <= 0x9FFF) ||             \
145
     || ((p) >= 0xAC00 && (p) <= 0xD7AF)                                \
146
      ((p) >= 0xA700 && (p) <= 0xA71F) ||             \
146
     || ((p) >= 0xF900 && (p) <= 0xFAFF)                                \
147
      ((p) >= 0xAC00 && (p) <= 0xD7AF) ||             \
147
     || ((p) >= 0xFE30 && (p) <= 0xFE4F)                                \
148
      ((p) >= 0xF900 && (p) <= 0xFAFF) ||             \
148
     || ((p) >= 0xFF00 && (p) <= 0xFFEF)                                \
149
      ((p) >= 0xFE30 && (p) <= 0xFE4F) ||             \
149
     || ((p) >= 0x20000 && (p) <= 0x2A6DF)                              \
150
      ((p) >= 0xFF00 && (p) <= 0xFFEF) ||             \
151
      ((p) >= 0x20000 && (p) <= 0x2A6DF) ||               \
150
     || ((p) >= 0x2F800 && (p) <= 0x2FA1F))
152
      ((p) >= 0x2F800 && (p) <= 0x2FA1F)))
151
153
152
bool TextSplit::isCJK(int c)
154
bool TextSplit::isCJK(int c)
153
{
155
{
154
    return UNICODE_IS_CJK(c);
156
    return UNICODE_IS_CJK(c);
155
}
157
}
...
...
383
            return false;
385
            return false;
384
        m_inNumber = false;
386
        m_inNumber = false;
385
        }
387
        }
386
        m_wordStart += it.appendchartostring(m_span);
388
        m_wordStart += it.appendchartostring(m_span);
387
        break;
389
        break;
390
  case '_':
391
      if (m_wordLen) {
392
      if (!doemit(false, it.getBpos()))
393
          return false;
394
      m_inNumber = false;
395
      }
396
      m_wordStart += it.appendchartostring(m_span);
397
      break;
388
    case '\'':
398
    case '\'':
389
        // If in word, potential span: o'brien, else, this is more 
399
        // If in word, potential span: o'brien, else, this is more 
390
        // whitespace
400
        // whitespace
391
        if (m_wordLen) {
401
        if (m_wordLen) {
392
        if (!doemit(false, it.getBpos()))
402
        if (!doemit(false, it.getBpos()))