Switch to unified view

a/src/utils/utf8iter.h b/src/utils/utf8iter.h
...
...
14
 *   Free Software Foundation, Inc.,
14
 *   Free Software Foundation, Inc.,
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
15
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16
 */
16
 */
17
#ifndef _UTF8ITER_H_INCLUDED_
17
#ifndef _UTF8ITER_H_INCLUDED_
18
#define _UTF8ITER_H_INCLUDED_
18
#define _UTF8ITER_H_INCLUDED_
19
/* @(#$Id: utf8iter.h,v 1.6 2006-01-30 11:15:28 dockes Exp $  (C) 2004 J.F.Dockes */
19
/* @(#$Id: utf8iter.h,v 1.7 2006-11-17 12:31:34 dockes Exp $  (C) 2004 J.F.Dockes */
20
20
21
/** 
21
/** 
22
 * A small helper class to iterate over utf8 strings. This is not an
22
 * A small helper class to iterate over utf8 strings. This is not an
23
 * STL iterator and this is not well designed, just convenient for
23
 * STL iterator and this is not well designed, just convenient for
24
   some specific uses
24
   some specific uses
25
 */
25
 */
26
class Utf8Iter {
26
class Utf8Iter {
27
    unsigned int cl; // Char length at current position if known
27
    unsigned int cl; // Char length at current position if known
28
    const string &s; // String we're working with
28
    const string &s; // String we're working with
29
    string::size_type pos; // Current position in string
29
    string::size_type pos; // Current position in string
30
    bool bad; // Status
31
    unsigned int m_charpos; // Current character position
30
    unsigned int m_charpos; // Current character position
32
31
33
    // Get character byte length at specified position
32
    // Get character byte length at specified position
34
    inline int get_cl(string::size_type p) const {
33
    inline int get_cl(string::size_type p) const {
35
    unsigned int z = (unsigned char)s[p];
34
    unsigned int z = (unsigned char)s[p];
...
...
53
    return p != string::npos && l > 0 && p + l <= s.length();
52
    return p != string::npos && l > 0 && p + l <= s.length();
54
    }
53
    }
55
    // Update current char length in object state. Assumes pos is inside string
54
    // Update current char length in object state. Assumes pos is inside string
56
    inline int compute_cl() {
55
    inline int compute_cl() {
57
    cl = 0;
56
    cl = 0;
58
  if (bad)
59
      return -1;
60
    cl = get_cl(pos);
57
    cl = get_cl(pos);
61
    if (!poslok(pos, cl)) {
58
    if (!poslok(pos, cl)) {
62
      bad = true;
63
        pos = s.length();
59
        pos = s.length();
64
        cl = 0;
60
        cl = 0;
65
        return -1;
61
        return -1;
66
    }
62
    }
67
    return 0;
63
    return 0;
...
...
94
        return (unsigned int)-1;
90
        return (unsigned int)-1;
95
    }
91
    }
96
    }
92
    }
97
 public:
93
 public:
98
    Utf8Iter(const string &in) 
94
    Utf8Iter(const string &in) 
99
    : cl(0), s(in), pos(0), bad(false), m_charpos(0) {}
95
    : cl(0), s(in), pos(0), m_charpos(0) 
96
  {
97
      // Ensure state is ok if appendchartostring is called at once
98
      compute_cl();
99
  }
100
100
101
    void rewind() {
101
    void rewind() {
102
    cl=0; pos=0; bad=false; m_charpos=0;
102
    cl=0; pos=0; m_charpos=0;
103
    }
103
    }
104
    /** operator* returns the ucs4 value as a machine integer*/
104
    /** operator* returns the ucs4 value as a machine integer*/
105
    unsigned int operator*() {
105
    unsigned int operator*() {
106
    if (!cl && compute_cl() < 0)
106
    if (!cl && compute_cl() < 0)
107
        return (unsigned int)-1;
107
        return (unsigned int)-1;
108
    unsigned int val = getvalueat(pos, cl);
108
    unsigned int val = getvalueat(pos, cl);
109
    if (val == (unsigned int)-1) {
109
    if (val == (unsigned int)-1) {
110
      bad = true;
111
        pos = s.length();
110
        pos = s.length();
112
        cl = 0;
111
        cl = 0;
113
    }
112
    }
114
    return val;
113
    return val;
115
    }
114
    }
...
...
135
    return (unsigned int)-1;
134
    return (unsigned int)-1;
136
    }
135
    }
137
136
138
    /** Set current position before next utf-8 character */
137
    /** Set current position before next utf-8 character */
139
    string::size_type operator++(int) {
138
    string::size_type operator++(int) {
140
    if (bad || (!cl && compute_cl() < 0)) {
139
    if (!cl && compute_cl() < 0) {
141
        return pos = string::npos;
140
        return pos = string::npos;
142
    }
141
    }
143
    pos += cl;
142
    pos += cl;
144
    m_charpos++;
143
    m_charpos++;
145
    cl = 0;
144
    cl = 0;
146
    return pos;
145
    return pos;
147
    }
146
    }
148
147
    /** This needs to be fast. No error checking. */
149
    bool appendchartostring(string &out) {
148
    void appendchartostring(string &out) {
150
  if (bad || (!cl && compute_cl() < 0)) {
149
  out.append(&s[pos], cl);
151
      return false;
152
  }
153
  out += s.substr(pos, cl);
154
  return true;
155
    }
150
    }
156
    operator string() {
151
    operator string() {
157
    if (bad || (!cl && compute_cl() < 0)) {
152
    if (!cl && compute_cl() < 0) {
158
        return std::string("");
153
        return std::string("");
159
    }
154
    }
160
    return s.substr(pos, cl);
155
    return s.substr(pos, cl);
161
    }
156
    }
162
    bool eof() {
157
    bool eof() {
163
    // Note: we always ensure that pos == s.length() when setting bad to 
158
    // Note: we always ensure that pos == s.length() when setting bad to 
164
    // true
159
    // true
165
    return pos == s.length();
160
    return pos == s.length();
166
    }
161
    }
167
    bool error() {
162
    bool error() {
168
  return bad;
163
  return compute_cl() < 0;
169
    }
164
    }
170
    string::size_type getBpos() const {
165
    string::size_type getBpos() const {
171
    return pos;
166
    return pos;
172
    }
167
    }
173
    string::size_type getCpos() const {
168
    string::size_type getCpos() const {