Switch to unified view

a b/cfgui/picoxml.h
1
/* Copyright (C) 2016 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
17
18
#ifndef _PICOXML_H_INCLUDED_
19
#define _PICOXML_H_INCLUDED_
20
21
/**
 * PicoXMLParser: a single include file parser for an XML-like, but
 * restricted language, adequate for config files, not for arbitrary
 * externally generated data.
 *
 *  - The code depends on nothing but the "classical" C++ standard
 *    library (C++11 is not necessary).
 *  - The input to the parser is a single C++ string. The parser does
 *    not deal with input in several pieces or files. The parser keeps
 *    a reference to that string, so it must outlive the parser.
 *  - SAX mode only. You have access to the tag stack. I've always
 *    found DOM mode less usable.
 *  - Checks for proper tag nesting and not much else.
 *  - ! No CDATA
 *  - ! Attributes should really really not contain XML special chars.
 *  - Entity decoding is left as an exercise to the user.
 *
 * A typical input would be like the following (you can add XML
 * declarations, whitespace and newlines to taste).
 *
 * <top>top chrs1<sub attr="attrval">sub chrs</sub>top chrs2 <emptyelt /></top>
 *
 * Usage: subclass PicoXMLParser, overriding the methods in the
 * "protected:" section (look there for more details), call the
 * constructor with your input, then call parse().
 */
46
47
#include <string>
48
#include <vector>
49
#include <map>
50
#include <sstream>
51
#include <iostream>
52
53
class PicoXMLParser {
public:
    /**
     * Create a parser for a complete document.
     * @param input the whole text to parse. Only a reference is stored,
     *   not a copy: the string must stay alive and unmodified for as
     *   long as the parser is used. Do not pass a temporary.
     */
    PicoXMLParser(const std::string& input)
        : m_in(input), m_pos(0) {
    }
    virtual ~PicoXMLParser() { }

    /**
     * Parse the input, calling startElement(), endElement() and
     * characterData() as the corresponding constructs are seen.
     * @return true if the input is empty (or whitespace only) or was
     *   parsed to the end with all elements closed, false on a syntax
     *   error, in which case getReason() describes the problem.
     */
    virtual bool parse() {
        // Skip initial whitespace and XML declarations. On success,
        // the current position is on the '<' of the first tag, or at
        // the end of the input.
        if (!skipDecl()) {
            return false;
        }
        if (nomore()) {
            // Empty file
            return true;
        }

        for (;;) {
            // Here the current char is '<' and the next one is not '?'
            m_pos++;
            if (nomore()) {
                m_reason << "EOF within tag";
                return false;
            }
            // First char after the '<'
            const std::string::size_type spos = m_pos;
            const int isendtag = m_in[m_pos] == '/' ? 1 : 0;

            // Move just beyond the closing '>'
            skipStr(">");
            if (m_pos == std::string::npos || m_pos <= spos + 1) {
                m_reason << "Empty tag or EOF inside tag. pos " << spos;
                return false;
            }

            // "<xx/>": the char just before the '>' is a '/'
            const int emptyel = m_in[m_pos - 2] == '/' ? 1 : 0;
            if (emptyel && isendtag) {
                m_reason << "Bad tag </xx/> at cpos " << spos;
                return false;
            }

            // Text between the delimiters, without the leading or
            // trailing '/' if any: tag name and possible attributes.
            const std::string::size_type tagstart = spos + isendtag;
            std::string tag =
                m_in.substr(tagstart,
                            m_pos - (spos + 1 + isendtag + emptyel));
            trimtag(tag);

            // On success, parseattrs() reduces 'tag' to the bare name.
            std::map<std::string, std::string> attrs;
            if (!parseattrs(tag, attrs, tagstart)) {
                return false;
            }
            if (tag.empty()) {
                m_reason << "Empty tag name at cpos " << spos;
                return false;
            }

            if (isendtag) {
                if (m_tagstack.empty() || tag.compare(m_tagstack.back())) {
                    m_reason << "Closing not open tag " << tag <<
                        " at cpos " << m_pos;
                    return false;
                }
                m_tagstack.pop_back();
                endElement(tag);
            } else {
                // The tag is on the stack while startElement() runs
                // only after the call, as before: push after the call.
                startElement(tag, attrs);
                m_tagstack.push_back(tag);
                if (emptyel) {
                    m_tagstack.pop_back();
                    endElement(tag);
                }
            }

            // Everything up to the next '<' is character data
            const std::string::size_type textstart = m_pos;
            m_pos = m_in.find("<", m_pos);
            if (nomore()) {
                if (!m_tagstack.empty()) {
                    m_reason << "EOF hit inside open element";
                    return false;
                }
                return true;
            }
            if (m_pos != textstart) {
                characterData(m_in.substr(textstart, m_pos - textstart));
            }
        }
        // Not reached
        return false;
    }

    /**
     * @return the description of the last error, empty if parse() did
     *   not report any.
     */
    virtual std::string getReason() {
        return m_reason.str();
    }

protected:

    /* Methods to be overriden */

    /**
     * Called when seeing an opening tag. The tag is pushed on the tag
     * stack after this returns.
     * @param tagname the tag name
     * @param attrs a map of attribute name/value pairs
     */
    virtual void startElement(const std::string& /*tagname*/,
                              const std::map<std::string, std::string>&
                              /* attrs */) {
    }

    /**
     * Called when closing a tag (also called right after
     * startElement() for an empty element like <xx/>). The tag has
     * already been removed from the tag stack. You should probably
     * have been accumulating text and stuff since the tag opening.
     * @param tagname the tag name.
     */
    virtual void endElement(const std::string& /*tagname*/) {}

    /**
     * Called when we see non-tag data between two tags. The data is
     * passed unchanged (no trimming, no entity decoding).
     * @param data the data.
     */
    virtual void characterData(const std::string& /*data*/) {}

    /**
     * Gives access to the current path in the tree. Attributes are
     * not kept in there though, you'll have to do this yourself.
     * @return a const ref to a vector of tag names.
     */
    virtual const std::vector<std::string>& tagStack() {
        return m_tagstack;
    }


private:

    // The input text. This is a reference to the caller's string.
    const std::string& m_in;
    // Current position in m_in. std::string::npos after a failed search.
    std::string::size_type m_pos;
    // Accumulates the error description returned by getReason()
    std::stringstream m_reason;
    // Names of the currently open elements, outermost first
    std::vector<std::string> m_tagstack;

    /** @return true if the current position is at or beyond the end. */
    bool nomore() const {
        return m_pos == std::string::npos || m_pos == m_in.size();
    }

    /**
     * Advance pos to the first non-whitespace character of in.
     * @return false, with pos set to npos, if there is no such character.
     */
    bool skipWS(const std::string& in, std::string::size_type& pos) {
        if (pos == std::string::npos)
            return false;
        pos = in.find_first_not_of(" \t\n\r", pos);
        return pos != std::string::npos;
    }

    /**
     * Move the current position just beyond the next occurrence of str.
     * @return false, with the position set to npos, if str is not found.
     */
    bool skipStr(const std::string& str) {
        if (m_pos == std::string::npos)
            return false;
        m_pos = m_in.find(str, m_pos);
        if (m_pos != std::string::npos)
            m_pos += str.size();
        return m_pos != std::string::npos;
    }

    /** @return the character after the current one, -1 if there is none. */
    int peek() const {
        if (nomore() || m_pos + 1 >= m_in.size())
            return -1;
        return m_in[m_pos + 1];
    }

    /** Remove trailing whitespace. An all-whitespace string becomes empty. */
    void trimtag(std::string& tagname) {
        std::string::size_type trimpos = tagname.find_last_not_of(" \t\n\r");
        if (trimpos == std::string::npos) {
            tagname.clear();
        } else {
            tagname.erase(trimpos + 1);
        }
    }

    /**
     * Skip leading whitespace and any number of <? ... ?> declarations.
     * @return true with the position on the first tag '<', or at end
     *   of input if there is nothing but whitespace and declarations.
     *   false if something else than a tag or declaration comes first.
     */
    bool skipDecl() {
        for (;;) {
            if (!skipWS(m_in, m_pos)) {
                // Nothing left: not an error, parse() deals with it.
                return true;
            }
            if (m_in[m_pos] != '<') {
                m_reason << "File does not begin with decl/tag";
                return false;
            }
            if (peek() != '?') {
                break;
            }
            if (!skipStr("?>")) {
                m_reason << "EOF while looking for end of xml decl";
                return false;
            }
        }
        return true;
    }

    /**
     * Split the contents of a tag into name and attributes.
     * @param tag on input, the text found between the tag delimiters,
     *   with trailing whitespace removed. On successful return, the
     *   bare tag name.
     * @param attrs receives the attribute name/value pairs. Values must
     *   be enclosed in double quotes. A repeated name keeps the last
     *   value.
     * @param base offset of the tag text inside the input, only used
     *   for reporting error positions.
     * @return false on syntax error, after setting the reason.
     */
    bool parseattrs(std::string& tag,
                    std::map<std::string, std::string>& attrs,
                    std::string::size_type base) {
        attrs.clear();
        std::string::size_type spos = tag.find_first_of(" \t\n\r");
        if (spos == std::string::npos) {
            // No whitespace: just a name
            return true;
        }
        const std::string tagname = tag.substr(0, spos);
        if (!skipWS(tag, spos)) {
            // Nothing but whitespace after the name. Can't happen with
            // a trimmed tag, but this is harmless.
            tag = tagname;
            return true;
        }

        for (;;) {
            // spos is on the first char of an attribute name
            const std::string::size_type epos =
                tag.find_first_of(" \t\n\r=", spos);
            if (epos == std::string::npos) {
                m_reason << "Bad attributes syntax at cpos " << base + spos;
                return false;
            }
            const std::string attrnm = tag.substr(spos, epos - spos);
            if (attrnm.empty()) {
                m_reason << "Empty attribute name ?? at cpos " << base + epos;
                return false;
            }

            // Equal sign, possibly preceded by whitespace, and which
            // can't be the last character.
            std::string::size_type eqpos = epos;
            skipWS(tag, eqpos);
            if (eqpos == std::string::npos || eqpos == tag.size() - 1 ||
                tag[eqpos] != '=') {
                m_reason << "Missing equal sign or value at cpos " <<
                    base + (eqpos == std::string::npos ? epos : eqpos);
                return false;
            }

            // Opening double quote, same constraints
            std::string::size_type qpos = eqpos + 1;
            skipWS(tag, qpos);
            if (qpos == std::string::npos || qpos == tag.size() - 1 ||
                tag[qpos] != '"') {
                m_reason << "Missing dquote or value at cpos " <<
                    base + (qpos == std::string::npos ? eqpos + 1 : qpos);
                return false;
            }

            const std::string::size_type vstart = qpos + 1;
            const std::string::size_type vend = tag.find('"', vstart);
            if (vend == std::string::npos) {
                m_reason << "Missing closing dquote at cpos " << base + vstart;
                return false;
            }
            attrs[attrnm] = tag.substr(vstart, vend - vstart);

            // Anything left after the value has to be another
            // attribute and goes through the same checks.
            spos = vend + 1;
            if (!skipWS(tag, spos)) {
                break;
            }
        }
        tag = tagname;
        return true;
    }
};
292
#endif /* _PICOXML_H_INCLUDED_ */