|
a/src/internfile/htmlparse.cpp |
|
b/src/internfile/htmlparse.cpp |
|
... |
|
... |
21 |
* USA
|
21 |
* USA
|
22 |
* -----END-LICENCE-----
|
22 |
* -----END-LICENCE-----
|
23 |
*/
|
23 |
*/
|
24 |
|
24 |
|
25 |
#ifndef lint
|
25 |
#ifndef lint
|
26 |
static char rcsid[] = "@(#$Id: htmlparse.cpp,v 1.3 2005-11-24 07:16:15 dockes Exp $ ";
|
26 |
static char rcsid[] = "@(#$Id: htmlparse.cpp,v 1.4 2005-12-08 08:44:14 dockes Exp $ ";
|
27 |
#endif
|
27 |
#endif
|
28 |
|
28 |
|
29 |
//#include <config.h>
|
29 |
//#include <config.h>
|
30 |
|
30 |
|
31 |
#include <algorithm>
|
31 |
#include <algorithm>
|
|
... |
|
... |
271 |
{
|
271 |
{
|
272 |
map<string,string> Param;
|
272 |
map<string,string> Param;
|
273 |
string::const_iterator start = body.begin();
|
273 |
string::const_iterator start = body.begin();
|
274 |
|
274 |
|
275 |
while (1) {
|
275 |
while (1) {
|
276 |
// Skip through until we find an HTML tag, a comment, or the end of
|
|
|
277 |
// document. Ignore isolated occurrences of `<' which don't start
|
|
|
278 |
// a tag or comment
|
|
|
279 |
string::const_iterator p = start;
|
276 |
string::const_iterator p = start;
|
|
|
277 |
|
|
|
278 |
// Eat text until we find an HTML tag, a comment, or the end
|
|
|
279 |
// of document. Ignore isolated occurrences of `<' which don't
|
|
|
280 |
// start a tag or comment
|
280 |
while (1) {
|
281 |
while (1) {
|
281 |
p = find(p, body.end(), '<');
|
282 |
p = find(p, body.end(), '<');
|
282 |
if (p == body.end()) break;
|
283 |
if (p == body.end()) break;
|
283 |
char ch = *(p + 1);
|
284 |
char ch = *(p + 1);
|
284 |
// tag, closing tag, comment (or SGML declaration), or PHP
|
285 |
// tag, closing tag, comment (or SGML declaration), or PHP
|
285 |
if (isalpha(ch) || ch == '/' || ch == '!' || ch == '?') break;
|
286 |
if (isalpha(ch) || ch == '/' || ch == '!' || ch == '?') break;
|
286 |
p++;
|
287 |
p++;
|
287 |
}
|
288 |
}
|
288 |
|
289 |
|
289 |
|
290 |
// Process text
|
290 |
// process text up to start of tag
|
291 |
if (p > start || p == body.end()) {
|
291 |
if (p > start) {
|
|
|
292 |
string text = body.substr(start - body.begin(), p - start);
|
292 |
string text = body.substr(start - body.begin(), p - start);
|
293 |
decode_entities(text);
|
293 |
decode_entities(text);
|
294 |
process_text(text);
|
294 |
process_text(text);
|
295 |
}
|
295 |
}
|
296 |
|
296 |
|
297 |
if (p == body.end()) break;
|
297 |
if (p == body.end()) {
|
|
|
298 |
do_eof();
|
|
|
299 |
break;
|
|
|
300 |
}
|
298 |
|
301 |
|
299 |
start = p + 1;
|
302 |
start = p + 1;
|
300 |
|
303 |
|
301 |
if (start == body.end()) break;
|
304 |
if (start == body.end()) break;
|
302 |
|
305 |
|