Switch to unified view

a/src/internfile/myhtmlparse.cpp b/src/internfile/myhtmlparse.cpp
...
...
32
#include "mimeparse.h"
32
#include "mimeparse.h"
33
#include "smallut.h"
33
#include "smallut.h"
34
#include "cancelcheck.h"
34
#include "cancelcheck.h"
35
#include "debuglog.h"
35
#include "debuglog.h"
36
#include "transcode.h"
36
#include "transcode.h"
37
38
map<string, string> MyHtmlParser::my_named_ents;
39
37
40
inline static bool
38
inline static bool
41
p_notdigit(char c)
39
p_notdigit(char c)
42
{
40
{
43
    return !isdigit(static_cast<unsigned char>(c));
41
    return !isdigit(static_cast<unsigned char>(c));
...
...
149
    "rdquo", "\xe2\x80\x9d", "bdquo", "\xe2\x80\x9e", "dagger", "\xe2\x80\xa0",
147
    "rdquo", "\xe2\x80\x9d", "bdquo", "\xe2\x80\x9e", "dagger", "\xe2\x80\xa0",
150
    "Dagger", "\xe2\x80\xa1", "permil", "\xe2\x80\xb0", "lsaquo", "\xe2\x80\xb9",
148
    "Dagger", "\xe2\x80\xa1", "permil", "\xe2\x80\xb0", "lsaquo", "\xe2\x80\xb9",
151
    "rsaquo", "\xe2\x80\xba", "euro", "\xe2\x82\xac",
149
    "rsaquo", "\xe2\x80\xba", "euro", "\xe2\x82\xac",
152
    NULL, NULL
150
    NULL, NULL
153
};
151
};
152
// Lookup table from HTML named-entity name to its replacement text;
// entries are copied in from the epairs array.
map<string, string> my_named_ents;
153
class NamedEntsInitializer {
154
public:
155
    NamedEntsInitializer()
156
    {
157
  for (int i = 0;;) {
158
      const char *ent;
159
      const char *val;
160
      ent = epairs[i++];
161
      if (ent == 0) 
162
      break;
163
      val = epairs[i++];
164
      if (val == 0) 
165
      break;
166
      my_named_ents[string(ent)] = val;
167
  }
168
    }
169
};
170
static NamedEntsInitializer namedEntsInitializerInstance;
154
171
155
MyHtmlParser::MyHtmlParser()
172
MyHtmlParser::MyHtmlParser()
156
    : in_script_tag(false),
173
    : in_script_tag(false),
157
      in_style_tag(false),
174
      in_style_tag(false),
158
      in_body_tag(false),
175
      in_body_tag(false),
...
...
161
      indexing_allowed(true)
178
      indexing_allowed(true)
162
{
179
{
163
    // The default html document charset is iso-8859-1. We'll update
180
    // The default html document charset is iso-8859-1. We'll update
164
    // this value from the encoding tag if found.
181
    // this value from the encoding tag if found.
165
    charset = "iso-8859-1";
182
    charset = "iso-8859-1";
166
167
    if (my_named_ents.empty()) {
168
  for (int i = 0;;) {
169
      const char *ent;
170
      const char *val;
171
      ent = epairs[i++];
172
      if (ent == 0) 
173
      break;
174
      val = epairs[i++];
175
      if (val == 0) 
176
      break;
177
      my_named_ents[string(ent)] = val;
178
  }
179
    }
180
}
183
}
181
184
182
void MyHtmlParser::decode_entities(string &s)
185
void MyHtmlParser::decode_entities(string &s)
183
{
186
{
184
    LOGDEB2(("MyHtmlParser::decode_entities\n"));
187
    LOGDEB2(("MyHtmlParser::decode_entities\n"));