|
a/src/internfile/myhtmlparse.h |
|
b/src/internfile/myhtmlparse.h |
|
... |
|
... |
40 |
bool in_pre_tag;
|
40 |
bool in_pre_tag;
|
41 |
bool pending_space;
|
41 |
bool pending_space;
|
42 |
map<string,string> meta;
|
42 |
map<string,string> meta;
|
43 |
static map<string, string> my_named_ents;
|
43 |
static map<string, string> my_named_ents;
|
44 |
string dump, dmtime;
|
44 |
string dump, dmtime;
|
45 |
string ocharset; // This is the charset our user thinks the doc was
|
45 |
// This is the charset our caller thinks the doc used (initially
|
46 |
// charset is declared by HtmlParser
|
46 |
// comes from the environment/configuration, used as source for
|
47 |
//string charset; // This is the charset it was supposedly converted to
|
47 |
// conversion to utf-8)
|
48 |
string doccharset; // Set this to value of charset parameter in header
|
48 |
string fromcharset;
|
|
|
49 |
// This is the charset it was supposedly converted to (always
|
|
|
50 |
// utf-8 in fact, except if conversion utterly failed)
|
|
|
51 |
string tocharset;
|
|
|
52 |
// charset is declared by HtmlParser. It is the charset from the
|
|
|
53 |
// document: default, then from html or xml header.
|
|
|
54 |
// string charset;
|
|
|
55 |
|
49 |
bool indexing_allowed;
|
56 |
bool indexing_allowed;
|
|
|
57 |
|
50 |
void process_text(const string &text);
|
58 |
void process_text(const string &text);
|
51 |
void opening_tag(const string &tag, const map<string,string> &p);
|
59 |
void opening_tag(const string &tag, const map<string,string> &p);
|
52 |
void closing_tag(const string &tag);
|
60 |
void closing_tag(const string &tag);
|
53 |
void do_eof();
|
61 |
void do_eof();
|
54 |
void decode_entities(string &s);
|
62 |
void decode_entities(string &s);
|
|
|
63 |
// Forget both the conversion source charset and the conversion
// target charset by emptying the two strings.
void reset_charsets()
{
    tocharset.clear();
    fromcharset.clear();
}
|
|
|
64 |
// Record the charsets used for conversion.
// f: stored in fromcharset (the charset the caller thinks the doc used,
//    i.e. the conversion source).
// t: stored in tocharset (the charset the doc was supposedly converted to).
void set_charsets(const string& f, const string& t)
|
|
|
65 |
{
|
|
|
66 |
fromcharset = f;
|
|
|
67 |
tocharset = t;
|
|
|
68 |
}
|
|
|
69 |
// Return the charset as determined from the HTML document
|
|
|
70 |
// Read-only access to the 'charset' member (declared by HtmlParser).
const string& get_charset()
{
    return charset;
}
|
|
|
71 |
|
55 |
MyHtmlParser();
|
72 |
MyHtmlParser();
|
56 |
};
|
73 |
};
|