|
a/src/internfile/myhtmlparse.h |
|
b/src/internfile/myhtmlparse.h |
|
... |
|
... |
20 |
* along with this program; if not, write to the Free Software
|
20 |
* along with this program; if not, write to the Free Software
|
21 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
21 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
22 |
* USA
|
22 |
* USA
|
23 |
* -----END-LICENCE-----
|
23 |
* -----END-LICENCE-----
|
24 |
*/
|
24 |
*/
|
|
|
25 |
#include <map>
|
|
|
26 |
using std::map;
|
25 |
|
27 |
|
26 |
#include "htmlparse.h"
|
28 |
#include "htmlparse.h"
|
27 |
|
29 |
|
28 |
// FIXME: Should we include \xa0 which is non-breaking space in iso-8859-1, but
|
30 |
// FIXME: Should we include \xa0 which is non-breaking space in iso-8859-1, but
|
29 |
// not in all charsets and perhaps spans of all \xa0 should become a single
|
31 |
// not in all charsets and perhaps spans of all \xa0 should become a single
|
|
... |
|
... |
35 |
bool in_script_tag;
|
37 |
bool in_script_tag;
|
36 |
bool in_style_tag;
|
38 |
bool in_style_tag;
|
37 |
bool in_body_tag;
|
39 |
bool in_body_tag;
|
38 |
bool in_pre_tag;
|
40 |
bool in_pre_tag;
|
39 |
bool pending_space;
|
41 |
bool pending_space;
|
40 |
string title, sample, keywords, dump, dmtime, author;
|
42 |
map<string,string> meta;
|
|
|
43 |
string dump, dmtime;
|
41 |
string ocharset; // This is the charset our user thinks the doc was
|
44 |
string ocharset; // This is the charset our user thinks the doc was
|
42 |
string charset; // This is the charset it was supposedly converted to
|
45 |
string charset; // This is the charset it was supposedly converted to
|
43 |
string doccharset; // Set this to value of charset parameter in header
|
46 |
string doccharset; // Set this to value of charset parameter in header
|
44 |
bool indexing_allowed;
|
47 |
bool indexing_allowed;
|
45 |
void process_text(const string &text);
|
48 |
void process_text(const string &text);
|