|
a/src/internfile/mh_html.cpp |
|
b/src/internfile/mh_html.cpp |
|
... |
|
... |
71 |
m_havedoc = false;
|
71 |
m_havedoc = false;
|
72 |
// If set_doc(fn), take note of file name.
|
72 |
// If set_doc(fn), take note of file name.
|
73 |
string fn = m_filename;
|
73 |
string fn = m_filename;
|
74 |
m_filename.erase();
|
74 |
m_filename.erase();
|
75 |
|
75 |
|
76 |
string charset = m_defcharset;
|
76 |
string charset = m_dfltInputCharset;
|
77 |
LOGDEB(("textHtmlToDoc: next_document. defcharset before parsing: [%s]\n",
|
77 |
LOGDEB(("MHHtml::next_doc.: default supposed input charset: [%s]\n",
|
78 |
charset.c_str()));
|
78 |
charset.c_str()));
|
|
|
79 |
// Override default input charset if someone took care to set one:
|
|
|
80 |
map<string,string>::const_iterator it = m_metaData.find("charset");
|
|
|
81 |
if (it != m_metaData.end() && !it->second.empty()) {
|
|
|
82 |
charset = it->second;
|
|
|
83 |
LOGDEB(("MHHtml: next_doc.: input charset from metadata: [%s]\n",
|
|
|
84 |
charset.c_str()));
|
|
|
85 |
}
|
79 |
|
86 |
|
80 |
// - We first try to convert from the default configured charset
|
87 |
// - We first try to convert from the supposed charset
|
81 |
// (which may depend on the current directory) to utf-8. If this
|
88 |
// (which may depend on the current directory) to utf-8. If this
|
82 |
// fails, we keep the original text
|
89 |
// fails, we keep the original text
|
83 |
// - During parsing, if we find a charset parameter, and it differs from
|
90 |
// - During parsing, if we find a charset parameter, and it differs from
|
84 |
// what we started with, we abort and restart with the parameter value
|
91 |
// what we started with, we abort and restart with the parameter value
|
85 |
// instead of the configuration one.
|
92 |
// instead of the configuration one.
|