|
a/src/internfile/mh_html.h |
|
b/src/internfile/mh_html.h |
|
... |
|
... |
25 |
/**
|
25 |
/**
|
26 |
* Convert html to utf-8 text and extract whatever metadata we can find.
|
26 |
* Convert html to utf-8 text and extract whatever metadata we can find.
|
27 |
*/
|
27 |
*/
|
28 |
class MimeHandlerHtml : public RecollFilter {
|
28 |
class MimeHandlerHtml : public RecollFilter {
|
29 |
public:
|
29 |
public:
|
30 |
MimeHandlerHtml(const string& mt) : RecollFilter(mt) {}
|
30 |
MimeHandlerHtml(RclConfig *cnf, const string& mt)
|
|
|
31 |
: RecollFilter(cnf, mt) {}
|
31 |
virtual ~MimeHandlerHtml() {}
|
32 |
virtual ~MimeHandlerHtml() {}
|
32 |
virtual bool set_document_file(const string &file_path);
|
33 |
virtual bool set_document_file(const string &file_path);
|
33 |
virtual bool set_document_string(const string &data);
|
34 |
virtual bool set_document_string(const string &data);
|
34 |
virtual bool is_data_input_ok(DataInput input) const {
|
35 |
virtual bool is_data_input_ok(DataInput input) const {
|
35 |
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
36 |
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|