|
a/src/internfile/mh_html.h |
|
b/src/internfile/mh_html.h |
|
... |
|
... |
24 |
/**
|
24 |
/**
|
25 |
* Convert html to utf-8 text and extract whatever metadata we can find.
|
25 |
* Convert html to utf-8 text and extract whatever metadata we can find.
|
26 |
*/
|
26 |
*/
|
27 |
class MimeHandlerHtml : public RecollFilter {
|
27 |
class MimeHandlerHtml : public RecollFilter {
|
28 |
public:
|
28 |
public:
|
29 |
MimeHandlerHtml(RclConfig *cnf, const string& mt)
|
29 |
MimeHandlerHtml(RclConfig *cnf, const string& id)
|
30 |
: RecollFilter(cnf, mt) {}
|
30 |
: RecollFilter(cnf, id)
|
|
|
31 |
{
|
|
|
32 |
}
|
31 |
virtual ~MimeHandlerHtml() {}
|
33 |
virtual ~MimeHandlerHtml()
|
|
|
34 |
{
|
|
|
35 |
}
|
32 |
virtual bool set_document_file(const string &file_path);
|
36 |
virtual bool set_document_file(const string& mt, const string &file_path);
|
33 |
virtual bool set_document_string(const string &data);
|
37 |
virtual bool set_document_string(const string& mt, const string &data);
|
34 |
virtual bool is_data_input_ok(DataInput input) const {
|
38 |
virtual bool is_data_input_ok(DataInput input) const {
|
35 |
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
39 |
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
36 |
return true;
|
40 |
return true;
|
37 |
return false;
|
41 |
return false;
|
38 |
}
|
42 |
}
|