|
a/src/internfile/mh_html.h |
|
b/src/internfile/mh_html.h |
|
... |
|
... |
14 |
* Free Software Foundation, Inc.,
|
14 |
* Free Software Foundation, Inc.,
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
16 |
*/
|
16 |
*/
|
17 |
#ifndef _HTML_H_INCLUDED_
|
17 |
#ifndef _HTML_H_INCLUDED_
|
18 |
#define _HTML_H_INCLUDED_
|
18 |
#define _HTML_H_INCLUDED_
|
19 |
/* @(#$Id: mh_html.h,v 1.10 2007-05-30 12:31:19 dockes Exp $ (C) 2004 J.F.Dockes */
|
19 |
/* @(#$Id: mh_html.h,v 1.11 2008-10-03 06:17:46 dockes Exp $ (C) 2004 J.F.Dockes */
|
20 |
|
20 |
|
21 |
#include <string>
|
21 |
#include <string>
|
22 |
|
22 |
|
23 |
#include "mimehandler.h"
|
23 |
#include "mimehandler.h"
|
24 |
|
24 |
|
25 |
/**
|
25 |
/**
|
26 |
Translate html document to internal one.
|
26 |
* Convert html to utf-8 text and extract whatever metadata we can find.
|
27 |
*/
|
27 |
*/
|
28 |
class MimeHandlerHtml : public RecollFilter {
|
28 |
class MimeHandlerHtml : public RecollFilter {
|
29 |
public:
|
29 |
public:
|
30 |
MimeHandlerHtml(const string& mt) : RecollFilter(mt) {}
|
30 |
MimeHandlerHtml(const string& mt) : RecollFilter(mt) {}
|
31 |
virtual ~MimeHandlerHtml() {}
|
31 |
virtual ~MimeHandlerHtml() {}
|
32 |
virtual bool set_document_file(const string &file_path);
|
32 |
virtual bool set_document_file(const string &file_path);
|
|
... |
|
... |
35 |
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
35 |
if (input == DOCUMENT_FILE_NAME || input == DOCUMENT_STRING)
|
36 |
return true;
|
36 |
return true;
|
37 |
return false;
|
37 |
return false;
|
38 |
}
|
38 |
}
|
39 |
virtual bool next_document();
|
39 |
virtual bool next_document();
|
|
|
40 |
const string& get_html()
|
|
|
41 |
{
|
|
|
42 |
return m_html;
|
|
|
43 |
}
|
|
|
44 |
|
40 |
private:
|
45 |
private:
|
41 |
string m_filename;
|
46 |
string m_filename;
|
42 |
string m_html;
|
47 |
string m_html;
|
43 |
};
|
48 |
};
|
44 |
|
49 |
|