|
a/src/internfile/mimehandler.cpp |
|
b/src/internfile/mimehandler.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.8 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.9 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
|
4 |
|
5 |
#include <iostream>
|
5 |
#include <iostream>
|
6 |
#include <string>
|
6 |
#include <string>
|
7 |
using namespace std;
|
7 |
using namespace std;
|
|
... |
|
... |
11 |
#include "csguess.h"
|
11 |
#include "csguess.h"
|
12 |
#include "transcode.h"
|
12 |
#include "transcode.h"
|
13 |
#include "debuglog.h"
|
13 |
#include "debuglog.h"
|
14 |
#include "smallut.h"
|
14 |
#include "smallut.h"
|
15 |
#include "html.h"
|
15 |
#include "html.h"
|
|
|
16 |
#include "mail.h"
|
16 |
#include "execmd.h"
|
17 |
#include "execmd.h"
|
17 |
#include "pathut.h"
|
18 |
#include "pathut.h"
|
18 |
|
19 |
|
19 |
class MimeHandlerText : public MimeHandler {
|
20 |
class MimeHandlerText : public MimeHandler {
|
20 |
public:
|
21 |
public:
|
21 |
bool worker(RclConfig *conf, const string &fn,
|
22 |
MimeHandler::Status worker(RclConfig *conf, const string &fn,
|
22 |
const string &mtype, Rcl::Doc &docout);
|
23 |
const string &mtype, Rcl::Doc &docout, string&);
|
23 |
|
24 |
|
24 |
};
|
25 |
};
|
25 |
|
26 |
|
26 |
// Process a plain text file
|
27 |
// Process a plain text file
|
27 |
bool MimeHandlerText::worker(RclConfig *conf, const string &fn,
|
28 |
MimeHandler::Status MimeHandlerText::worker(RclConfig *conf, const string &fn,
|
28 |
const string &mtype, Rcl::Doc &docout)
|
29 |
const string &mtype, Rcl::Doc &docout, string&)
|
29 |
{
|
30 |
{
|
30 |
string otext;
|
31 |
string otext;
|
31 |
if (!file_to_string(fn, otext))
|
32 |
if (!file_to_string(fn, otext))
|
32 |
return false;
|
33 |
return MimeHandler::MHError;
|
33 |
|
34 |
|
34 |
// Try to guess charset, then convert to utf-8, and fill document
|
35 |
// Try to guess charset, then convert to utf-8, and fill document
|
35 |
// fields The charset guesser really doesnt work well in general
|
36 |
// fields The charset guesser really doesnt work well in general
|
36 |
// and should be avoided (especially for short documents)
|
37 |
// and should be avoided (especially for short documents)
|
37 |
string charset;
|
38 |
string charset;
|
|
... |
|
... |
44 |
|
45 |
|
45 |
if (!transcode(otext, utf8, charset, "UTF-8")) {
|
46 |
if (!transcode(otext, utf8, charset, "UTF-8")) {
|
46 |
cerr << "textPlainToDoc: transcode failed: charset '" << charset
|
47 |
cerr << "textPlainToDoc: transcode failed: charset '" << charset
|
47 |
<< "' to UTF-8: "<< utf8 << endl;
|
48 |
<< "' to UTF-8: "<< utf8 << endl;
|
48 |
otext.erase();
|
49 |
otext.erase();
|
49 |
return 0;
|
50 |
return MimeHandler::MHError;
|
50 |
}
|
51 |
}
|
51 |
|
52 |
|
52 |
Rcl::Doc out;
|
53 |
Rcl::Doc out;
|
53 |
out.origcharset = charset;
|
54 |
out.origcharset = charset;
|
54 |
out.text = utf8;
|
55 |
out.text = utf8;
|
55 |
docout = out;
|
56 |
docout = out;
|
56 |
return true;
|
57 |
return MimeHandler::MHDone;
|
57 |
}
|
58 |
}
|
58 |
|
59 |
|
59 |
class MimeHandlerExec : public MimeHandler {
|
60 |
class MimeHandlerExec : public MimeHandler {
|
60 |
public:
|
61 |
public:
|
61 |
list<string> params;
|
62 |
list<string> params;
|
62 |
virtual ~MimeHandlerExec() {}
|
63 |
virtual ~MimeHandlerExec() {}
|
63 |
virtual bool worker(RclConfig *conf, const string &fn,
|
64 |
virtual MimeHandler::Status worker(RclConfig *conf, const string &fn,
|
64 |
const string &mtype, Rcl::Doc &docout);
|
65 |
const string &mtype, Rcl::Doc &docout,
|
|
|
66 |
string&);
|
65 |
|
67 |
|
66 |
};
|
68 |
};
|
67 |
|
69 |
|
68 |
|
70 |
|
69 |
// Execute an external program to translate a file from its native format
|
71 |
// Execute an external program to translate a file from its native format
|
70 |
// to html. Then call the html parser to do the actual indexing
|
72 |
// to html. Then call the html parser to do the actual indexing
|
|
|
73 |
MimeHandler::Status
|
71 |
bool MimeHandlerExec::worker(RclConfig *conf, const string &fn,
|
74 |
MimeHandlerExec::worker(RclConfig *conf, const string &fn,
|
72 |
const string &mtype, Rcl::Doc &docout)
|
75 |
const string &mtype, Rcl::Doc &docout, string&)
|
73 |
{
|
76 |
{
|
74 |
if (params.empty()) {
|
77 |
if (params.empty()) {
|
75 |
// Hu ho
|
78 |
// Hu ho
|
76 |
LOGERR(("MimeHandlerExec::worker: empty params for mime %s\n",
|
79 |
LOGERR(("MimeHandlerExec::worker: empty params for mime %s\n",
|
77 |
mtype.c_str()));
|
80 |
mtype.c_str()));
|
78 |
return false;
|
81 |
return MimeHandler::MHError;
|
79 |
}
|
82 |
}
|
80 |
// Command name
|
83 |
// Command name
|
81 |
string cmd = find_filter(conf, params.front());
|
84 |
string cmd = find_filter(conf, params.front());
|
82 |
|
85 |
|
83 |
// Build parameter list: delete cmd name and add the file name
|
86 |
// Build parameter list: delete cmd name and add the file name
|
|
... |
|
... |
90 |
ExecCmd exec;
|
93 |
ExecCmd exec;
|
91 |
int status = exec.doexec(cmd, myparams, 0, &html);
|
94 |
int status = exec.doexec(cmd, myparams, 0, &html);
|
92 |
if (status) {
|
95 |
if (status) {
|
93 |
LOGERR(("MimeHandlerExec: command status 0x%x: %s\n",
|
96 |
LOGERR(("MimeHandlerExec: command status 0x%x: %s\n",
|
94 |
status, cmd.c_str()));
|
97 |
status, cmd.c_str()));
|
95 |
return false;
|
98 |
return MimeHandler::MHError;
|
96 |
}
|
99 |
}
|
97 |
|
100 |
|
98 |
// Process/index the html
|
101 |
// Process/index the html
|
99 |
MimeHandlerHtml hh;
|
102 |
MimeHandlerHtml hh;
|
100 |
return hh.worker1(conf, fn, html, mtype, docout);
|
103 |
return hh.worker1(conf, fn, html, mtype, docout);
|
|
... |
|
... |
104 |
{
|
107 |
{
|
105 |
if (!stringlowercmp("text/plain", mime))
|
108 |
if (!stringlowercmp("text/plain", mime))
|
106 |
return new MimeHandlerText;
|
109 |
return new MimeHandlerText;
|
107 |
else if (!stringlowercmp("text/html", mime))
|
110 |
else if (!stringlowercmp("text/html", mime))
|
108 |
return new MimeHandlerHtml;
|
111 |
return new MimeHandlerHtml;
|
|
|
112 |
else if (!stringlowercmp("text/x-mail", mime))
|
|
|
113 |
return new MimeHandlerMail;
|
|
|
114 |
else if (!stringlowercmp("message/rfc822", mime))
|
|
|
115 |
return new MimeHandlerMail;
|
109 |
return 0;
|
116 |
return 0;
|
110 |
}
|
117 |
}
|
111 |
|
118 |
|
112 |
/**
|
119 |
/**
|
113 |
* Return handler function for given mime type
|
120 |
* Return handler function for given mime type
|
|
... |
|
... |
115 |
MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
|
122 |
MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
|
116 |
{
|
123 |
{
|
117 |
// Return handler definition for mime type
|
124 |
// Return handler definition for mime type
|
118 |
string hs;
|
125 |
string hs;
|
119 |
if (!mhandlers->get(mtype, hs, "index")) {
|
126 |
if (!mhandlers->get(mtype, hs, "index")) {
|
120 |
LOGDEB(("getMimeHandler: no handler for %s\n", mtype.c_str()));
|
127 |
LOGDEB(("getMimeHandler: no handler for '%s'\n", mtype.c_str()));
|
121 |
return 0;
|
128 |
return 0;
|
122 |
}
|
129 |
}
|
123 |
|
130 |
|
124 |
// Break definition into type and name
|
131 |
// Break definition into type and name
|
125 |
list<string> toks;
|
132 |
list<string> toks;
|