Parent: [1f8fbc] (diff)

Child: [d392d3] (diff)

Download this file

mimehandler.cpp    186 lines (163 with data), 4.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#ifndef lint
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.8 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <iostream>
#include <string>
using namespace std;
#include "mimehandler.h"
#include "readfile.h"
#include "csguess.h"
#include "transcode.h"
#include "debuglog.h"
#include "smallut.h"
#include "html.h"
#include "execmd.h"
#include "pathut.h"
class MimeHandlerText : public MimeHandler {
public:
bool worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout);
};
// Process a plain text file
bool MimeHandlerText::worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout)
{
string otext;
if (!file_to_string(fn, otext))
return false;
// Try to guess charset, then convert to utf-8, and fill document
// fields The charset guesser really doesnt work well in general
// and should be avoided (especially for short documents)
string charset;
if (conf->getGuessCharset()) {
charset = csguess(otext, conf->getDefCharset());
} else
charset = conf->getDefCharset();
string utf8;
LOGDEB1(("textPlainToDoc: transcod from %s to %s\n", charset, "UTF-8"));
if (!transcode(otext, utf8, charset, "UTF-8")) {
cerr << "textPlainToDoc: transcode failed: charset '" << charset
<< "' to UTF-8: "<< utf8 << endl;
otext.erase();
return 0;
}
Rcl::Doc out;
out.origcharset = charset;
out.text = utf8;
docout = out;
return true;
}
class MimeHandlerExec : public MimeHandler {
public:
list<string> params;
virtual ~MimeHandlerExec() {}
virtual bool worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout);
};
// Execute an external program to translate a file from its native format
// to html. Then call the html parser to do the actual indexing
bool MimeHandlerExec::worker(RclConfig *conf, const string &fn,
const string &mtype, Rcl::Doc &docout)
{
if (params.empty()) {
// Hu ho
LOGERR(("MimeHandlerExec::worker: empty params for mime %s\n",
mtype.c_str()));
return false;
}
// Command name
string cmd = find_filter(conf, params.front());
// Build parameter list: delete cmd name and add the file name
list<string>::iterator it = params.begin();
list<string>myparams(++it, params.end());
myparams.push_back(fn);
// Execute command and store the result text, which is supposedly html
string html;
ExecCmd exec;
int status = exec.doexec(cmd, myparams, 0, &html);
if (status) {
LOGERR(("MimeHandlerExec: command status 0x%x: %s\n",
status, cmd.c_str()));
return false;
}
// Process/index the html
MimeHandlerHtml hh;
return hh.worker1(conf, fn, html, mtype, docout);
}
static MimeHandler *mhfact(const string &mime)
{
if (!stringlowercmp("text/plain", mime))
return new MimeHandlerText;
else if (!stringlowercmp("text/html", mime))
return new MimeHandlerHtml;
return 0;
}
/**
* Return handler function for given mime type
*/
MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
{
// Return handler definition for mime type
string hs;
if (!mhandlers->get(mtype, hs, "index")) {
LOGDEB(("getMimeHandler: no handler for %s\n", mtype.c_str()));
return 0;
}
// Break definition into type and name
list<string> toks;
ConfTree::stringToStrings(hs, toks);
if (toks.empty()) {
LOGERR(("getMimeHandler: bad mimeconf line for %s\n", mtype.c_str()));
return 0;
}
// Retrieve handler function according to type
if (!stringlowercmp("internal", toks.front())) {
return mhfact(mtype);
} else if (!stringlowercmp("dll", toks.front())) {
return 0;
} else if (!stringlowercmp("exec", toks.front())) {
if (toks.size() < 2) {
LOGERR(("getMimeHandler: bad line for %s: %s\n", mtype.c_str(),
hs.c_str()));
return 0;
}
MimeHandlerExec *h = new MimeHandlerExec;
list<string>::const_iterator it1 = toks.begin();
it1++;
for (;it1 != toks.end();it1++)
h->params.push_back(*it1);
return h;
}
return 0;
}
/**
* Return external viewer exec string for given mime type
*/
string getMimeViewer(const std::string &mtype, ConfTree *mhandlers)
{
string hs;
mhandlers->get(mtype, hs, "view");
return hs;
}
/**
* Return decompression command line for given mime type
*/
bool getUncompressor(const std::string &mtype, ConfTree *mhandlers,
list<string>& cmd)
{
string hs;
mhandlers->get(mtype, hs, "");
if (hs.empty())
return false;
list<string> tokens;
ConfTree::stringToStrings(hs, tokens);
if (tokens.empty()) {
LOGERR(("getUncompressor: empty spec for mtype %s\n", mtype.c_str()));
return false;
}
if (stringlowercmp("uncompress", tokens.front()))
return false;
list<string>::iterator it = tokens.begin();
cmd.assign(++it, tokens.end());
return true;
}