Switch to unified view

a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.8 2005-02-04 14:21:17 dockes Exp $ (C) 2004 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: mimehandler.cpp,v 1.9 2005-03-25 09:40:27 dockes Exp $ (C) 2004 J.F.Dockes";
3
#endif
3
#endif
4
4
5
#include <iostream>
5
#include <iostream>
6
#include <string>
6
#include <string>
7
using namespace std;
7
using namespace std;
...
...
11
#include "csguess.h"
11
#include "csguess.h"
12
#include "transcode.h"
12
#include "transcode.h"
13
#include "debuglog.h"
13
#include "debuglog.h"
14
#include "smallut.h"
14
#include "smallut.h"
15
#include "html.h"
15
#include "html.h"
16
#include "mail.h"
16
#include "execmd.h"
17
#include "execmd.h"
17
#include "pathut.h"
18
#include "pathut.h"
18
19
19
class MimeHandlerText : public MimeHandler {
20
class MimeHandlerText : public MimeHandler {
20
 public:
21
 public:
21
    bool worker(RclConfig *conf, const string &fn, 
22
    MimeHandler::Status worker(RclConfig *conf, const string &fn, 
22
        const string &mtype, Rcl::Doc &docout);
23
        const string &mtype, Rcl::Doc &docout, string&);
23
    
24
    
24
};
25
};
25
26
26
// Process a plain text file
27
// Process a plain text file
27
bool MimeHandlerText::worker(RclConfig *conf, const string &fn, 
28
MimeHandler::Status MimeHandlerText::worker(RclConfig *conf, const string &fn, 
28
                 const string &mtype, Rcl::Doc &docout)
29
                 const string &mtype, Rcl::Doc &docout, string&)
29
{
30
{
30
    string otext;
31
    string otext;
31
    if (!file_to_string(fn, otext))
32
    if (!file_to_string(fn, otext))
32
  return false;
33
  return MimeHandler::MHError;
33
    
34
    
34
    // Try to guess charset, then convert to utf-8, and fill document
35
    // Try to guess charset, then convert to utf-8, and fill document
35
    // fields The charset guesser really doesnt work well in general
36
    // fields The charset guesser really doesnt work well in general
36
    // and should be avoided (especially for short documents)
37
    // and should be avoided (especially for short documents)
37
    string charset;
38
    string charset;
...
...
44
45
45
    if (!transcode(otext, utf8, charset, "UTF-8")) {
46
    if (!transcode(otext, utf8, charset, "UTF-8")) {
46
    cerr << "textPlainToDoc: transcode failed: charset '" << charset
47
    cerr << "textPlainToDoc: transcode failed: charset '" << charset
47
         << "' to UTF-8: "<< utf8 << endl;
48
         << "' to UTF-8: "<< utf8 << endl;
48
    otext.erase();
49
    otext.erase();
49
  return 0;
50
  return MimeHandler::MHError;
50
    }
51
    }
51
52
52
    Rcl::Doc out;
53
    Rcl::Doc out;
53
    out.origcharset = charset;
54
    out.origcharset = charset;
54
    out.text = utf8;
55
    out.text = utf8;
55
    docout = out;
56
    docout = out;
56
    return true;
57
    return MimeHandler::MHDone;
57
}
58
}
58
59
59
class MimeHandlerExec : public MimeHandler {
60
class MimeHandlerExec : public MimeHandler {
60
 public:
61
 public:
61
    list<string> params;
62
    list<string> params;
62
    virtual ~MimeHandlerExec() {}
63
    virtual ~MimeHandlerExec() {}
63
    virtual bool worker(RclConfig *conf, const string &fn, 
64
    virtual MimeHandler::Status worker(RclConfig *conf, const string &fn, 
64
            const string &mtype, Rcl::Doc &docout);
65
                     const string &mtype, Rcl::Doc &docout, 
66
                     string&);
65
67
66
};
68
};
67
69
68
    
70
    
69
// Execute an external program to translate a file from its native format
71
// Execute an external program to translate a file from its native format
70
// to html. Then call the html parser to do the actual indexing
72
// to html. Then call the html parser to do the actual indexing
73
MimeHandler::Status 
71
bool MimeHandlerExec::worker(RclConfig *conf, const string &fn, 
74
MimeHandlerExec::worker(RclConfig *conf, const string &fn, 
72
                 const string &mtype, Rcl::Doc &docout)
75
            const string &mtype, Rcl::Doc &docout, string&)
73
{
76
{
74
    if (params.empty()) {
77
    if (params.empty()) {
75
    // Hu ho
78
    // Hu ho
76
    LOGERR(("MimeHandlerExec::worker: empty params for mime %s\n",
79
    LOGERR(("MimeHandlerExec::worker: empty params for mime %s\n",
77
        mtype.c_str()));
80
        mtype.c_str()));
78
  return false;
81
  return MimeHandler::MHError;
79
    }
82
    }
80
    // Command name
83
    // Command name
81
    string cmd = find_filter(conf, params.front());
84
    string cmd = find_filter(conf, params.front());
82
    
85
    
83
    // Build parameter list: delete cmd name and add the file name
86
    // Build parameter list: delete cmd name and add the file name
...
...
90
    ExecCmd exec;
93
    ExecCmd exec;
91
    int status = exec.doexec(cmd, myparams, 0, &html);
94
    int status = exec.doexec(cmd, myparams, 0, &html);
92
    if (status) {
95
    if (status) {
93
    LOGERR(("MimeHandlerExec: command status 0x%x: %s\n", 
96
    LOGERR(("MimeHandlerExec: command status 0x%x: %s\n", 
94
        status, cmd.c_str()));
97
        status, cmd.c_str()));
95
  return false;
98
  return MimeHandler::MHError;
96
    }
99
    }
97
100
98
    // Process/index  the html
101
    // Process/index  the html
99
    MimeHandlerHtml hh;
102
    MimeHandlerHtml hh;
100
    return hh.worker1(conf, fn, html, mtype, docout);
103
    return hh.worker1(conf, fn, html, mtype, docout);
...
...
104
{
107
{
105
    if (!stringlowercmp("text/plain", mime))
108
    if (!stringlowercmp("text/plain", mime))
106
    return new MimeHandlerText;
109
    return new MimeHandlerText;
107
    else if (!stringlowercmp("text/html", mime))
110
    else if (!stringlowercmp("text/html", mime))
108
    return new MimeHandlerHtml;
111
    return new MimeHandlerHtml;
112
    else if (!stringlowercmp("text/x-mail", mime))
113
  return new MimeHandlerMail;
114
    else if (!stringlowercmp("message/rfc822", mime))
115
  return new MimeHandlerMail;
109
    return 0;
116
    return 0;
110
}
117
}
111
118
112
/**
119
/**
113
 * Return handler function for given mime type
120
 * Return handler function for given mime type
...
...
115
MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
122
MimeHandler *getMimeHandler(const std::string &mtype, ConfTree *mhandlers)
116
{
123
{
117
    // Return handler definition for mime type
124
    // Return handler definition for mime type
118
    string hs;
125
    string hs;
119
    if (!mhandlers->get(mtype, hs, "index")) {
126
    if (!mhandlers->get(mtype, hs, "index")) {
120
    LOGDEB(("getMimeHandler: no handler for %s\n", mtype.c_str()));
127
    LOGDEB(("getMimeHandler: no handler for '%s'\n", mtype.c_str()));
121
    return 0;
128
    return 0;
122
    }
129
    }
123
130
124
    // Break definition into type and name 
131
    // Break definition into type and name 
125
    list<string> toks;
132
    list<string> toks;