Switch to unified view

a/src/internfile/mh_execm.cpp b/src/internfile/mh_execm.cpp
...
...
26
#include "debuglog.h"
26
#include "debuglog.h"
27
#include "cancelcheck.h"
27
#include "cancelcheck.h"
28
#include "smallut.h"
28
#include "smallut.h"
29
#include "transcode.h"
29
#include "transcode.h"
30
#include "md5.h"
30
#include "md5.h"
31
#include "rclconfig.h"
32
#include "mimetype.h"
33
#include "idfile.h"
31
34
32
#include <sys/types.h>
35
#include <sys/types.h>
33
#include <sys/wait.h>
36
#include <sys/wait.h>
34
37
35
#ifndef NO_NAMESPACES
38
#ifndef NO_NAMESPACES
...
...
37
#endif /* NO_NAMESPACES */
40
#endif /* NO_NAMESPACES */
38
41
39
bool MimeHandlerExecMultiple::startCmd()
42
bool MimeHandlerExecMultiple::startCmd()
40
{
43
{
41
    LOGDEB(("MimeHandlerExecMultiple::startCmd\n"));
44
    LOGDEB(("MimeHandlerExecMultiple::startCmd\n"));
45
    if (params.empty()) {
46
  // Hu ho
47
  LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
48
  m_reason = "RECFILTERROR BADCONFIG";
49
  return false;
50
    }
51
42
    // Command name
52
    // Command name
43
    string cmd = params.front();
53
    string cmd = params.front();
44
    
54
    
45
    // Build parameter list: delete cmd name
55
    // Build parameter list: delete cmd name
46
    list<string>::iterator it = params.begin();
56
    list<string>::iterator it = params.begin();
...
...
54
        return false;
64
        return false;
55
    }
65
    }
56
    return true;
66
    return true;
57
}
67
}
58
68
69
// Note: data is not used if this is the "document:" field: it goes
70
// directly to m_metaData["content"] to avoid an extra copy
71
// 
72
// Messages are made of data elements. Each element is like:
73
// name: len\ndata
74
// An empty line signals the end of the message, so the whole thing
75
// would look like:
76
// Name1: Len1\nData1Name2: Len2\nData2\n
59
bool MimeHandlerExecMultiple::readDataElement(string& name)
77
bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
60
{
78
{
61
    string ibuf;
79
    string ibuf;
80
81
    // Read name and length
62
    if (m_cmd.getline(ibuf) <= 0) {
82
    if (m_cmd.getline(ibuf) <= 0) {
63
        LOGERR(("MHExecMultiple: getline error\n"));
83
        LOGERR(("MHExecMultiple: getline error\n"));
64
        return false;
84
        return false;
65
    }
85
    }
86
    // Empty line (end of message) ?
66
    if (!ibuf.compare("\n")) {
87
    if (!ibuf.compare("\n")) {
67
        LOGDEB(("MHExecMultiple: Got empty line\n"));
88
        LOGDEB(("MHExecMultiple: Got empty line\n"));
68
        name = "";
89
        name = "";
69
        return true;
90
        return true;
70
    }
91
    }
71
92
72
    // We're expecting something like paramname: len\n
93
    // We're expecting something like Name: len\n
73
    list<string> tokens;
94
    list<string> tokens;
74
    stringToTokens(ibuf, tokens);
95
    stringToTokens(ibuf, tokens);
75
    if (tokens.size() != 2) {
96
    if (tokens.size() != 2) {
76
        LOGERR(("MHExecMultiple: bad line in filter output: [%s]\n",
97
        LOGERR(("MHExecMultiple: bad line in filter output: [%s]\n",
77
                ibuf.c_str()));
98
                ibuf.c_str()));
...
...
84
    if (sscanf(slen.c_str(), "%d", &len) != 1) {
105
    if (sscanf(slen.c_str(), "%d", &len) != 1) {
85
        LOGERR(("MHExecMultiple: bad line in filter output: [%s]\n",
106
        LOGERR(("MHExecMultiple: bad line in filter output: [%s]\n",
86
                ibuf.c_str()));
107
                ibuf.c_str()));
87
        return false;
108
        return false;
88
    }
109
    }
89
    LOGDEB(("MHExecMultiple: got paramname [%s] len: %d\n", 
110
    LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
90
            name.c_str(), len));
111
91
    // We only care about the "data:" field for now
112
    // Hack: check for 'Document:' and read directly the document data
92
    string discard;
113
    // to m_metaData["content"] to avoid an extra copy of the bulky
114
    // piece
93
    string *datap;
115
    string *datap = &data;
94
    if (!stringlowercmp("data:", name)) {
116
    if (!stringlowercmp("document:", name)) {
95
        datap = &m_metaData["content"];
117
        datap = &m_metaData["content"];
96
    } else {
118
    } else {
97
        datap = &discard;
119
        datap = &data;
98
    }
120
    }
99
    // Then the data.
121
122
    // Read element data
100
    datap->erase();
123
    datap->erase();
101
    if (m_cmd.receive(*datap, len) != len) {
124
    if (len > 0 && m_cmd.receive(*datap, len) != len) {
102
        LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n", 
125
        LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n", 
103
                len, datap->length()));
126
                len, datap->length()));
104
        return false;
127
        return false;
105
    }
128
    }
106
    return true;
129
    return true;
107
}
130
}
108
131
109
// Execute an external program to translate a file from its native
110
// format to text or html.
111
bool MimeHandlerExecMultiple::next_document()
132
bool MimeHandlerExecMultiple::next_document()
112
{
133
{
134
    LOGDEB(("MimeHandlerExecMultiple::next_document(): [%s]\n", m_fn.c_str()));
113
    if (m_havedoc == false)
135
    if (m_havedoc == false)
114
    return false;
136
    return false;
137
115
    if (missingHelper) {
138
    if (missingHelper) {
116
    LOGDEB(("MHExecMultiple::next_document(): helper known missing\n"));
139
    LOGDEB(("MHExecMultiple::next_document(): helper known missing\n"));
117
    return false;
140
    return false;
118
    }
141
    }
119
    if (params.empty()) {
120
  // Hu ho
121
  LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
122
  m_reason = "RECFILTERROR BADCONFIG";
123
  return false;
124
    }
125
142
126
    if (m_cmd.getChildPid() < 0 && !startCmd()) {
143
    if (m_cmd.getChildPid() < 0 && !startCmd()) {
127
        return false;
144
        return false;
128
    }
145
    }
129
146
130
    // Send request to child process
147
    // Send request to child process. This maybe the first/only
148
    // request for a given file, or a continuation request. We send an
149
    // empty file name in the latter case.
131
    ostringstream obuf;
150
    ostringstream obuf;
151
    if (m_filefirst) {
132
    obuf << "FileName: " << m_fn.length() << endl << m_fn << endl;
152
        obuf << "FileName: " << m_fn.length() << "\n" << m_fn;
153
        // m_filefirst is set to true by set_document_file()
154
        m_filefirst = false;
155
    } else {
156
        obuf << "Filename: " << 0 << "\n";
157
    }
158
    if (m_ipath.length()) {
159
        obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
160
    }
161
    obuf << "\n";
133
    if (m_cmd.send(obuf.str()) < 0) {
162
    if (m_cmd.send(obuf.str()) < 0) {
134
        LOGERR(("MHExecMultiple: send error\n"));
163
        LOGERR(("MHExecMultiple: send error\n"));
135
        return false;
164
        return false;
136
    }
165
    }
137
166
138
    // Read answer
167
    // Read answer (multiple elements)
139
    LOGDEB(("MHExecMultiple: reading answer\n"));
168
    LOGDEB1(("MHExecMultiple: reading answer\n"));
169
    bool eof_received = false;
170
    string ipath;
171
    string mtype;
140
    for (int loop=0;;loop++) {
172
    for (int loop=0;;loop++) {
141
        string name;
173
        string name, data;
142
        if (!readDataElement(name)) {
174
        if (!readDataElement(name, data)) {
143
            return false;
175
            return false;
144
        }
176
        }
145
        if (name.empty())
177
        if (name.empty())
146
            break;
178
            break;
179
        if (!stringlowercmp("eof:", name)) {
180
            LOGDEB(("MHExecMultiple: got EOF\n"));
181
            eof_received = true;
182
        }
183
        if (!stringlowercmp("ipath:", name)) {
184
            ipath = data;
185
            LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
186
        }
187
        if (!stringlowercmp("mimetype:", name)) {
188
            mtype = data;
189
            LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str()));
190
        }
147
        if (loop == 10) {
191
        if (loop == 10) {
148
            // ?? 
192
            // ?? 
149
            LOGERR(("MHExecMultiple: filter sent too many parameters\n"));
193
            LOGERR(("MHExecMultiple: filter sent too many parameters\n"));
150
            return false;
194
            return false;
151
        }
195
        }
152
    }
196
    }
153
    
197
    // The end of data can be signaled from the filter in two ways:
154
    finaldetails();
198
    // either by returning an empty document (if the filter just hits
199
    // eof while trying to read the doc), or with an "eof:" field
200
    // accompanying a normal document (if the filter hit eof at the
201
    // end of the current doc, which is the preferred way).
202
    if (m_metaData["content"].length() == 0) {
203
        LOGDEB(("MHExecMultiple: got empty document\n"));
155
    m_havedoc = false;
204
        m_havedoc = false;
205
        return false;
206
    }
207
208
    // If this has an ipath, it is an internal doc from a
209
    // multi-document file. In this case, either the filter supplies the 
210
    // mimetype, or the ipath MUST be a filename-like string which we can use
211
    // to compute a mime type
212
    if (!ipath.empty()) {
213
        m_metaData["ipath"] = ipath;
214
        if (mtype.empty()) {
215
            mtype = mimetype(ipath, 0, RclConfig::getMainConfig(), false);
216
            if (mtype.empty()) {
217
                // mimetype() won't call idFile when there is no file. Do it
218
                mtype = idFileMem(m_metaData["content"]);
219
                if (mtype.empty()) {
220
                    LOGERR(("MHExecMultiple: cant guess mime type\n"));
221
                    mtype = "application/octet-stream";
222
                }
223
            }
224
        }
225
        m_metaData["mimetype"] = mtype;
226
        string md5, xmd5;
227
        MD5String(m_metaData["content"], md5);
228
        m_metaData["md5"] = MD5HexPrint(md5, xmd5);
229
    } else {
230
        m_metaData.erase("ipath");
231
        string md5, xmd5, reason;
232
        if (MD5File(m_fn, md5, &reason)) {
233
            m_metaData["md5"] = MD5HexPrint(md5, xmd5);
234
        } else {
235
            LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
236
                    m_fn.c_str(), reason.c_str()));
237
        }
238
    }
239
240
    if (eof_received)
241
        m_havedoc = false;
242
156
    return true;
243
    return true;
157
}
244
}