Switch to unified view

a/src/internfile/mimehandler.cpp b/src/internfile/mimehandler.cpp
...
...
28
#include "cstr.h"
28
#include "cstr.h"
29
#include "mimehandler.h"
29
#include "mimehandler.h"
30
#include "debuglog.h"
30
#include "debuglog.h"
31
#include "rclconfig.h"
31
#include "rclconfig.h"
32
#include "smallut.h"
32
#include "smallut.h"
33
#include "md5.h"
33
34
34
#include "mh_exec.h"
35
#include "mh_exec.h"
35
#include "mh_execm.h"
36
#include "mh_execm.h"
36
#include "mh_html.h"
37
#include "mh_html.h"
37
#include "mh_mail.h"
38
#include "mh_mail.h"
...
...
43
44
44
// Performance help: we use a pool of already known and created
45
// Performance help: we use a pool of already known and created
45
// handlers. There can be several instances for a given mime type
46
// handlers. There can be several instances for a given mime type
46
// (think email attachment in email message: 2 rfc822 handlers are
47
// (think email attachment in email message: 2 rfc822 handlers are
47
// needed simulteanously)
48
// needed simulteanously)
48
static multimap<string, Dijon::Filter*>  o_handlers;
49
static multimap<string, RecollFilter*>  o_handlers;
49
static list<multimap<string, Dijon::Filter*>::iterator> o_hlru;
50
static list<multimap<string, RecollFilter*>::iterator> o_hlru;
50
typedef list<multimap<string, Dijon::Filter*>::iterator>::iterator hlruit_tp;
51
typedef list<multimap<string, RecollFilter*>::iterator>::iterator hlruit_tp;
51
52
52
static PTMutexInit o_handlers_mutex;
53
static PTMutexInit o_handlers_mutex;
53
54
54
static const unsigned int max_handlers_cache_size = 100;
55
static const unsigned int max_handlers_cache_size = 100;
55
56
56
/* Look for mime handler in pool */
57
/* Look for mime handler in pool */
57
static Dijon::Filter *getMimeHandlerFromCache(const string& key)
58
static RecollFilter *getMimeHandlerFromCache(const string& key)
58
{
59
{
59
    PTMutexLocker locker(o_handlers_mutex);
60
    PTMutexLocker locker(o_handlers_mutex);
61
    string xdigest;
62
    MD5HexPrint(key, xdigest);
60
    LOGDEB(("getMimeHandlerFromCache: %s cache size %u\n", 
63
    LOGDEB(("getMimeHandlerFromCache: %s cache size %u\n", 
61
        key.c_str(), o_handlers.size()));
64
        xdigest.c_str(), o_handlers.size()));
62
65
63
    multimap<string, Dijon::Filter *>::iterator it = o_handlers.find(key);
66
    multimap<string, RecollFilter *>::iterator it = o_handlers.find(key);
64
    if (it != o_handlers.end()) {
67
    if (it != o_handlers.end()) {
65
    Dijon::Filter *h = it->second;
68
    RecollFilter *h = it->second;
66
    hlruit_tp it1 = find(o_hlru.begin(), o_hlru.end(), it);
69
    hlruit_tp it1 = find(o_hlru.begin(), o_hlru.end(), it);
67
    if (it1 != o_hlru.end()) {
70
    if (it1 != o_hlru.end()) {
68
        o_hlru.erase(it1);
71
        o_hlru.erase(it1);
69
    } else {
72
    } else {
70
        LOGERR(("getMimeHandlerFromCache: lru position not found\n"));
73
        LOGERR(("getMimeHandlerFromCache: lru position not found\n"));
71
    }
74
    }
72
    o_handlers.erase(it);
75
    o_handlers.erase(it);
73
    LOGDEB(("getMimeHandlerFromCache: %s found size %u\n", 
76
    LOGDEB(("getMimeHandlerFromCache: %s found size %u\n", 
74
        key.c_str(), o_handlers.size()));
77
        xdigest.c_str(), o_handlers.size()));
75
    return h;
78
    return h;
76
    }
79
    }
77
    LOGDEB(("getMimeHandlerFromCache: %s not found\n", key.c_str()));
80
    LOGDEB(("getMimeHandlerFromCache: %s not found\n", xdigest.c_str()));
78
    return 0;
81
    return 0;
79
}
82
}
80
83
81
/* Return mime handler to pool */
84
/* Return mime handler to pool */
82
void returnMimeHandler(Dijon::Filter *handler)
85
void returnMimeHandler(RecollFilter *handler)
83
{
86
{
84
    typedef multimap<string, Dijon::Filter*>::value_type value_type;
87
    typedef multimap<string, RecollFilter*>::value_type value_type;
85
88
86
    if (handler==0) 
89
    if (handler == 0) {
90
  LOGERR(("returnMimeHandler: bad parameter\n"));
87
    return;
91
    return;
92
    }
88
    handler->clear();
93
    handler->clear();
89
94
90
    PTMutexLocker locker(o_handlers_mutex);
95
    PTMutexLocker locker(o_handlers_mutex);
91
96
92
    LOGDEB(("returnMimeHandler: returning filter for %s cache size %d\n", 
97
    LOGDEB(("returnMimeHandler: returning filter for %s cache size %d\n", 
...
...
95
    // Limit pool size. The pool can grow quite big because there are
100
    // Limit pool size. The pool can grow quite big because there are
96
    // many filter types, each of which can be used in several copies
101
    // many filter types, each of which can be used in several copies
97
    // at the same time either because it occurs several times in a
102
    // at the same time either because it occurs several times in a
98
    // stack (ie mail attachment to mail), or because several threads
103
    // stack (ie mail attachment to mail), or because several threads
99
    // are processing the same mime type at the same time.
104
    // are processing the same mime type at the same time.
100
    multimap<string, Dijon::Filter *>::iterator it;
105
    multimap<string, RecollFilter *>::iterator it;
101
    if (o_handlers.size() >= max_handlers_cache_size) {
106
    if (o_handlers.size() >= max_handlers_cache_size) {
102
    static int once = 1;
107
    static int once = 1;
103
    if (once) {
108
    if (once) {
104
        once = 0;
109
        once = 0;
105
        for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
110
        for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
...
...
112
        o_hlru.pop_back();
117
        o_hlru.pop_back();
113
        delete it->second;
118
        delete it->second;
114
        o_handlers.erase(it);
119
        o_handlers.erase(it);
115
    }
120
    }
116
    }
121
    }
117
    it = o_handlers.insert(value_type(handler->get_mime_type(), handler));
122
    it = o_handlers.insert(value_type(handler->get_id(), handler));
118
    o_hlru.push_front(it);
123
    o_hlru.push_front(it);
119
}
124
}
120
125
121
void clearMimeHandlerCache()
126
void clearMimeHandlerCache()
122
{
127
{
123
    LOGDEB(("clearMimeHandlerCache()\n"));
128
    LOGDEB(("clearMimeHandlerCache()\n"));
124
    typedef multimap<string, Dijon::Filter*>::value_type value_type;
129
    typedef multimap<string, RecollFilter*>::value_type value_type;
125
    map<string, Dijon::Filter *>::iterator it;
130
    map<string, RecollFilter *>::iterator it;
126
    PTMutexLocker locker(o_handlers_mutex);
131
    PTMutexLocker locker(o_handlers_mutex);
127
    for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
132
    for (it = o_handlers.begin(); it != o_handlers.end(); it++) {
128
    delete it->second;
133
    delete it->second;
129
    }
134
    }
130
    o_handlers.clear();
135
    o_handlers.clear();
131
}
136
}
132
137
133
/** For mime types set as "internal" in mimeconf: 
138
/** For mime types set as "internal" in mimeconf: 
134
  * create appropriate handler object. */
139
  * create appropriate handler object. */
135
static Dijon::Filter *mhFactory(RclConfig *config, const string &mime)
140
static RecollFilter *mhFactory(RclConfig *config, const string &mime,
141
              bool nobuild, string& id)
136
{
142
{
137
    LOGDEB2(("mhFactory(%s)\n", mime.c_str()));
143
    LOGDEB2(("mhFactory(%s)\n", mime.c_str()));
138
    string lmime(mime);
144
    string lmime(mime);
139
    stringtolower(lmime);
145
    stringtolower(lmime);
140
    if (cstr_textplain == lmime) {
146
    if (cstr_textplain == lmime) {
141
    LOGDEB2(("mhFactory(%s): returning MimeHandlerText\n", mime.c_str()));
147
    LOGDEB2(("mhFactory(%s): returning MimeHandlerText\n", mime.c_str()));
148
  MD5String("MimeHandlerText", id);
142
    return new MimeHandlerText(config, lmime);
149
    return nobuild ? 0 : new MimeHandlerText(config, id);
143
    } else if ("text/html" == lmime) {
150
    } else if ("text/html" == lmime) {
144
    LOGDEB2(("mhFactory(%s): returning MimeHandlerHtml\n", mime.c_str()));
151
    LOGDEB2(("mhFactory(%s): returning MimeHandlerHtml\n", mime.c_str()));
152
  MD5String("MimeHandlerHtml", id);
145
    return new MimeHandlerHtml(config, lmime);
153
    return nobuild ? 0 : new MimeHandlerHtml(config, id);
146
    } else if ("text/x-mail" == lmime) {
154
    } else if ("text/x-mail" == lmime) {
147
    LOGDEB2(("mhFactory(%s): returning MimeHandlerMbox\n", mime.c_str()));
155
    LOGDEB2(("mhFactory(%s): returning MimeHandlerMbox\n", mime.c_str()));
156
  MD5String("MimeHandlerMbox", id);
148
    return new MimeHandlerMbox(config, lmime);
157
    return nobuild ? 0 : new MimeHandlerMbox(config, id);
149
    } else if ("message/rfc822" == lmime) {
158
    } else if ("message/rfc822" == lmime) {
150
    LOGDEB2(("mhFactory(%s): returning MimeHandlerMail\n", mime.c_str()));
159
    LOGDEB2(("mhFactory(%s): returning MimeHandlerMail\n", mime.c_str()));
160
  MD5String("MimeHandlerMail", id);
151
    return new MimeHandlerMail(config, lmime);
161
    return nobuild ? 0 : new MimeHandlerMail(config, id);
152
    } else if ("inode/symlink" == lmime) {
162
    } else if ("inode/symlink" == lmime) {
153
    LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
163
    LOGDEB2(("mhFactory(%s): ret MimeHandlerSymlink\n", mime.c_str()));
164
  MD5String("MimeHandlerSymlink", id);
154
    return new MimeHandlerSymlink(config, lmime);
165
    return nobuild ? 0 : new MimeHandlerSymlink(config, id);
155
    } else if (lmime.find("text/") == 0) {
166
    } else if (lmime.find("text/") == 0) {
156
        // Try to handle unknown text/xx as text/plain. This
167
        // Try to handle unknown text/xx as text/plain. This
157
        // only happen if the text/xx was defined as "internal" in
168
        // only happen if the text/xx was defined as "internal" in
158
        // mimeconf, not at random. For programs, for example this
169
        // mimeconf, not at random. For programs, for example this
159
        // allows indexing and previewing as text/plain (no filter
170
        // allows indexing and previewing as text/plain (no filter
160
        // exec) but still opening with a specific editor.
171
        // exec) but still opening with a specific editor.
161
    LOGDEB2(("mhFactory(%s): returning MimeHandlerText(x)\n",mime.c_str()));
172
    LOGDEB2(("mhFactory(%s): returning MimeHandlerText(x)\n",mime.c_str()));
173
  MD5String("MimeHandlerText", id);
162
        return new MimeHandlerText(config, lmime); 
174
        return nobuild ? 0 : new MimeHandlerText(config, id); 
163
    } else {
175
    } else {
164
    // We should not get there. It means that "internal" was set
176
    // We should not get there. It means that "internal" was set
165
    // as a handler in mimeconf for a mime type we actually can't
177
    // as a handler in mimeconf for a mime type we actually can't
166
    // handle.
178
    // handle.
167
    LOGERR(("mhFactory: mime type [%s] set as internal but unknown\n", 
179
    LOGERR(("mhFactory: mime type [%s] set as internal but unknown\n", 
168
        lmime.c_str()));
180
        lmime.c_str()));
181
  MD5String("MimeHandlerUnknown", id);
169
    return new MimeHandlerUnknown(config, lmime);
182
    return nobuild ? 0 : new MimeHandlerUnknown(config, id);
170
    }
183
    }
171
}
184
}
172
185
173
static const string cstr_mh_charset("charset");
186
static const string cstr_mh_charset("charset");
174
/**
187
/**
...
...
179
 * This list is treated by replacing semi-colons with newlines and building
192
 * This list is treated by replacing semi-colons with newlines and building
180
 * a confsimple. This is done quite brutally and we don't support having
193
 * a confsimple. This is done quite brutally and we don't support having
181
 * a ';' inside a quoted string for now. Can't see a use for it.
194
 * a ';' inside a quoted string for now. Can't see a use for it.
182
 */
195
 */
183
MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
196
MimeHandlerExec *mhExecFactory(RclConfig *cfg, const string& mtype, string& hs,
184
                               bool multiple)
197
                               bool multiple, const string& id)
185
{
198
{
186
    ConfSimple attrs;
199
    ConfSimple attrs;
187
    string cmdstr;
200
    string cmdstr;
201
188
    if (!cfg->valueSplitAttributes(hs, cmdstr, attrs)) {
202
    if (!cfg->valueSplitAttributes(hs, cmdstr, attrs)) {
189
    LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n", 
203
    LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n", 
190
        mtype.c_str(), hs.c_str()));
204
        mtype.c_str(), hs.c_str()));
191
        return 0;
205
        return 0;
192
    }
206
    }
...
...
198
    LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n", 
212
    LOGERR(("mhExecFactory: bad config line for [%s]: [%s]\n", 
199
        mtype.c_str(), hs.c_str()));
213
        mtype.c_str(), hs.c_str()));
200
    return 0;
214
    return 0;
201
    }
215
    }
202
    MimeHandlerExec *h = multiple ? 
216
    MimeHandlerExec *h = multiple ? 
203
        new MimeHandlerExecMultiple(cfg, mtype.c_str()) :
217
  new MimeHandlerExecMultiple(cfg, id) :
204
        new MimeHandlerExec(cfg, mtype.c_str());
218
        new MimeHandlerExec(cfg, id);
205
    list<string>::iterator it = cmdtoks.begin();
219
    list<string>::iterator it = cmdtoks.begin();
206
    h->params.push_back(cfg->findFilter(*it++));
220
    h->params.push_back(cfg->findFilter(*it++));
207
    h->params.insert(h->params.end(), it, cmdtoks.end());
221
    h->params.insert(h->params.end(), it, cmdtoks.end());
208
222
209
    // Handle additional attributes. We substitute the semi-colons
223
    // Handle additional attributes. We substitute the semi-colons
...
...
226
240
227
    return h;
241
    return h;
228
}
242
}
229
243
230
/* Get handler/filter object for given mime type: */
244
/* Get handler/filter object for given mime type: */
231
Dijon::Filter *getMimeHandler(const string &mtype, RclConfig *cfg, 
245
RecollFilter *getMimeHandler(const string &mtype, RclConfig *cfg, 
232
                  bool filtertypes)
246
                  bool filtertypes)
233
{
247
{
234
    LOGDEB(("getMimeHandler: mtype [%s] filtertypes %d\n", 
248
    LOGDEB(("getMimeHandler: mtype [%s] filtertypes %d\n", 
235
         mtype.c_str(), filtertypes));
249
         mtype.c_str(), filtertypes));
236
    Dijon::Filter *h = 0;
250
    RecollFilter *h = 0;
237
251
238
    // Get handler definition for mime type. We do this even if an
252
    // Get handler definition for mime type. We do this even if an
239
    // appropriate handler object may be in the cache (indexed by mime
253
    // appropriate handler object may be in the cache.
240
    // type). This is fast, and necessary to conform to the
254
    // This is fast, and necessary to conform to the
241
    // configuration, (ie: text/html might be filtered out by
255
    // configuration, (ie: text/html might be filtered out by
242
    // indexedmimetypes but an html handler could still be in the
256
    // indexedmimetypes but an html handler could still be in the
243
    // cache because it was needed by some other interning stack).
257
    // cache because it was needed by some other interning stack).
244
    string hs;
258
    string hs;
245
    hs = cfg->getMimeHandlerDef(mtype, filtertypes);
259
    hs = cfg->getMimeHandlerDef(mtype, filtertypes);
260
    string id;
246
261
247
    if (!hs.empty()) { // Got a handler definition line
262
    if (!hs.empty()) { 
248
263
  // Got a handler definition line
249
        // Do we already have a handler object in the cache ?
264
  // Break definition into type (internal/exec/execm) 
250
  h = getMimeHandlerFromCache(mtype);
265
  // and name/command string 
251
  if (h != 0)
252
      goto out;
253
  LOGDEB2(("getMimeHandler: %s not in cache\n", mtype.c_str()));
254
255
  // Not in cache. Break definition into type and name/command
256
        // string and instanciate handler object
257
        string::size_type b1 = hs.find_first_of(" \t");
266
        string::size_type b1 = hs.find_first_of(" \t");
258
        string handlertype = hs.substr(0, b1);
267
        string handlertype = hs.substr(0, b1);
259
    string cmdstr;
268
    string cmdstr;
260
    if (b1 != string::npos) {
269
    if (b1 != string::npos) {
261
        cmdstr = hs.substr(b1);
270
        cmdstr = hs.substr(b1);
262
            trimstring(cmdstr);
271
            trimstring(cmdstr);
263
    }
272
    }
264
    if (!stringlowercmp("internal", handlertype)) {
273
    bool internal = !stringlowercmp("internal", handlertype);
274
  if (internal) {
275
      // For internal types let the factory compute the id
276
      mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, true, id);
277
  } else {
278
      // exec/execm: use the md5 of the def line
279
      MD5String(hs, id);
280
  }
281
282
#if 0
283
  { // string xdigest; LOGDEB2(("getMimeHandler: [%s] hs [%s] id [%s]\n", 
284
    //mtype.c_str(), hs.c_str(), MD5HexPrint(id, xdigest).c_str()));
285
  }
286
#endif
287
288
        // Do we already have a handler object in the cache ?
289
  h = getMimeHandlerFromCache(id);
290
  if (h != 0)
291
      goto out;
292
293
  LOGDEB2(("getMimeHandler: %s not in cache\n", mtype.c_str()));
294
295
  // Not in cache. 
296
  if (internal) {
265
        // If there is a parameter after "internal" it's the mime
297
        // If there is a parameter after "internal" it's the mime
266
        // type to use. This is so that we can have bogus mime
298
        // type to use. This is so that we can have bogus mime
267
        // types like text/x-purple-html-log (for ie: specific
299
        // types like text/x-purple-html-log (for ie: specific
268
        // icon) and still use the html filter on them. This is
300
        // icon) and still use the html filter on them. This is
269
        // partly redundant with the localfields/rclaptg, but
301
        // partly redundant with the localfields/rclaptg, but
270
        // better and the latter will probably go away at some
302
        // better and the latter will probably go away at some
271
        // point in the future.
303
        // point in the future.
272
        LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str()));
304
        LOGDEB2(("handlertype internal, cmdstr [%s]\n", cmdstr.c_str()));
273
      if (!cmdstr.empty()) {
305
      h = mhFactory(cfg, cmdstr.empty() ? mtype : cmdstr, false, id);
274
      // Have to redo the cache thing. Maybe we should
275
      // rather just recurse instead ?
276
      if ((h = getMimeHandlerFromCache(cmdstr)) == 0)
277
          h = mhFactory(cfg, cmdstr);
278
      } else {
279
      h = mhFactory(cfg, mtype);
280
      }
281
        goto out;
306
        goto out;
282
    } else if (!stringlowercmp("dll", handlertype)) {
307
    } else if (!stringlowercmp("dll", handlertype)) {
283
    } else {
308
    } else {
284
            if (cmdstr.empty()) {
309
            if (cmdstr.empty()) {
285
        LOGERR(("getMimeHandler: bad line for %s: %s\n", 
310
        LOGERR(("getMimeHandler: bad line for %s: %s\n", 
286
            mtype.c_str(), hs.c_str()));
311
            mtype.c_str(), hs.c_str()));
287
        goto out;
312
        goto out;
288
        }
313
        }
289
            if (!stringlowercmp("exec", handlertype)) {
314
            if (!stringlowercmp("exec", handlertype)) {
290
                h = mhExecFactory(cfg, mtype, cmdstr, false);
315
                h = mhExecFactory(cfg, mtype, cmdstr, false, id);
291
        goto out;
316
        goto out;
292
            } else if (!stringlowercmp("execm", handlertype)) {
317
            } else if (!stringlowercmp("execm", handlertype)) {
293
                h = mhExecFactory(cfg, mtype, cmdstr, true);
318
                h = mhExecFactory(cfg, mtype, cmdstr, true, id);
294
        goto out;
319
        goto out;
295
            } else {
320
            } else {
296
        LOGERR(("getMimeHandler: bad line for %s: %s\n", 
321
        LOGERR(("getMimeHandler: bad line for %s: %s\n", 
297
            mtype.c_str(), hs.c_str()));
322
            mtype.c_str(), hs.c_str()));
298
        goto out;
323
        goto out;
...
...
303
    // We get here if there was no specific error, but there is no
328
    // We get here if there was no specific error, but there is no
304
    // identified mime type, or no handler associated.
329
    // identified mime type, or no handler associated.
305
330
306
    // Finally, unhandled files are either ignored or their name and
331
    // Finally, unhandled files are either ignored or their name and
307
    // generic metadata is indexed, depending on configuration
332
    // generic metadata is indexed, depending on configuration
333
    {
308
    {bool indexunknown = false;
334
  bool indexunknown = false;
309
    cfg->getConfParam("indexallfilenames", &indexunknown);
335
    cfg->getConfParam("indexallfilenames", &indexunknown);
310
    if (indexunknown) {
336
    if (indexunknown) {
337
      MD5String("MimeHandlerUnknown", id);
311
        if ((h = getMimeHandlerFromCache("application/octet-stream")) == 0)
338
        if ((h = getMimeHandlerFromCache(id)) == 0)
312
      h = new MimeHandlerUnknown(cfg, "application/octet-stream");
339
      h = new MimeHandlerUnknown(cfg, id);
340
  }
313
        goto out;
341
    goto out;
314
  } else {
315
      goto out;
316
  }
317
    }
342
    }
318
343
319
out:
344
out:
320
    if (h) {
345
    if (h) {
321
    h->set_property(Dijon::Filter::DEFAULT_CHARSET, cfg->getDefCharset());
346
    h->set_property(RecollFilter::DEFAULT_CHARSET, cfg->getDefCharset());
322
    // In multithread context, and in case this handler is out
347
    // In multithread context, and in case this handler is out
323
    // from the cache, it may have a config pointer belonging to
348
    // from the cache, it may have a config pointer belonging to
324
    // another thread. Fix it.
349
    // another thread. Fix it.
325
    h->setConfig(cfg);
350
    h->setConfig(cfg);
326
    }
351
    }