Parent: [68fb37] (diff)

Child: [548a4c] (diff)

Download this file

internfile.cpp    151 lines (136 with data), 4.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#ifndef lint
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.6 2005-11-08 21:02:55 dockes Exp $ (C) 2004 J.F.Dockes";
#endif
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <string>
#include <iostream>
using namespace std;
#include "internfile.h"
#include "mimetype.h"
#include "debuglog.h"
#include "mimehandler.h"
#include "execmd.h"
#include "pathut.h"
#include "wipedir.h"
// Expand the '%' escapes found in one element of an uncompress command:
//   %f is replaced by the input file name,
//   %t is replaced by the temporary directory,
//   %% is replaced by a literal '%'.
// A lone '%' ending the element is kept as is. Any other character
// following a '%' is dropped together with the '%'.
static string expandcmdarg(const string& arg, const string& ifn,
                           const string& tdir)
{
    string out;
    for (string::const_iterator it = arg.begin(); it != arg.end(); ++it) {
        if (*it != '%') {
            out += *it;
            continue;
        }
        // Look at the character following the '%'
        if (++it == arg.end()) {
            out += '%';
            break;
        }
        switch (*it) {
        case '%': out += '%'; break;
        case 'f': out += ifn; break;
        case 't': out += tdir; break;
        default: break; // Unknown escape: nothing is output
        }
    }
    return out;
}

// Execute the command to uncompress a file into a temporary one.
//
// @param conf  configuration, handed to find_filter() to locate the command
// @param ifn   name of the compressed input file (substituted for %f)
// @param cmdv  command vector: first element is the command name, the
//              following ones are its arguments, with %-escapes
// @param tdir  temporary directory (substituted for %t). It is emptied
//              before the command runs.
// @param tfile receives the command output, which is taken to be the name
//              of the uncompressed file. One trailing newline is stripped.
// @return true if the command ran with a zero status and produced a
//         non-empty file name, false otherwise.
static bool uncompressfile(RclConfig *conf, const string& ifn,
                           const list<string>& cmdv, const string& tdir,
                           string& tfile)
{
    // Calling front() on an empty list is undefined: refuse an empty command
    if (cmdv.empty()) {
        LOGERR(("uncompressfile: empty command for %s\n", ifn.c_str()));
        return false;
    }

    // Make sure tmp dir is empty. we guarantee this to filters
    if (wipedir(tdir) != 0) {
        LOGERR(("uncompressfile: can't clear temp dir %s\n", tdir.c_str()));
        return false;
    }

    string cmd = find_filter(conf, cmdv.front());

    // Substitute file name and temp dir in command elements. The first
    // element is the command itself and is skipped here.
    list<string> args;
    list<string>::const_iterator it = cmdv.begin();
    for (++it; it != cmdv.end(); ++it) {
        args.push_back(expandcmdarg(*it, ifn, tdir));
    }

    // Execute command and retrieve output file name
    ExecCmd ex;
    int status = ex.doexec(cmd, args, 0, &tfile);
    if (status) {
        LOGERR(("uncompressfile: doexec: status 0x%x\n", status));
        rmdir(tdir.c_str());
        return false;
    }

    if (!tfile.empty() && tfile[tfile.length() - 1] == '\n')
        tfile.erase(tfile.length() - 1, 1);

    // A successful status with no file name is of no use to the caller:
    // report it as a failure instead of returning an empty path.
    if (tfile.empty()) {
        LOGERR(("uncompressfile: no output file name for %s\n", ifn.c_str()));
        rmdir(tdir.c_str());
        return false;
    }
    return true;
}
// Remove the temporary uncompressed file, if any. Nothing is done unless
// both the temporary directory and temporary file names are set. A failed
// unlink is logged with the errno value.
void FileInterner::tmpcleanup()
{
    const bool have_temp = !tdir.empty() && !tfile.empty();
    if (have_temp && unlink(tfile.c_str()) < 0) {
        LOGERR(("FileInterner::tmpcleanup: unlink(%s) errno %d\n",
                tfile.c_str(), errno));
    }
}
// Set up the interner for file f: identify its mime type, uncompress it
// to a temporary file inside td if an uncompressor is configured for that
// type, then fetch the handler for the resulting type.
// On return, handler == 0 means that we are in error (no mime type,
// failed decompression, or no handler for the type).
FileInterner::FileInterner(const std::string &f, RclConfig *cnf,
                           const string& td)
    : fn(f), config(cnf), tdir(td), handler(0)
{
    mime = mimetype(fn, config->getMimeMap());
    if (mime.empty()) {
        // No mime type: not listed in our map, or present in stop list
        LOGDEB(("FileInterner::FileInterner: (no mime) [%s]\n", fn.c_str()));
        return;
    }

    // Compressed file ? If so, work on a temporary uncompressed copy:
    // the mime type is recomputed from the temporary file name and
    // everything after this point uses that file.
    list<string> uncompress_cmd;
    const bool compressed =
        getUncompressor(mime, config->getMimeConf(), uncompress_cmd);
    if (compressed) {
        const bool uncompressed =
            uncompressfile(config, fn, uncompress_cmd, tdir, tfile);
        if (!uncompressed) {
            return;
        }
        LOGDEB(("internfile: after ucomp: tdir %s, tfile %s\n",
                tdir.c_str(), tfile.c_str()));
        fn = tfile;
        mime = mimetype(fn, config->getMimeMap());
        if (mime.empty()) {
            // The uncompressed file has no known mime type: give up
            LOGDEB(("internfile: (no mime) [%s]\n", fn.c_str()));
            return;
        }
    }

    // Fetch the handler for the final mime type
    handler = getMimeHandler(mime, config->getMimeConf());
    if (handler) {
        LOGDEB(("FileInterner::FileInterner: %s [%s]\n",mime.c_str(), fn.c_str()));
    } else {
        // No handler for this type, for now :(
        LOGDEB(("FileInterner::FileInterner: %s: no handler\n", mime.c_str()));
    }
}
// Turn the file into a document by calling the mime handler. The document
// has fields for title, body etc., all text converted to utf8.
// Returns FIError if there is no handler or the handler reports an error,
// else FIDone or FIAgain according to the handler status. doc.mimetype is
// set to our mime type whenever the handler was called.
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
{
    if (handler == 0) {
        return FIError;
    }

    const MimeHandler::Status handler_status =
        handler->mkDoc(config, fn, mime, doc, ipath);
    doc.mimetype = mime;

    // Translate the handler status into ours. Anything which is neither
    // done nor again is an error.
    if (handler_status == MimeHandler::MHDone) {
        return FIDone;
    }
    if (handler_status == MimeHandler::MHAgain) {
        return FIAgain;
    }
    return FIError;
}
// Release the mime handler, then remove the temporary uncompressed file
// if there is one.
FileInterner::~FileInterner()
{
    if (handler != 0) {
        delete handler;
        handler = 0;
    }
    tmpcleanup();
}