|
a/src/internfile/internfile.cpp |
|
b/src/internfile/internfile.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.39 2008-08-26 07:33:05 dockes Exp $ (C) 2004 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: internfile.cpp,v 1.40 2008-09-05 10:36:06 dockes Exp $ (C) 2004 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
45 |
|
45 |
|
46 |
// The internal path element separator. This can't be the same as the rcldb
|
46 |
// The internal path element separator. This can't be the same as the rcldb
|
47 |
// file to ipath separator : "|"
|
47 |
// file to ipath separator : "|"
|
48 |
static const string isep(":");
|
48 |
static const string isep(":");
|
49 |
|
49 |
|
|
|
50 |
// This is used when the user wants to retrieve a search result doc's parent
|
|
|
51 |
// (ie message having a given attachment)
|
50 |
bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
52 |
bool FileInterner::getEnclosing(const string &url, const string &ipath,
|
51 |
string &eurl, string &eipath)
|
53 |
string &eurl, string &eipath)
|
52 |
{
|
54 |
{
|
53 |
eurl = url;
|
55 |
eurl = url;
|
54 |
eipath = ipath;
|
56 |
eipath = ipath;
|
|
... |
|
... |
63 |
}
|
65 |
}
|
64 |
LOGDEB(("FileInterner::getEnclosing() after: [%s]\n", eipath.c_str()));
|
66 |
LOGDEB(("FileInterner::getEnclosing() after: [%s]\n", eipath.c_str()));
|
65 |
return true;
|
67 |
return true;
|
66 |
}
|
68 |
}
|
67 |
|
69 |
|
68 |
// Execute the command to uncompress a file into a temporary one.
|
70 |
// Uncompress input file into a temporary one, by executing the appropriate
|
|
|
71 |
// script.
|
69 |
static bool uncompressfile(RclConfig *conf, const string& ifn,
|
72 |
static bool uncompressfile(RclConfig *conf, const string& ifn,
|
70 |
const list<string>& cmdv, const string& tdir,
|
73 |
const list<string>& cmdv, const string& tdir,
|
71 |
string& tfile)
|
74 |
string& tfile)
|
72 |
{
|
75 |
{
|
73 |
// Make sure tmp dir is empty. we guarantee this to filters
|
76 |
// Make sure tmp dir is empty. we guarantee this to filters
|
|
... |
|
... |
101 |
if (tfile[tfile.length() - 1] == '\n')
|
104 |
if (tfile[tfile.length() - 1] == '\n')
|
102 |
tfile.erase(tfile.length() - 1, 1);
|
105 |
tfile.erase(tfile.length() - 1, 1);
|
103 |
return true;
|
106 |
return true;
|
104 |
}
|
107 |
}
|
105 |
|
108 |
|
|
|
109 |
// Delete temporary uncompressed file
|
106 |
void FileInterner::tmpcleanup()
|
110 |
void FileInterner::tmpcleanup()
|
107 |
{
|
111 |
{
|
108 |
if (m_tdir.empty() || m_tfile.empty())
|
112 |
if (m_tdir.empty() || m_tfile.empty())
|
109 |
return;
|
113 |
return;
|
110 |
if (unlink(m_tfile.c_str()) < 0) {
|
114 |
if (unlink(m_tfile.c_str()) < 0) {
|
|
... |
|
... |
112 |
m_tfile.c_str(), errno));
|
116 |
m_tfile.c_str(), errno));
|
113 |
return;
|
117 |
return;
|
114 |
}
|
118 |
}
|
115 |
}
|
119 |
}
|
116 |
|
120 |
|
117 |
// Handler==0 on return says we're in error, will be handled when calling
|
121 |
// Constructor: identify the input file, possibly create an
|
118 |
// internfile
|
122 |
// uncompressed temporary copy, and create the top filter for the
|
|
|
123 |
// uncompressed file type.
|
|
|
124 |
//
|
|
|
125 |
// Empty handler on return says that we're in error, this will be
|
|
|
126 |
// processed by the first call to internfile().
|
119 |
FileInterner::FileInterner(const std::string &f, const struct stat *stp,
|
127 |
FileInterner::FileInterner(const std::string &f, const struct stat *stp,
|
120 |
RclConfig *cnf,
|
128 |
RclConfig *cnf,
|
121 |
const string& td, const string *imime)
|
129 |
const string& td, const string *imime)
|
122 |
: m_cfg(cnf), m_fn(f), m_forPreview(imime?true:false), m_tdir(td)
|
130 |
: m_cfg(cnf), m_fn(f), m_forPreview(imime?true:false), m_tdir(td)
|
123 |
{
|
131 |
{
|
|
... |
|
... |
196 |
it != m_handlers.end(); it++)
|
204 |
it != m_handlers.end(); it++)
|
197 |
delete *it;
|
205 |
delete *it;
|
198 |
// m_tempfiles will take care of itself
|
206 |
// m_tempfiles will take care of itself
|
199 |
}
|
207 |
}
|
200 |
|
208 |
|
|
|
209 |
// Create a temporary file for a block of data (ie: attachment) found
|
|
|
210 |
// while walking the internal document tree, with a type for which the
|
|
|
211 |
// handler needs an actual file (ie : external script).
|
201 |
bool FileInterner::dataToTempFile(const string& dt, const string& mt,
|
212 |
bool FileInterner::dataToTempFile(const string& dt, const string& mt,
|
202 |
string& fn)
|
213 |
string& fn)
|
203 |
{
|
214 |
{
|
204 |
// Find appropriate suffix for mime type
|
215 |
// Find appropriate suffix for mime type
|
205 |
TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt)));
|
216 |
TempFile temp(new TempFileInternal(m_cfg->getSuffixFromMimeType(mt)));
|
|
... |
|
... |
229 |
return true;
|
240 |
return true;
|
230 |
}
|
241 |
}
|
231 |
|
242 |
|
232 |
// See if the error string is formatted as a missing helper message,
|
243 |
// See if the error string is formatted as a missing helper message,
|
233 |
// accumulate helper name if it is
|
244 |
// accumulate helper name if it is
|
234 |
void FileInterner::maybeExternalMissing(const string& msg)
|
245 |
void FileInterner::checkExternalMissing(const string& msg)
|
235 |
{
|
246 |
{
|
236 |
if (msg.find("RECFILTERROR") == 0) {
|
247 |
if (msg.find("RECFILTERROR") == 0) {
|
237 |
list<string> lerr;
|
248 |
list<string> lerr;
|
238 |
stringToStrings(msg, lerr);
|
249 |
stringToStrings(msg, lerr);
|
239 |
if (lerr.size() > 2) {
|
250 |
if (lerr.size() > 2) {
|
|
... |
|
... |
245 |
}
|
256 |
}
|
246 |
}
|
257 |
}
|
247 |
}
|
258 |
}
|
248 |
}
|
259 |
}
|
249 |
|
260 |
|
|
|
261 |
// Return the list of missing external helper apps that we saw while
|
|
|
262 |
// working
|
250 |
const list<string>& FileInterner::getMissingExternal()
|
263 |
const list<string>& FileInterner::getMissingExternal()
|
251 |
{
|
264 |
{
|
252 |
m_missingExternal.sort();
|
265 |
m_missingExternal.sort();
|
253 |
m_missingExternal.unique();
|
266 |
m_missingExternal.unique();
|
254 |
return m_missingExternal;
|
267 |
return m_missingExternal;
|
|
... |
|
... |
258 |
m_missingExternal.sort();
|
271 |
m_missingExternal.sort();
|
259 |
m_missingExternal.unique();
|
272 |
m_missingExternal.unique();
|
260 |
stringsToString(m_missingExternal, out);
|
273 |
stringsToString(m_missingExternal, out);
|
261 |
}
|
274 |
}
|
262 |
|
275 |
|
|
|
276 |
// Helper for extracting a value from a map.
|
263 |
static inline bool getKeyValue(const map<string, string>& docdata,
|
277 |
static inline bool getKeyValue(const map<string, string>& docdata,
|
264 |
const string& key, string& value)
|
278 |
const string& key, string& value)
|
265 |
{
|
279 |
{
|
266 |
map<string,string>::const_iterator it;
|
280 |
map<string,string>::const_iterator it;
|
267 |
it = docdata.find(key);
|
281 |
it = docdata.find(key);
|
|
... |
|
... |
308 |
doc.meta.erase(keyds);
|
322 |
doc.meta.erase(keyds);
|
309 |
}
|
323 |
}
|
310 |
return true;
|
324 |
return true;
|
311 |
}
|
325 |
}
|
312 |
|
326 |
|
313 |
// Collect the ipath stack.
|
327 |
// Collect the ipath from the current path in the document tree.
|
314 |
// While we're at it, we also set the mimetype and filename, which are special
|
328 |
// While we're at it, we also set the mimetype and filename, which are special
|
315 |
// properties: we want to get them from the topmost doc
|
329 |
// properties: we want to get them from the topmost doc
|
316 |
// with an ipath, not the last one which is usually text/plain
|
330 |
// with an ipath, not the last one which is usually text/plain
|
317 |
// We also set the author and modification time from the last doc
|
331 |
// We also set the author and modification time from the last doc
|
318 |
// which has them.
|
332 |
// which has them.
|
|
... |
|
... |
368 |
}
|
382 |
}
|
369 |
delete m_handlers.back();
|
383 |
delete m_handlers.back();
|
370 |
m_handlers.pop_back();
|
384 |
m_handlers.pop_back();
|
371 |
}
|
385 |
}
|
372 |
|
386 |
|
|
|
387 |
// Result codes returned by FileInterner::addHandler(), as interpreted by
// the main loop in internfile():
//  ADD_OK:       a new handler was stacked, go on and use it.
//  ADD_CONTINUE: forget the current document and fetch the next one from
//                the current top handler.
//  ADD_BREAK:    the document has the target type: stop looping and
//                finish processing it.
//  ADD_ERROR:    fatal, internfile() returns FIError.
enum addResols {ADD_OK, ADD_CONTINUE, ADD_BREAK, ADD_ERROR};
|
|
|
388 |
|
|
|
389 |
// Just got document from current top handler. See what type it is,
|
|
|
390 |
// and possibly add a filter/handler to the stack
|
|
|
391 |
int FileInterner::addHandler()
|
|
|
392 |
{
|
|
|
393 |
const std::map<std::string, std::string>& docdata =
|
|
|
394 |
m_handlers.back()->get_meta_data();
|
|
|
395 |
string charset, mimetype;
|
|
|
396 |
getKeyValue(docdata, keycs, charset);
|
|
|
397 |
getKeyValue(docdata, keymt, mimetype);
|
|
|
398 |
|
|
|
399 |
LOGDEB(("FileInterner::addHandler: next_doc is %s\n", mimetype.c_str()));
|
|
|
400 |
// If we find a document of the target type (text/plain in
|
|
|
401 |
// general), we're done decoding
|
|
|
402 |
if (!stringicmp(mimetype, m_targetMType)) {
|
|
|
403 |
LOGDEB1(("FileInterner::addHandler: target reached\n"));
|
|
|
404 |
return ADD_BREAK;
|
|
|
405 |
}
|
|
|
406 |
|
|
|
407 |
// We need to stack another handler. Check stack size
|
|
|
408 |
if (m_handlers.size() > MAXHANDLERS) {
|
|
|
409 |
// Stack too big. Skip this and go on to check if there is
|
|
|
410 |
// something else in the current back()
|
|
|
411 |
LOGERR(("FileInterner::addHandler: stack too high\n"));
|
|
|
412 |
return ADD_CONTINUE;
|
|
|
413 |
}
|
|
|
414 |
|
|
|
415 |
Dijon::Filter *newflt = getMimeHandler(mimetype, m_cfg);
|
|
|
416 |
if (!newflt) {
|
|
|
417 |
// If we can't find a handler, this doc can't be handled
|
|
|
418 |
// but there can be other ones so we go on
|
|
|
419 |
LOGINFO(("FileInterner::addHandler: no filter for [%s]\n",
|
|
|
420 |
mimetype.c_str()));
|
|
|
421 |
return ADD_CONTINUE;
|
|
|
422 |
}
|
|
|
423 |
newflt->set_property(Dijon::Filter::OPERATING_MODE,
|
|
|
424 |
m_forPreview ? "view" : "index");
|
|
|
425 |
newflt->set_property(Dijon::Filter::DEFAULT_CHARSET, charset);
|
|
|
426 |
|
|
|
427 |
// Get content: we don't use getkeyvalue() here to avoid copying
|
|
|
428 |
// the text, which may be big.
|
|
|
429 |
string ns;
|
|
|
430 |
const string *txt = &ns;
|
|
|
431 |
{
|
|
|
432 |
map<string,string>::const_iterator it;
|
|
|
433 |
it = docdata.find(keyct);
|
|
|
434 |
if (it != docdata.end())
|
|
|
435 |
txt = &it->second;
|
|
|
436 |
}
|
|
|
437 |
bool setres = false;
|
|
|
438 |
if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
|
|
439 |
setres = newflt->set_document_string(*txt);
|
|
|
440 |
} else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
|
|
|
441 |
setres = newflt->set_document_data(txt->c_str(), txt->length());
|
|
|
442 |
} else if (newflt->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
|
|
|
443 |
string filename;
|
|
|
444 |
if (dataToTempFile(*txt, mimetype, filename)) {
|
|
|
445 |
if (!(setres = newflt->set_document_file(filename))) {
|
|
|
446 |
m_tmpflgs[m_handlers.size()-1] = false;
|
|
|
447 |
m_tempfiles.pop_back();
|
|
|
448 |
}
|
|
|
449 |
}
|
|
|
450 |
}
|
|
|
451 |
if (!setres) {
|
|
|
452 |
LOGINFO(("FileInterner::addHandler: set_doc failed inside %s "
|
|
|
453 |
" for mtype %s\n", m_fn.c_str(), mimetype.c_str()));
|
|
|
454 |
delete newflt;
|
|
|
455 |
if (m_forPreview)
|
|
|
456 |
return ADD_ERROR;
|
|
|
457 |
return ADD_CONTINUE;
|
|
|
458 |
}
|
|
|
459 |
// add handler and go on, maybe this one will give us text...
|
|
|
460 |
m_handlers.push_back(newflt);
|
|
|
461 |
LOGDEB1(("FileInterner::addHandler: added\n"));
|
|
|
462 |
return ADD_OK;
|
|
|
463 |
}
|
|
|
464 |
|
|
|
465 |
// Information and debug after a next_document error
|
|
|
466 |
void FileInterner::processNextDocError()
|
|
|
467 |
{
|
|
|
468 |
Rcl::Doc doc; string ipath;
|
|
|
469 |
collectIpathAndMT(doc, ipath);
|
|
|
470 |
m_reason = m_handlers.back()->get_error();
|
|
|
471 |
checkExternalMissing(m_reason);
|
|
|
472 |
LOGERR(("FileInterner::internfile: next_document error "
|
|
|
473 |
"[%s%s%s] %s\n", m_fn.c_str(), ipath.empty() ? "" : "|",
|
|
|
474 |
ipath.c_str(), m_reason.c_str()));
|
|
|
475 |
}
|
|
|
476 |
|
373 |
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
477 |
FileInterner::Status FileInterner::internfile(Rcl::Doc& doc, string& ipath)
|
374 |
{
|
478 |
{
|
375 |
LOGDEB(("FileInterner::internfile. ipath [%s]\n", ipath.c_str()));
|
479 |
LOGDEB(("FileInterner::internfile. ipath [%s]\n", ipath.c_str()));
|
376 |
if (m_handlers.size() < 1) {
|
480 |
if (m_handlers.size() < 1) {
|
377 |
// Just means the constructor failed
|
481 |
// Just means the constructor failed
|
378 |
LOGERR(("FileInterner::internfile: constructor failed\n"));
|
482 |
LOGERR(("FileInterner::internfile: constructor failed\n"));
|
379 |
return FIError;
|
483 |
return FIError;
|
380 |
}
|
484 |
}
|
381 |
|
485 |
|
382 |
// Ipath vector.
|
486 |
// Input Ipath vector when retrieving a given subdoc for previewing
|
383 |
// Note that the vector is big enough for the maximum stack. All values
|
487 |
// Note that the vector is big enough for the maximum stack. All values
|
384 |
// over the last significant one are ""
|
488 |
// over the last significant one are ""
|
385 |
// We set the ipath for the first handler here, others are set
|
489 |
// We set the ipath for the first handler here, others are set
|
386 |
// when they're pushed on the stack
|
490 |
// when they're pushed on the stack
|
387 |
vector<string> vipath(MAXHANDLERS);
|
491 |
vector<string> vipath(MAXHANDLERS);
|
|
... |
|
... |
394 |
LOGERR(("FileInterner::internfile: can't skip\n"));
|
498 |
LOGERR(("FileInterner::internfile: can't skip\n"));
|
395 |
return FIError;
|
499 |
return FIError;
|
396 |
}
|
500 |
}
|
397 |
}
|
501 |
}
|
398 |
|
502 |
|
399 |
/* Try to get doc from the topmost filter */
|
503 |
// Try to get doc from the topmost handler
|
400 |
// Security counter: we try not to loop but ...
|
504 |
// Security counter: looping happens when we stack one other
|
|
|
505 |
// handler or when walking the file document tree without finding
|
|
|
506 |
// something to index (typical example: email with multiple image
|
|
|
507 |
// attachments and no image filter installed). So we need to be
|
|
|
508 |
// quite generous here, especially because there is another
|
|
|
509 |
// security in the form of a maximum handler stack size.
|
401 |
int loop = 0;
|
510 |
int loop = 0;
|
402 |
while (!m_handlers.empty()) {
|
511 |
while (!m_handlers.empty()) {
|
403 |
if (loop++ > 30) {
|
512 |
if (loop++ > 1000) {
|
404 |
LOGERR(("FileInterner:: looping!\n"));
|
513 |
LOGERR(("FileInterner:: looping!\n"));
|
405 |
return FIError;
|
514 |
return FIError;
|
406 |
}
|
515 |
}
|
|
|
516 |
// If there are no more docs at the current top level we pop and
|
|
|
517 |
// see if there is something at the previous one
|
407 |
if (!m_handlers.back()->has_documents()) {
|
518 |
if (!m_handlers.back()->has_documents()) {
|
408 |
// No docs at the current top level. Pop and see if there
|
|
|
409 |
// is something at the previous one
|
|
|
410 |
popHandler();
|
519 |
popHandler();
|
411 |
continue;
|
520 |
continue;
|
412 |
}
|
521 |
}
|
413 |
|
522 |
|
414 |
// Don't stop on next_document() error. There might be ie an
|
523 |
// While indexing, don't stop on next_document() error. There
|
415 |
// error while decoding an attachment, but we still want to
|
524 |
// might be ie an error while decoding an attachment, but we
|
416 |
// process the rest of the mbox!
|
525 |
// still want to process the rest of the mbox! For preview: fatal.
|
417 |
if (!m_handlers.back()->next_document()) {
|
526 |
if (!m_handlers.back()->next_document()) {
|
418 |
Rcl::Doc doc; string ipath;
|
527 |
processNextDocError(); // Debug etc.
|
419 |
collectIpathAndMT(doc, ipath);
|
|
|
420 |
m_reason = m_handlers.back()->get_error();
|
|
|
421 |
maybeExternalMissing(m_reason);
|
|
|
422 |
LOGERR(("FileInterner::internfile: next_document error [%s%s%s] %s\n",
|
|
|
423 |
m_fn.c_str(), ipath.empty()?"":"|", ipath.c_str(),
|
|
|
424 |
m_reason.c_str()));
|
|
|
425 |
// If fetching a specific document, this is fatal
|
|
|
426 |
if (m_forPreview) {
|
528 |
if (m_forPreview)
|
427 |
return FIError;
|
529 |
return FIError;
|
428 |
}
|
|
|
429 |
popHandler();
|
530 |
popHandler();
|
430 |
continue;
|
531 |
continue;
|
431 |
}
|
532 |
}
|
432 |
|
533 |
|
433 |
// Look at what we've got
|
534 |
// Look at the type for the next document and possibly add
|
434 |
const std::map<std::string, std::string>& docdata =
|
535 |
// handler to stack.
|
435 |
m_handlers.back()->get_meta_data();
|
536 |
switch (addHandler()) {
|
436 |
string charset, mimetype;
|
537 |
case ADD_OK: // Just go through: handler has been stacked, use it
|
437 |
getKeyValue(docdata, keycs, charset);
|
|
|
438 |
getKeyValue(docdata, keymt, mimetype);
|
|
|
439 |
|
|
|
440 |
LOGDEB(("FileInterner::internfile: next_doc is %s\n",
|
|
|
441 |
mimetype.c_str()));
|
|
|
442 |
// If we find a text/plain doc, we're done
|
|
|
443 |
if (!stringicmp(mimetype, m_targetMType))
|
|
|
444 |
break;
|
538 |
break;
|
445 |
|
539 |
case ADD_CONTINUE:
|
446 |
// Got a non text/plain doc. We need to stack another
|
540 |
// forget this doc and retrieve next from current handler
|
447 |
// filter. Check current size
|
541 |
// (ipath stays same)
|
448 |
if (m_handlers.size() > MAXHANDLERS) {
|
|
|
449 |
// Stack too big. Skip this and go on to check if there is
|
|
|
450 |
// something else in the current back()
|
|
|
451 |
LOGINFO(("FileInterner::internfile: stack too high\n"));
|
|
|
452 |
continue;
|
542 |
continue;
|
453 |
}
|
543 |
case ADD_BREAK:
|
454 |
|
544 |
// Stop looping: doc type ok, need complete its processing
|
455 |
Dijon::Filter *again = getMimeHandler(mimetype, m_cfg);
|
545 |
// and return it
|
456 |
if (!again) {
|
546 |
goto breakloop; // when you have to you have to
|
457 |
// If we can't find a filter, this doc can't be handled
|
547 |
case ADD_ERROR: return FIError;
|
458 |
// but there can be other ones so we go on
|
|
|
459 |
LOGINFO(("FileInterner::internfile: no filter for [%s]\n",
|
|
|
460 |
mimetype.c_str()));
|
|
|
461 |
continue;
|
|
|
462 |
}
|
|
|
463 |
again->set_property(Dijon::Filter::OPERATING_MODE,
|
|
|
464 |
m_forPreview ? "view" : "index");
|
|
|
465 |
again->set_property(Dijon::Filter::DEFAULT_CHARSET,
|
|
|
466 |
charset);
|
|
|
467 |
string ns;
|
|
|
468 |
const string *txt = &ns;
|
|
|
469 |
map<string,string>::const_iterator it;
|
|
|
470 |
it = docdata.find("content");
|
|
|
471 |
if (it != docdata.end())
|
|
|
472 |
txt = &it->second;
|
|
|
473 |
|
|
|
474 |
bool setres = false;
|
|
|
475 |
if (again->is_data_input_ok(Dijon::Filter::DOCUMENT_STRING)) {
|
|
|
476 |
setres = again->set_document_string(*txt);
|
|
|
477 |
} else if (again->is_data_input_ok(Dijon::Filter::DOCUMENT_DATA)) {
|
|
|
478 |
setres = again->set_document_data(txt->c_str(), txt->length());
|
|
|
479 |
}else if(again->is_data_input_ok(Dijon::Filter::DOCUMENT_FILE_NAME)) {
|
|
|
480 |
string filename;
|
|
|
481 |
if (dataToTempFile(*txt, mimetype, filename)) {
|
|
|
482 |
if (!(setres = again->set_document_file(filename))) {
|
|
|
483 |
m_tmpflgs[m_handlers.size()-1] = false;
|
|
|
484 |
m_tempfiles.pop_back();
|
|
|
485 |
}
|
548 |
}
|
486 |
}
|
549 |
|
487 |
}
|
|
|
488 |
if (!setres) {
|
|
|
489 |
LOGINFO(("FileInterner::internfile: set_doc failed inside %s\n",
|
|
|
490 |
m_fn.c_str()));
|
|
|
491 |
delete again;
|
|
|
492 |
if (m_forPreview)
|
|
|
493 |
return FIError;
|
|
|
494 |
continue;
|
|
|
495 |
}
|
|
|
496 |
// add filter and go on, maybe this one will give us text...
|
|
|
497 |
m_handlers.push_back(again);
|
|
|
498 |
if (!ipath.empty() &&
|
550 |
if (!ipath.empty() &&
|
499 |
!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
|
551 |
!m_handlers.back()->skip_to_document(vipath[m_handlers.size()-1])){
|
500 |
LOGERR(("FileInterner::internfile: can't skip\n"));
|
552 |
LOGERR(("FileInterner::internfile: can't skip\n"));
|
501 |
return FIError;
|
553 |
return FIError;
|
502 |
}
|
554 |
}
|
503 |
}
|
555 |
}
|
|
|
556 |
breakloop:
|
504 |
|
557 |
|
505 |
if (m_handlers.empty()) {
|
558 |
if (m_handlers.empty()) {
|
506 |
LOGERR(("FileInterner::internfile: conversion ended with no doc\n"));
|
559 |
LOGERR(("FileInterner::internfile: conversion ended with no doc\n"));
|
507 |
return FIError;
|
560 |
return FIError;
|
508 |
}
|
561 |
}
|
509 |
|
562 |
|
510 |
// If indexing compute ipath and significant mimetype Note that
|
563 |
// If indexing compute ipath and significant mimetype.
|
511 |
// ipath is returned through the parameter not doc.ipath We also
|
564 |
// ipath is returned through the parameter not doc.ipath We also
|
512 |
// retrieve some metadata fields from the ancestors (like date or
|
565 |
// retrieve some metadata fields from the ancestors (like date or
|
513 |
// author). This is useful for email attachments. The values will
|
566 |
// author). This is useful for email attachments. The values will
|
514 |
// be replaced by those found by dijontorcl if any, so the order
|
567 |
// be replaced by those found by dijontorcl if any, so the order
|
515 |
// of calls is important.
|
568 |
// of calls is important.
|
516 |
if (!m_forPreview)
|
569 |
if (!m_forPreview)
|
517 |
collectIpathAndMT(doc, ipath);
|
570 |
collectIpathAndMT(doc, ipath);
|
518 |
// Keep this AFTER collectIpathAndMT
|
571 |
// Keep this AFTER collectIpathAndMT
|
519 |
dijontorcl(doc);
|
572 |
dijontorcl(doc);
|
520 |
|
573 |
|
521 |
// Destack what can be
|
574 |
// Possibly destack so that we can test for FIDone.
|
522 |
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
|
575 |
while (!m_handlers.empty() && !m_handlers.back()->has_documents()) {
|
523 |
popHandler();
|
576 |
popHandler();
|
524 |
}
|
577 |
}
|
525 |
if (m_handlers.empty())
|
578 |
if (m_handlers.empty())
|
526 |
return FIDone;
|
579 |
return FIDone;
|