|
a/src/internfile/internfile.cpp |
|
b/src/internfile/internfile.cpp |
|
... |
|
... |
652 |
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
652 |
const map<string, string>& docdata = m_handlers.back()->get_meta_data();
|
653 |
string charset, mimetype;
|
653 |
string charset, mimetype;
|
654 |
getKeyValue(docdata, cstr_dj_keycharset, charset);
|
654 |
getKeyValue(docdata, cstr_dj_keycharset, charset);
|
655 |
getKeyValue(docdata, cstr_dj_keymt, mimetype);
|
655 |
getKeyValue(docdata, cstr_dj_keymt, mimetype);
|
656 |
|
656 |
|
657 |
LOGDEB("FileInterner::addHandler: next_doc is " << (mimetype) << " target [" << (m_targetMType) << "]\n" );
|
657 |
LOGDEB("FileInterner::addHandler: next_doc is " << mimetype <<
|
|
|
658 |
" target [" << m_targetMType << "]\n");
|
658 |
|
659 |
|
659 |
// If we find a document of the target type (text/plain in
|
660 |
// If we find a document of the target type (text/plain in
|
660 |
// general), we're done decoding. If we hit text/plain, we're done
|
661 |
// general), we're done decoding. If we hit text/plain, we're done
|
661 |
// in any case
|
662 |
// in any case
|
662 |
if (!stringicmp(mimetype, m_targetMType) ||
|
663 |
if (!stringicmp(mimetype, m_targetMType) ||
|
663 |
!stringicmp(mimetype, cstr_textplain)) {
|
664 |
!stringicmp(mimetype, cstr_textplain)) {
|
664 |
m_reachedMType = mimetype;
|
665 |
m_reachedMType = mimetype;
|
665 |
LOGDEB1("FileInterner::addHandler: target reached\n" );
|
666 |
LOGDEB1("FileInterner::addHandler: target reached\n");
|
666 |
return ADD_BREAK;
|
667 |
return ADD_BREAK;
|
667 |
}
|
668 |
}
|
668 |
|
669 |
|
669 |
// We need to stack another handler. Check stack size
|
670 |
// We need to stack another handler. Check stack size
|
670 |
if (m_handlers.size() >= MAXHANDLERS) {
|
671 |
if (m_handlers.size() >= MAXHANDLERS) {
|
671 |
// Stack too big. Skip this and go on to check if there is
|
672 |
// Stack too big. Skip this and go on to check if there is
|
672 |
// something else in the current back()
|
673 |
// something else in the current back()
|
673 |
LOGERR("FileInterner::addHandler: stack too high\n" );
|
674 |
LOGERR("FileInterner::addHandler: stack too high\n");
|
674 |
return ADD_CONTINUE;
|
675 |
return ADD_CONTINUE;
|
675 |
}
|
676 |
}
|
676 |
|
677 |
|
|
|
678 |
// We must not filter out HTML when it is an intermediate
|
|
|
679 |
// conversion format. We discriminate between e.g. an HTML email
|
|
|
680 |
// attachment (needs filtering) and a result of pdf conversion
|
|
|
681 |
// (must process) by looking at the last ipath element: a
|
|
|
682 |
// conversion will have an empty one (same test as in
|
|
|
683 |
// collectIpathAndMT).
|
|
|
684 |
string ipathel;
|
|
|
685 |
getKeyValue(docdata, cstr_dj_keyipath, ipathel);
|
|
|
686 |
bool dofilter = !m_forPreview &&
|
|
|
687 |
(mimetype.compare(cstr_texthtml) || !ipathel.empty());
|
677 |
RecollFilter *newflt = getMimeHandler(mimetype, m_cfg, !m_forPreview);
|
688 |
RecollFilter *newflt = getMimeHandler(mimetype, m_cfg, dofilter);
|
678 |
if (!newflt) {
|
689 |
if (!newflt) {
|
679 |
// If we can't find a handler, this doc can't be handled
|
690 |
// If we can't find a handler, this doc can't be handled
|
680 |
// but there can be other ones so we go on
|
691 |
// but there can be other ones so we go on
|
681 |
LOGINFO("FileInterner::addHandler: no filter for [" << (mimetype) << "]\n" );
|
692 |
LOGINFO("FileInterner::addHandler: no filter for [" << mimetype <<
|
|
|
693 |
"]\n");
|
682 |
return ADD_CONTINUE;
|
694 |
return ADD_CONTINUE;
|
683 |
}
|
695 |
}
|
684 |
newflt->set_property(Dijon::Filter::OPERATING_MODE,
|
696 |
newflt->set_property(Dijon::Filter::OPERATING_MODE,
|
685 |
m_forPreview ? "view" : "index");
|
697 |
m_forPreview ? "view" : "index");
|
686 |
if (!charset.empty())
|
698 |
if (!charset.empty())
|
|
... |
|
... |
715 |
m_imgtmp = m_tempfiles.back();
|
727 |
m_imgtmp = m_tempfiles.back();
|
716 |
}
|
728 |
}
|
717 |
}
|
729 |
}
|
718 |
}
|
730 |
}
|
719 |
if (!setres) {
|
731 |
if (!setres) {
|
720 |
LOGINFO("FileInterner::addHandler: set_doc failed inside " << (m_fn) << " for mtype " << (mimetype) << "\n" );
|
732 |
LOGINFO("FileInterner::addHandler: set_doc failed inside " << m_fn <<
|
|
|
733 |
" for mtype " << mimetype << "\n");
|
721 |
delete newflt;
|
734 |
delete newflt;
|
722 |
if (m_forPreview)
|
735 |
if (m_forPreview)
|
723 |
return ADD_ERROR;
|
736 |
return ADD_ERROR;
|
724 |
return ADD_CONTINUE;
|
737 |
return ADD_CONTINUE;
|
725 |
}
|
738 |
}
|
726 |
// add handler and go on, maybe this one will give us text...
|
739 |
// add handler and go on, maybe this one will give us text...
|
727 |
m_handlers.push_back(newflt);
|
740 |
m_handlers.push_back(newflt);
|
728 |
LOGDEB1("FileInterner::addHandler: added\n" );
|
741 |
LOGDEB1("FileInterner::addHandler: added\n");
|
729 |
return ADD_OK;
|
742 |
return ADD_OK;
|
730 |
}
|
743 |
}
|
731 |
|
744 |
|
732 |
// Information and debug after a next_document error
|
745 |
// Information and debug after a next_document error
|
733 |
void FileInterner::processNextDocError(Rcl::Doc &doc)
|
746 |
void FileInterner::processNextDocError(Rcl::Doc &doc)
|
|
... |
|
... |
1001 |
// get fixed some day: internfile initialisation does not check
|
1014 |
// get fixed some day: internfile initialisation does not check
|
1002 |
// targetmtype, so that at least one conversion is always
|
1015 |
// targetmtype, so that at least one conversion is always
|
1003 |
// performed. A common case would be an "Open" on an html file
|
1016 |
// performed. A common case would be an "Open" on an html file
|
1004 |
// (we'd end up with text/plain content). As the html version is
|
1017 |
// (we'd end up with text/plain content). As the html version is
|
1005 |
// saved in this case, use it.
|
1018 |
// saved in this case, use it.
|
1006 |
if (!stringlowercmp("text/html", mimetype) && !get_html().empty()) {
|
1019 |
if (!stringlowercmp(cstr_texthtml, mimetype) && !get_html().empty()) {
|
1007 |
doc.text = get_html();
|
1020 |
doc.text = get_html();
|
1008 |
doc.mimetype = "text/html";
|
1021 |
doc.mimetype = cstr_texthtml;
|
1009 |
}
|
1022 |
}
|
1010 |
|
1023 |
|
1011 |
const char *filename;
|
1024 |
const char *filename;
|
1012 |
TempFile temp;
|
1025 |
TempFile temp;
|
1013 |
if (tofile.empty()) {
|
1026 |
if (tofile.empty()) {
|