Switch to side-by-side view

--- a/src/internfile/mh_execm.cpp
+++ b/src/internfile/mh_execm.cpp
@@ -28,6 +28,9 @@
 #include "smallut.h"
 #include "transcode.h"
 #include "md5.h"
+#include "rclconfig.h"
+#include "mimetype.h"
+#include "idfile.h"
 
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -39,6 +42,13 @@
 bool MimeHandlerExecMultiple::startCmd()
 {
     LOGDEB(("MimeHandlerExecMultiple::startCmd\n"));
+    if (params.empty()) {
+	// Hu ho
+	LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
+	m_reason = "RECFILTERROR BADCONFIG";
+	return false;
+    }
+
     // Command name
     string cmd = params.front();
     
@@ -56,20 +66,31 @@
     return true;
 }
 
-bool MimeHandlerExecMultiple::readDataElement(string& name)
+// Note: data is not used if this is the "document:" field: it goes
+// directly to m_metaData["content"] to avoid an extra copy
+// 
+// Messages are made of data elements. Each element is like:
+// name: len\ndata
+// An empty line signals the end of the message, so the whole thing
+// would look like:
+// Name1: Len1\nData1Name2: Len2\nData2\n
+bool MimeHandlerExecMultiple::readDataElement(string& name, string &data)
 {
     string ibuf;
+
+    // Read name and length
     if (m_cmd.getline(ibuf) <= 0) {
         LOGERR(("MHExecMultiple: getline error\n"));
         return false;
     }
+    // Empty line (end of message) ?
     if (!ibuf.compare("\n")) {
         LOGDEB(("MHExecMultiple: Got empty line\n"));
         name = "";
         return true;
     }
 
-    // We're expecting something like paramname: len\n
+    // We're expecting something like Name: len\n
     list<string> tokens;
     stringToTokens(ibuf, tokens);
     if (tokens.size() != 2) {
@@ -86,19 +107,21 @@
                 ibuf.c_str()));
         return false;
     }
-    LOGDEB(("MHExecMultiple: got paramname [%s] len: %d\n", 
-            name.c_str(), len));
-    // We only care about the "data:" field for now
-    string discard;
-    string *datap;
-    if (!stringlowercmp("data:", name)) {
+    LOGDEB1(("MHExecMultiple: got name [%s] len: %d\n", name.c_str(), len));
+
+    // Hack: check for 'Document:' and read directly the document data
+    // to m_metaData["content"] to avoid an extra copy of the bulky
+    // piece
+    string *datap = &data;
+    if (!stringlowercmp("document:", name)) {
         datap = &m_metaData["content"];
     } else {
-        datap = &discard;
-    }
-    // Then the data.
+        datap = &data;
+    }
+
+    // Read element data
     datap->erase();
-    if (m_cmd.receive(*datap, len) != len) {
+    if (len > 0 && m_cmd.receive(*datap, len) != len) {
         LOGERR(("MHExecMultiple: expected %d bytes of data, got %d\n", 
                 len, datap->length()));
         return false;
@@ -106,52 +129,116 @@
     return true;
 }
 
-// Execute an external program to translate a file from its native
-// format to text or html.
 bool MimeHandlerExecMultiple::next_document()
 {
+    LOGDEB(("MimeHandlerExecMultiple::next_document(): [%s]\n", m_fn.c_str()));
     if (m_havedoc == false)
 	return false;
+
     if (missingHelper) {
 	LOGDEB(("MHExecMultiple::next_document(): helper known missing\n"));
 	return false;
     }
-    if (params.empty()) {
-	// Hu ho
-	LOGERR(("MHExecMultiple::mkDoc: empty params\n"));
-	m_reason = "RECFILTERROR BADCONFIG";
-	return false;
-    }
 
     if (m_cmd.getChildPid() < 0 && !startCmd()) {
         return false;
     }
 
-    // Send request to child process
+    // Send request to child process. This maybe the first/only
+    // request for a given file, or a continuation request. We send an
+    // empty file name in the latter case.
     ostringstream obuf;
-    obuf << "FileName: " << m_fn.length() << endl << m_fn << endl;
+    if (m_filefirst) {
+        obuf << "FileName: " << m_fn.length() << "\n" << m_fn;
+        // m_filefirst is set to true by set_document_file()
+        m_filefirst = false;
+    } else {
+        obuf << "Filename: " << 0 << "\n";
+    }
+    if (m_ipath.length()) {
+        obuf << "Ipath: " << m_ipath.length() << "\n" << m_ipath;
+    }
+    obuf << "\n";
     if (m_cmd.send(obuf.str()) < 0) {
         LOGERR(("MHExecMultiple: send error\n"));
         return false;
     }
 
-    // Read answer
-    LOGDEB(("MHExecMultiple: reading answer\n"));
+    // Read answer (multiple elements)
+    LOGDEB1(("MHExecMultiple: reading answer\n"));
+    bool eof_received = false;
+    string ipath;
+    string mtype;
     for (int loop=0;;loop++) {
-        string name;
-        if (!readDataElement(name)) {
+        string name, data;
+        if (!readDataElement(name, data)) {
             return false;
         }
         if (name.empty())
             break;
+        if (!stringlowercmp("eof:", name)) {
+            LOGDEB(("MHExecMultiple: got EOF\n"));
+            eof_received = true;
+        }
+        if (!stringlowercmp("ipath:", name)) {
+            ipath = data;
+            LOGDEB(("MHExecMultiple: got ipath [%s]\n", data.c_str()));
+        }
+        if (!stringlowercmp("mimetype:", name)) {
+            mtype = data;
+            LOGDEB(("MHExecMultiple: got mimetype [%s]\n", data.c_str()));
+        }
         if (loop == 10) {
             // ?? 
             LOGERR(("MHExecMultiple: filter sent too many parameters\n"));
             return false;
         }
     }
-    
-    finaldetails();
-    m_havedoc = false;
+    // The end of data can be signaled from the filter in two ways:
+    // either by returning an empty document (if the filter just hits
+    // eof while trying to read the doc), or with an "eof:" field
+    // accompanying a normal document (if the filter hit eof at the
+    // end of the current doc, which is the preferred way).
+    if (m_metaData["content"].length() == 0) {
+        LOGDEB(("MHExecMultiple: got empty document\n"));
+        m_havedoc = false;
+        return false;
+    }
+
+    // If this has an ipath, it is an internal doc from a
+    // multi-document file. In this case, either the filter supplies the 
+    // mimetype, or the ipath MUST be a filename-like string which we can use
+    // to compute a mime type
+    if (!ipath.empty()) {
+        m_metaData["ipath"] = ipath;
+        if (mtype.empty()) {
+            mtype = mimetype(ipath, 0, RclConfig::getMainConfig(), false);
+            if (mtype.empty()) {
+                // mimetype() won't call idFile when there is no file. Do it
+                mtype = idFileMem(m_metaData["content"]);
+                if (mtype.empty()) {
+                    LOGERR(("MHExecMultiple: cant guess mime type\n"));
+                    mtype = "application/octet-stream";
+                }
+            }
+        }
+        m_metaData["mimetype"] = mtype;
+        string md5, xmd5;
+        MD5String(m_metaData["content"], md5);
+        m_metaData["md5"] = MD5HexPrint(md5, xmd5);
+    } else {
+        m_metaData.erase("ipath");
+        string md5, xmd5, reason;
+        if (MD5File(m_fn, md5, &reason)) {
+            m_metaData["md5"] = MD5HexPrint(md5, xmd5);
+        } else {
+            LOGERR(("MimeHandlerExecM: cant compute md5 for [%s]: %s\n",
+                    m_fn.c_str(), reason.c_str()));
+        }
+    }
+
+    if (eof_received)
+        m_havedoc = false;
+
     return true;
 }