--- a/src/utils/readfile.cpp
+++ b/src/utils/readfile.cpp
@@ -14,7 +14,6 @@
  *   Free Software Foundation, Inc.,
  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
-#ifndef TEST_READFILE
 #ifdef BUILDING_RECOLL
 #include "autoconfig.h"
 #else
@@ -37,20 +36,33 @@
 
 #include "readfile.h"
 #include "smallut.h"
-
-using std::string;
-
+#include "md5.h"
+
+#ifdef MDU_INCLUDE_LOG
+#include MDU_INCLUDE_LOG
+#else
+#include "log.h"
+#endif
+
+using namespace std;
+
+///////////////
+// Implementation of basic interface: read whole file to memory buffer
 class FileToString : public FileScanDo {
 public:
     FileToString(string& data) : m_data(data) {}
-    string& m_data;
-    bool init(size_t size, string *reason) {
+
+    // Note: the fstat() + reserve() (in init()) calls divide cpu
+    // usage almost by 2 on both linux i586 and macosx (compared to
+    // just append()) Also tried a version with mmap, but it's
+    // actually slower on the mac and not faster on linux.
+    virtual bool init(int64_t size, string *reason) {
         if (size > 0) {
             m_data.reserve(size);
         }
         return true;
     }
-    bool data(const char *buf, int cnt, string *reason) {
+    virtual bool data(const char *buf, int cnt, string *reason) {
         try {
             m_data.append(buf, cnt);
         } catch (...) {
@@ -59,248 +71,449 @@
         }
         return true;
     }
-};
+
+    string& m_data;
+};
+
+// Read fn into data through a FileToString sink. No MD5 is requested.
+bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
+                    string *reason) {
+    FileToString sink(data);
+    return file_scan(fn, &sink, offs, cnt, reason, nullptr);
+}
 
 bool file_to_string(const string& fn, string& data, string *reason)
 {
     return file_to_string(fn, data, 0, size_t(-1), reason);
 }
-bool file_to_string(const string& fn, string& data, int64_t offs, size_t cnt,
-                    string *reason)
+
+
+/////////////
+//  Callback/filtering interface
+
+// Base class for the pipe elements which have something downstream:
+// both the source (origin) and the filters (midstream).
+class FileScanUpstream {
+public:
+    // Polymorphic base: make destruction through a base pointer safe.
+    virtual ~FileScanUpstream() = default;
+    // Set the element which will receive our output.
+    virtual void setDownstream(FileScanDo *down) { m_down = down; }
+    // Element currently receiving our output, nullptr if none.
+    virtual FileScanDo *out() { return m_down; }
+protected:
+    FileScanDo *m_down{nullptr};
+};
+
+// Source element: the origin of the pipe, it only has a downstream.
+class FileScanSource : public FileScanUpstream {
+public:
+    // Remember which element the data produced by scan() goes to.
+    FileScanSource(FileScanDo *down) { setDownstream(down); }
+    // Produce the data and push it downstream. false means failure.
+    virtual bool scan() = 0;
+};
+
+// Element sitting inside a transformation pipe. The idea is that
+// elements which don't recognize the data get themselves out of the
+// pipe (pop()). Typically, only one of the decompression modules
+// (e.g. gzip/bzip2/xz...) would remain. For now there is only gzip,
+// which pops itself if the data does not have the right magic number.
+class FileScanFilter : public FileScanDo, public FileScanUpstream {
+public:
+    // Splice myself into the pipe between upstream and sink, telling
+    // each neighbour (when there is one) about me.
+    virtual void insertAtSink(FileScanDo *sink, FileScanUpstream *upstream) {
+        setDownstream(sink);
+        if (m_down != nullptr)
+            m_down->setUpstream(this);
+        setUpstream(upstream);
+        if (m_up != nullptr)
+            m_up->setDownstream(this);
+    }
+
+    // Remove myself from the pipe: my two neighbours get linked
+    // directly to each other. My own pointers are left untouched.
+    virtual void pop() {
+        if (m_down != nullptr)
+            m_down->setUpstream(m_up);
+        if (m_up != nullptr)
+            m_up->setDownstream(m_down);
+    }
+
+    // Record which element is feeding me.
+    virtual void setUpstream(FileScanUpstream *up) override {
+        m_up = up;
+    }
+
+private:
+    FileScanUpstream *m_up{nullptr};
+};
+
+
+#if defined(READFILE_ENABLE_ZLIB)
+#include <zlib.h>
+
+// Gzip decompression filter. If the data does not begin with the gzip
+// magic number, it pops itself out of the pipe, else it inflates.
+class GzFilter : public FileScanFilter {
+public:
+    virtual ~GzFilter() {
+        // Only true after a successful inflateInit2(), see data().
+        if (m_initdone) {
+            inflateEnd(&m_stream);
+        }
+    }
+
+    // Nothing to prepare here: just relay the call downstream.
+    virtual bool init(int64_t size, string *reason) override {
+        LOGDEB1("GzFilter::init\n");
+        return out() ? out()->init(size, reason) : true;
+    }
+
+    // Inflate cnt bytes from buf and pass the result downstream. Returns
+    // false, with a message appended to *reason if set, on error.
+    virtual bool data(const char *buf, int cnt, string *reason) override {
+        LOGDEB1("GzFilter::data: cnt " << cnt << endl);
+        int error;
+        m_stream.next_in = (Bytef*)buf;
+        m_stream.avail_in = cnt;
+        if (m_initdone == false) {
+            // We do not support a first read cnt < 2. We probably should.
+            if (cnt < 2) {
+                if (reason)
+                    *reason += "GzFilter: first data count < 2";
+                return false;
+            }
+            const unsigned char *ubuf = (const unsigned char *)buf;
+            if (ubuf[0] != 0x1f || ubuf[1] != 0x8b) {
+                LOGDEB1("GzFilter::data: not gzip. out() is " << out() << "\n");
+                // Not gzip: unlink ourselves, pass this buffer unchanged.
+                pop();
+                return out() ? out()->data(buf, cnt, reason) : false;
+            }
+            m_stream.opaque = nullptr;
+            m_stream.zalloc = alloc_func;
+            m_stream.zfree = free_func;
+            m_stream.next_out = (Bytef*)m_obuf;
+            m_stream.avail_out = m_obs;
+            if ((error = inflateInit2(&m_stream, 15+32)) != Z_OK) {
+                LOGERR("inflateInit2 error: " << error << endl);
+                if (reason) {
+                    *reason += " Zlib inflateinit failed";
+                    if (m_stream.msg && *m_stream.msg) {
+                        *reason += string(": ") + m_stream.msg;
+                    }
+                }
+                return false;
+            }
+            m_initdone = true;
+        }
+
+        while (m_stream.avail_in != 0) {
+            m_stream.next_out = (Bytef*)m_obuf;
+            m_stream.avail_out = m_obs;
+            error = inflate(&m_stream, Z_SYNC_FLUSH);
+            if (error < Z_OK || error == Z_NEED_DICT) {
+                LOGERR("inflate error: " << error << endl);
+                if (reason) {
+                    *reason += " Zlib inflate failed";
+                    if (m_stream.msg && *m_stream.msg) {
+                        *reason += string(": ") + m_stream.msg;
+                    }
+                }
+                return false;
+            }
+            if (out() &&
+                !out()->data(m_obuf, m_obs - m_stream.avail_out, reason)) {
+                return false;
+            }
+            // Stream end: stop, trailing bytes would loop us forever.
+            if (error == Z_STREAM_END) {
+                break;
+            }
+        }
+        return true;
+    }
+
+    static voidpf alloc_func(voidpf, uInt items, uInt size) {
+        return malloc(static_cast<size_t>(items) * size);
+    }
+    static void free_func(voidpf, voidpf address) { free(address); }
+    bool m_initdone{false};
+    z_stream m_stream;
+    char m_obuf[10000];
+    const int m_obs{10000};
+};
+#endif // READFILE_ENABLE_ZLIB
+
+// Filter computing the MD5 of the data going through it, which is
+// relayed unchanged. finish() stores the digest in the ctor parameter.
+class FileScanMd5 : public FileScanFilter {
+public:
+    FileScanMd5(string& d) : digest(d) {}
+    // Reset the MD5 context, then relay the call downstream.
+    virtual bool init(int64_t size, string *reason) override {
+        LOGDEB1("FileScanMd5: init\n");
+        MD5Init(&ctx);
+        return out() ? out()->init(size, reason) : true;
+    }
+    // Account for the buffer in the digest, then pass it along.
+    virtual bool data(const char *buf, int cnt, string *reason) override {
+        LOGDEB1("FileScanMd5: data. cnt " << cnt << endl);
+        MD5Update(&ctx, (const unsigned char*)buf, cnt);
+        return !out() || out()->data(buf, cnt, reason);
+    }
+    // Finalize the computation: this is what sets the digest value.
+    bool finish() {
+        LOGDEB1("FileScanMd5: finish\n");
+        MD5Final(digest, &ctx);
+        return true;
+    }
+
+    string &digest;
+    MD5_CTX ctx;
+};
+
+
+// Source taking data from a regular file (stdin if the name is empty)
+class FileScanSourceFile : public FileScanSource {
+public:
+    FileScanSourceFile(FileScanDo *next, const string& fn, int64_t startoffs,
+                       int64_t cnttoread, string *reason)
+        : FileScanSource(next), m_fn(fn), m_startoffs(startoffs),
+          m_cnttoread(cnttoread), m_reason(reason) { }
+
+    // Read the data and push it downstream, skipping m_startoffs bytes
+    // and stopping after m_cnttoread bytes (-1: no limit). Returns
+    // false, after describing the problem in *m_reason, on error.
+    virtual bool scan() {
+        LOGDEB1("FileScanSourceFile: reading " << m_fn << " offs " <<
+               m_startoffs<< " cnt " << m_cnttoread << " out " << out() << endl);
+        const int RDBUFSZ = 8192;
+        bool ret = false;
+        // We only own (and must close) the descriptor if we opened it.
+        const bool ownfd = !m_fn.empty();
+        int fd = 0;
+        struct stat st;
+        // Initialize st_size: if fn.empty() , the fstat() call won't happen.
+        st.st_size = 0;
+        // Declared here so that no "goto out" crosses an initialization.
+        int64_t curoffs = 0;
+        int64_t totread = 0;
+        char buf[RDBUFSZ];
+
+        // If we have a file name, open it, else use stdin.
+        if (ownfd) {
+            fd = open(m_fn.c_str(), O_RDONLY | O_BINARY);
+            if (fd < 0 || fstat(fd, &st) < 0) {
+                catstrerror(m_reason, "open/stat", errno);
+                goto out;
+            }
+        }
+#if defined O_NOATIME && O_NOATIME != 0
+        if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
+            // perror("fcntl");
+        }
+#endif
+        if (out()) {
+            if (m_cnttoread != -1 && m_cnttoread) {
+                out()->init(m_cnttoread + 1, m_reason);
+            } else if (st.st_size > 0) {
+                out()->init(st.st_size + 1, m_reason);
+            } else {
+                out()->init(0, m_reason);
+            }
+        }
+        if (m_startoffs > 0 && ownfd) {
+            if (lseek(fd, m_startoffs, SEEK_SET) != m_startoffs) {
+                catstrerror(m_reason, "lseek", errno);
+                goto out;
+            }
+            curoffs = m_startoffs;
+        }
+
+        for (;;) {
+            size_t toread = RDBUFSZ;
+            if (m_startoffs > 0 && curoffs < m_startoffs) {
+                toread = size_t(MIN(RDBUFSZ, m_startoffs - curoffs));
+            }
+            if (m_cnttoread != -1) {
+                toread = MIN(toread, (uint64_t)(m_cnttoread - totread));
+            }
+            ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
+            if (n < 0) {
+                catstrerror(m_reason, "read", errno);
+                goto out;
+            }
+            if (n == 0) {
+                break;
+            }
+            curoffs += n;
+            if (curoffs - n < m_startoffs) {
+                continue;
+            }
+            if (out() && !out()->data(buf, n, m_reason)) {
+                goto out;
+            }
+            totread += n;
+            if (m_cnttoread > 0 && totread >= m_cnttoread) {
+                break;
+            }
+        }
+        ret = true;
+    out:
+        if (ownfd && fd >= 0) {
+            close(fd);
+        }
+        return ret;
+    }
+
+protected:
+    string m_fn;
+    int64_t m_startoffs;
+    int64_t m_cnttoread;
+    string *m_reason;
+};
+
+
+#if defined(READFILE_ENABLE_MINIZ)
+#include "miniz.h"
+
+// Source taking data from a ZIP archive member
+class FileScanSourceZip : public FileScanSource {
+public:
+    FileScanSourceZip(FileScanDo *next, const string& fn, const string& member,
+                       string *reason)
+        : FileScanSource(next), m_fn(fn), m_member(member),
+          m_reason(reason) { }
+    // Extract m_member from m_fn, pushing the data downstream. Returns
+    // false, with a message appended to *m_reason if set, on error.
+    virtual bool scan() {
+        bool ret = false;
+        mz_zip_archive zip;
+        mz_zip_zero_struct(&zip);
+        void *opaque = this;
+
+        if (!mz_zip_reader_init_file(&zip, m_fn.c_str(), 0)) {
+            if (m_reason) {
+                *m_reason += "mz_zip_reader_init_file() failed: ";
+                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
+            }
+            return false;
+        }
+        mz_uint32 file_index = 0;
+        // The locate call returns a boolean status, not an index.
+        if (!mz_zip_reader_locate_file_v2(&zip, m_member.c_str(), NULL, 0,
+                                          &file_index)) {
+            if (m_reason) {
+                *m_reason += "mz_zip_reader_locate_file() failed: ";
+                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
+            }
+            goto out;
+        }
+        mz_zip_archive_file_stat zstat;
+        if (!mz_zip_reader_file_stat(&zip, file_index, &zstat)) {
+            if (m_reason) {
+                *m_reason += "mz_zip_reader_file_stat() failed: ";
+                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
+            }
+            goto out;
+        }
+        if (out()) {
+            if (!out()->init(zstat.m_uncomp_size, m_reason)) {
+                goto out;
+            }
+        }
+
+        if (!mz_zip_reader_extract_to_callback(
+                &zip, file_index, write_cb, opaque, 0)) {
+            if (m_reason) {
+                *m_reason += "mz_zip_reader_extract_to_callback() failed: ";
+                *m_reason += string(mz_zip_get_error_string(zip.m_last_error));
+            }
+            goto out;
+        }
+        ret = true;
+    out:
+        mz_zip_reader_end(&zip);
+        return ret;
+    }
+    // Extraction callback: relay the data to the downstream element.
+    static size_t write_cb(void *pOpaque, mz_uint64 file_ofs,
+                           const void *pBuf, size_t n) {
+        const char *cp = (const char*)pBuf;
+        LOGDEB1("write_cb: ofs " << file_ofs << " cnt " << n << " data: " <<
+                string(cp, n) << endl);
+        FileScanSourceZip *ths = (FileScanSourceZip *)pOpaque;
+        if (ths->out()) {
+            if (!ths->out()->data(cp, n, ths->m_reason)) {
+                return (size_t)-1;
+            }
+        }
+        return n;
+    }
+    
+protected:
+    string m_fn;
+    string m_member;
+    string *m_reason;
+};
+
+bool file_scan(const std::string& filename, const std::string& membername,
+               FileScanDo* doer, std::string *reason)
 {
-    FileToString accum(data);
-    return file_scan(fn, &accum, offs, cnt, reason);
+    // An empty member name designates the file itself, else a ZIP member.
+    if (membername.empty()) {
+        return file_scan(filename, doer, 0, -1, reason, nullptr);
+    }
+    FileScanSourceZip source(doer, filename, membername, reason);
+    return source.scan();
+}
+
+#endif // READFILE_ENABLE_MINIZ
+
+// Scan fn through the pipe: source -> [md5] -> [gunzip] -> doer.
+bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
+               int64_t cnttoread, string *reason, string *md5p)
+{
+    LOGDEB("file_scan: doer " << doer << endl);
+#if defined(READFILE_ENABLE_ZLIB)
+    // Decompression is only attempted when reading from the start.
+    bool nodecomp = startoffs != 0;
+#endif
+    if (startoffs < 0) {
+        startoffs = 0;
+    }
+    FileScanSourceFile source(doer, fn, startoffs, cnttoread, reason);
+    FileScanUpstream *up = &source;
+
+    // The MD5 is computed on the raw file contents, before any
+    // decompression, so this filter is inserted right at the source.
+    string digest;
+    FileScanMd5 md5filter(digest);
+    if (md5p) {
+        md5filter.insertAtSink(doer, up);
+        up = &md5filter;
+    }
+    
+#if defined(READFILE_ENABLE_ZLIB)
+    GzFilter gzfilter;
+    if (!nodecomp) {
+        gzfilter.insertAtSink(doer, up);
+        up = &gzfilter;
+    }
+#endif
+
+    bool ret = source.scan();
+    if (md5p) {
+        md5filter.finish();
+        MD5HexPrint(digest, *md5p);
+    }
+    return ret;
 }
 
 bool file_scan(const string& fn, FileScanDo* doer, string *reason)
 {
-    return file_scan(fn, doer, 0, size_t(-1), reason);
+    return file_scan(fn, doer, 0, -1, reason, nullptr);
 }
-
-const int RDBUFSZ = 8192;
-// Note: the fstat() + reserve() (in init()) calls divide cpu usage almost by 2
-// on both linux i586 and macosx (compared to just append())
-// Also tried a version with mmap, but it's actually slower on the mac and not
-// faster on linux.
-bool file_scan(const string& fn, FileScanDo* doer, int64_t startoffs,
-               size_t cnttoread, string *reason)
-{
-    if (startoffs < 0) {
-        *reason += " file_scan: negative startoffs not allowed";
-        return false;
-    }
-
-    bool ret = false;
-    bool noclosing = true;
-    int fd = 0;
-    struct stat st;
-    // Initialize st_size: if fn.empty() , the fstat() call won't happen.
-    st.st_size = 0;
-
-    // If we have a file name, open it, else use stdin.
-    if (!fn.empty()) {
-        fd = open(fn.c_str(), O_RDONLY | O_BINARY);
-        if (fd < 0 || fstat(fd, &st) < 0) {
-            catstrerror(reason, "open/stat", errno);
-            return false;
-        }
-        noclosing = false;
-    }
-
-#if defined O_NOATIME && O_NOATIME != 0
-    if (fcntl(fd, F_SETFL, O_NOATIME) < 0) {
-        // perror("fcntl");
-    }
-#endif
-
-    if (cnttoread != (size_t) - 1 && cnttoread) {
-        doer->init(cnttoread + 1, reason);
-    } else if (st.st_size > 0) {
-        doer->init(size_t(st.st_size + 1), reason);
-    } else {
-        doer->init(0, reason);
-    }
-
-    int64_t curoffs = 0;
-    if (startoffs > 0 && !fn.empty()) {
-        if (lseek(fd, startoffs, SEEK_SET) != startoffs) {
-            catstrerror(reason, "lseek", errno);
-            return false;
-        }
-        curoffs = startoffs;
-    }
-
-    char buf[RDBUFSZ];
-    size_t totread = 0;
-    for (;;) {
-        size_t toread = RDBUFSZ;
-        if (startoffs > 0 && curoffs < startoffs) {
-            toread = size_t(MIN(RDBUFSZ, startoffs - curoffs));
-        }
-
-        if (cnttoread != size_t(-1)) {
-            toread = MIN(toread, cnttoread - totread);
-        }
-        ssize_t n = static_cast<ssize_t>(read(fd, buf, toread));
-        if (n < 0) {
-            catstrerror(reason, "read", errno);
-            goto out;
-        }
-        if (n == 0) {
-            break;
-        }
-
-        curoffs += n;
-        if (curoffs - n < startoffs) {
-            continue;
-        }
-
-        if (!doer->data(buf, n, reason)) {
-            goto out;
-        }
-        totread += n;
-        if (cnttoread > 0 && totread >= cnttoread) {
-            break;
-        }
-    }
-
-    ret = true;
-out:
-    if (fd >= 0 && !noclosing) {
-        close(fd);
-    }
-    return ret;
-}
-
-#else // Test
-#include "autoconfig.h"
-
-#include <stdio.h>
-#include <sys/types.h>
-#include "safesysstat.h"
-#include <stdlib.h>
-
-#include <string>
-#include <iostream>
-using namespace std;
-
-#include "readfile.h"
-#include "fstreewalk.h"
-
-using namespace std;
-
-class myCB : public FsTreeWalkerCB {
-public:
-    FsTreeWalker::Status processone(const string& path,
-                                    const struct stat *st,
-                                    FsTreeWalker::CbFlag flg) {
-        if (flg == FsTreeWalker::FtwDirEnter) {
-            //cout << "[Entering " << path << "]" << endl;
-        } else if (flg == FsTreeWalker::FtwDirReturn) {
-            //cout << "[Returning to " << path << "]" << endl;
-        } else if (flg == FsTreeWalker::FtwRegular) {
-            //cout << path << endl;
-            string s, reason;
-            if (!file_to_string(path, s, &reason)) {
-                cerr << "Failed: " << reason << " : " << path << endl;
-            } else {
-                //cout <<
-                //"================================================" << endl;
-                cout << path << endl;
-                //      cout << s;
-            }
-            reason.clear();
-        }
-        return FsTreeWalker::FtwOk;
-    }
-};
-
-static int     op_flags;
-#define OPT_MOINS 0x1
-#define OPT_c     0x2
-#define OPT_o     0x4
-
-static const char *thisprog;
-static char usage [] =
-    "trreadfile [-o offs] [-c cnt] topdirorfile\n\n"
-    ;
-static void
-Usage(void)
-{
-    fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
-    exit(1);
-}
-
-int main(int argc, const char **argv)
-{
-    int64_t offs = 0;
-    size_t cnt = size_t(-1);
-    thisprog = argv[0];
-    argc--;
-    argv++;
-
-    while (argc > 0 && **argv == '-') {
-        (*argv)++;
-        if (!(**argv))
-            /* Cas du "adb - core" */
-        {
-            Usage();
-        }
-        while (**argv)
-            switch (*(*argv)++) {
-            case 'c':
-                op_flags |= OPT_c;
-                if (argc < 2) {
-                    Usage();
-                }
-                cnt = atoll(*(++argv));
-                argc--;
-                goto b1;
-            case 'o':
-                op_flags |= OPT_o;
-                if (argc < 2) {
-                    Usage();
-                }
-                offs = strtoull(*(++argv), 0, 0);
-                argc--;
-                goto b1;
-            default:
-                Usage();
-                break;
-            }
-b1:
-        argc--;
-        argv++;
-    }
-
-    if (argc != 1) {
-        Usage();
-    }
-    string top = *argv++;
-    argc--;
-    cerr << "filename " << top << " offs " << offs << " cnt " << cnt << endl;
-
-    struct stat st;
-    if (!top.empty() && stat(top.c_str(), &st) < 0) {
-        perror("stat");
-        exit(1);
-    }
-    if (!top.empty() && S_ISDIR(st.st_mode)) {
-        FsTreeWalker walker;
-        myCB cb;
-        walker.walk(top, cb);
-        if (walker.getErrCnt() > 0) {
-            cout << walker.getReason();
-        }
-    } else {
-        string s, reason;
-        if (!file_to_string(top, s, offs, cnt, &reason)) {
-            cerr << reason << endl;
-            exit(1);
-        } else {
-            cout << s;
-        }
-    }
-    exit(0);
-}
-#endif //TEST_READFILE