Switch to side-by-side view

--- a
+++ b/src/python/samples/rclmbox.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+
+import mailbox
+import email.header
+import email.utils
+#import sys
+import recollq
+import os
+import stat
+
+#mbfile = "/Users/dockes/projets/fulltext/testrecoll/mail/fred"
+mbfile = "/Users/dockes/mail/outmail"
+rclconf = "/Users/dockes/.recoll-test"
+
+def header_value(msg, nm, to_utf = False):
+    value = msg.get(nm)
+    if value == None:
+        return ""
+    value = value.replace("\n", "")
+    value = value.replace("\r", "")
+    #print value
+    parts = email.header.decode_header(value)
+    #print parts
+    univalue = u""
+    for part in parts:
+        if part[1] != None:
+            univalue += unicode(part[0], part[1]) + " "
+        else:
+            univalue += part[0] + " "
+    if to_utf:
+        return univalue.encode('utf-8')
+    else:
+        return univalue
+
+class mbox_indexer:
+    def __init__(self, mbfile):
+        self.mbfile = mbfile
+        stdata = os.stat(mbfile)
+        self.fmtime = stdata[stat.ST_MTIME]
+        self.fbytes = stdata[stat.ST_SIZE]
+        self.msgnum = 1
+
+    def sig(self):
+        return str(self.fmtime) + ":" + str(self.fbytes)
+    def udi(self, msgnum):
+        return self.mbfile + ":" + str(msgnum)
+
+    def index(self, db):
+        if not db.needUpdate(self.udi(1), self.sig()):
+            return None
+        mb = mailbox.mbox(self.mbfile)
+        for msg in mb.values():
+            self.index_message(db, msg)
+            self.msgnum += 1
+
+    def index_message(self, db, msg):
+        doc = recollq.Doc()
+        doc.author = header_value(msg, "From")
+        # url
+        doc.url = "file://" + self.mbfile
+        # utf8fn
+        # ipath
+        doc.ipath = str(self.msgnum)
+        # mimetype
+        doc.mimetype = "message/rfc822"
+        # mtime
+        dte = header_value(msg, "Date")
+        tm = email.utils.parsedate_tz(dte)
+        if tm == None:
+            doc.mtime = str(self.fmtime)
+        else:
+            doc.mtime = str(email.utils.mktime_tz(tm))
+        # origcharset
+        # title
+        doc.title = header_value(msg, "Subject")
+        # keywords
+        # abstract
+        # author
+        # fbytes
+        doc.fbytes = str(self.fbytes)
+        # text
+        text = u""
+        text += u"From: " + header_value(msg, "From") + u"\n"
+        text += u"To: " + header_value(msg, "To") + u"\n"
+        text += u"Subject: " + header_value(msg, "Subject") + u"\n"
+        #text += u"Message-ID: " + header_value(msg, "Message-ID") + u"\n"
+        text += u"\n"
+        for part in msg.walk():
+            if part.is_multipart():
+                pass #print "Multipart: " + part.get_content_type()
+            else:
+                ct = part.get_content_type()
+                #print "Simple: " + ct
+                if ct.lower() == "text/plain":
+                    charset = part.get_content_charset("iso-8859-1")
+                    text += unicode(part.get_payload(None, True), charset)
+        doc.text = text
+        # dbytes
+        doc.dbytes = str(len(text))
+        # sig
+        doc.sig = self.sig()
+        udi = self.udi(self.msgnum)
+        db.addOrUpdate(udi, u"", doc)
+
+
+db = recollq.connect(confdir=rclconf, writable=1)
+
+mbidx = mbox_indexer(mbfile)
+mbidx.index(db)