Switch to unified view

a/src/python/samples/rclmbox.py b/src/python/samples/rclmbox.py
...
...
4
4
5
import mailbox
5
import mailbox
6
import email.header
6
import email.header
7
import email.utils
7
import email.utils
8
#import sys
8
#import sys
9
try:
10
    from recoll import recoll
11
except:
9
import recoll
12
    import recoll
13
10
import os
14
import os
11
import stat
15
import stat
12
16
13
#mbfile = "/Users/dockes/projets/fulltext/testrecoll/mail/fred"
17
mbfile = os.path.expanduser("~/mbox")
14
mbfile = "/Users/dockes/mail/outmail"
18
rclconf = os.path.expanduser("~/.recoll")
15
rclconf = "/Users/dockes/.recoll-test"
16
19
17
def header_value(msg, nm, to_utf = False):
20
def header_value(msg, nm, to_utf = False):
18
    value = msg.get(nm)
21
    value = msg.get(nm)
19
    if value == None:
22
    if value == None:
20
        return ""
23
        return ""
...
...
47
    def udi(self, msgnum):
50
    def udi(self, msgnum):
48
        return self.mbfile + ":" + str(msgnum)
51
        return self.mbfile + ":" + str(msgnum)
49
52
50
    def index(self, db):
53
    def index(self, db):
51
        if not db.needUpdate(self.udi(1), self.sig()):
54
        if not db.needUpdate(self.udi(1), self.sig()):
55
            print("Index is up to date");
52
            return None
56
            return None
53
        mb = mailbox.mbox(self.mbfile)
57
        mb = mailbox.mbox(self.mbfile)
54
        for msg in mb.values():
58
        for msg in mb.values():
59
            print("Indexing message %d" % self.msgnum);
55
            self.index_message(db, msg)
60
            self.index_message(db, msg)
56
            self.msgnum += 1
61
            self.msgnum += 1
57
62
58
    def index_message(self, db, msg):
63
    def index_message(self, db, msg):
59
        doc = recoll.Doc()
64
        doc = recoll.Doc()
...
...
88
        text += u"Subject: " + header_value(msg, "Subject") + u"\n"
93
        text += u"Subject: " + header_value(msg, "Subject") + u"\n"
89
        #text += u"Message-ID: " + header_value(msg, "Message-ID") + u"\n"
94
        #text += u"Message-ID: " + header_value(msg, "Message-ID") + u"\n"
90
        text += u"\n"
95
        text += u"\n"
91
        for part in msg.walk():
96
        for part in msg.walk():
92
            if part.is_multipart():
97
            if part.is_multipart():
93
                pass #print "Multipart: " + part.get_content_type()
98
                pass 
94
            else:
99
            else:
95
                ct = part.get_content_type()
100
                ct = part.get_content_type()
96
                #print "Simple: " + ct
97
                if ct.lower() == "text/plain":
101
                if ct.lower() == "text/plain":
98
                    charset = part.get_content_charset("iso-8859-1")
102
                    charset = part.get_content_charset("iso-8859-1")
99
                    print "charset: ", charset
103
                    #print "charset: ", charset
100
                    print "text: ", part.get_payload(None, True)
104
                    #print "text: ", part.get_payload(None, True)
101
                    text += unicode(part.get_payload(None, True), charset)
105
                    text += unicode(part.get_payload(None, True), charset)
102
        doc.text = text
106
        doc.text = text
103
        # dbytes
107
        # dbytes
104
        doc.dbytes = str(len(text))
108
        doc.dbytes = str(len(text))
105
        # sig
109
        # sig