|
a/src/python/samples/rclmbox.py |
|
b/src/python/samples/rclmbox.py |
|
... |
|
... |
4 |
|
4 |
|
5 |
import mailbox
|
5 |
import mailbox
|
6 |
import email.header
|
6 |
import email.header
|
7 |
import email.utils
|
7 |
import email.utils
|
8 |
#import sys
|
8 |
#import sys
|
|
|
9 |
try:
|
|
|
10 |
from recoll import recoll
|
|
|
11 |
except:
|
9 |
import recoll
|
12 |
import recoll
|
|
|
13 |
|
10 |
import os
|
14 |
import os
|
11 |
import stat
|
15 |
import stat
|
12 |
|
16 |
|
13 |
#mbfile = "/Users/dockes/projets/fulltext/testrecoll/mail/fred"
|
17 |
mbfile = os.path.expanduser("~/mbox")
|
14 |
mbfile = "/Users/dockes/mail/outmail"
|
18 |
rclconf = os.path.expanduser("~/.recoll")
|
15 |
rclconf = "/Users/dockes/.recoll-test"
|
|
|
16 |
|
19 |
|
17 |
def header_value(msg, nm, to_utf = False):
|
20 |
def header_value(msg, nm, to_utf = False):
|
18 |
value = msg.get(nm)
|
21 |
value = msg.get(nm)
|
19 |
if value == None:
|
22 |
if value == None:
|
20 |
return ""
|
23 |
return ""
|
|
... |
|
... |
47 |
def udi(self, msgnum):
|
50 |
def udi(self, msgnum):
|
48 |
return self.mbfile + ":" + str(msgnum)
|
51 |
return self.mbfile + ":" + str(msgnum)
|
49 |
|
52 |
|
50 |
def index(self, db):
|
53 |
def index(self, db):
|
51 |
if not db.needUpdate(self.udi(1), self.sig()):
|
54 |
if not db.needUpdate(self.udi(1), self.sig()):
|
|
|
55 |
print("Index is up to date");
|
52 |
return None
|
56 |
return None
|
53 |
mb = mailbox.mbox(self.mbfile)
|
57 |
mb = mailbox.mbox(self.mbfile)
|
54 |
for msg in mb.values():
|
58 |
for msg in mb.values():
|
|
|
59 |
print("Indexing message %d" % self.msgnum);
|
55 |
self.index_message(db, msg)
|
60 |
self.index_message(db, msg)
|
56 |
self.msgnum += 1
|
61 |
self.msgnum += 1
|
57 |
|
62 |
|
58 |
def index_message(self, db, msg):
|
63 |
def index_message(self, db, msg):
|
59 |
doc = recoll.Doc()
|
64 |
doc = recoll.Doc()
|
|
... |
|
... |
88 |
text += u"Subject: " + header_value(msg, "Subject") + u"\n"
|
93 |
text += u"Subject: " + header_value(msg, "Subject") + u"\n"
|
89 |
#text += u"Message-ID: " + header_value(msg, "Message-ID") + u"\n"
|
94 |
#text += u"Message-ID: " + header_value(msg, "Message-ID") + u"\n"
|
90 |
text += u"\n"
|
95 |
text += u"\n"
|
91 |
for part in msg.walk():
|
96 |
for part in msg.walk():
|
92 |
if part.is_multipart():
|
97 |
if part.is_multipart():
|
93 |
pass #print "Multipart: " + part.get_content_type()
|
98 |
pass
|
94 |
else:
|
99 |
else:
|
95 |
ct = part.get_content_type()
|
100 |
ct = part.get_content_type()
|
96 |
#print "Simple: " + ct
|
|
|
97 |
if ct.lower() == "text/plain":
|
101 |
if ct.lower() == "text/plain":
|
98 |
charset = part.get_content_charset("iso-8859-1")
|
102 |
charset = part.get_content_charset("iso-8859-1")
|
99 |
print "charset: ", charset
|
103 |
#print "charset: ", charset
|
100 |
print "text: ", part.get_payload(None, True)
|
104 |
#print "text: ", part.get_payload(None, True)
|
101 |
text += unicode(part.get_payload(None, True), charset)
|
105 |
text += unicode(part.get_payload(None, True), charset)
|
102 |
doc.text = text
|
106 |
doc.text = text
|
103 |
# dbytes
|
107 |
# dbytes
|
104 |
doc.dbytes = str(len(text))
|
108 |
doc.dbytes = str(len(text))
|
105 |
# sig
|
109 |
# sig
|