Reputation: 15
I'm trying to get only the text/plain
section of an email I received with imaplib in Python:
#!/usr/bin/env python
import imaplib
import sys
from pprint import pprint
from email.parser import HeaderParser
from email.header import decode_header
# Python 2 only: reload(sys) re-exposes setdefaultencoding(), which is then
# used to make UTF-8 the interpreter-wide default string encoding.
reload(sys)
sys.setdefaultencoding("utf-8")

conn = imaplib.IMAP4_SSL('host')
try:
    conn.login('username', 'password')
    #conn.select('Inbox', readonly=True)
    conn.select('Inbox')
    # search() returns (status, [space-separated message numbers]).
    status, unseen = conn.search(None, '(UNSEEN)')
    if status == 'OK' and unseen[0]:
        # Only the last message number in the search result is fetched.
        latest_id = unseen[0].split()[-1]
        #(BODY[HEADER.FIELDS (SUBJECT FROM)])
        # fetch() also returns (status, response); `data` keeps the whole
        # tuple, so the response list is data[1], not data itself.
        data = conn.fetch(latest_id, '(BODY[1] BODY[HEADER.FIELDS (SUBJECT FROM)])')
        #pprint(data)
        body_data = data[1][0][1]
        header_data = data[1][1][1]
        #print(body_data)
        # parser = HeaderParser()
        # msg = parser.parsestr(header_data)
        #
        # print (decode_header(msg['From'])[0][0].decode('utf-8') + ": " + decode_header(msg['Subject'])[0][0].decode('utf-8'))
finally:
    # End the IMAP session even if login/select/search/fetch raised.
    conn.logout()
In the case of a multipart message, though, "body_data" ends up containing the body of the message with all of its sections.
I tried with the example here:
import email

# `data` is the (status, response) tuple returned by conn.fetch().
# Indexing it as data[0][1] picks the second character of the status
# string ('OK'[1] == 'K'), which is why only one character was printed.
# NOTE(review): for walk() to find MIME parts the fetch must have requested
# the whole message (e.g. '(RFC822)'), not a single part such as BODY[1].
status, response = data
raw_message = response[0][1]
msg = email.message_from_string(raw_message)
for part in msg.walk():
    # each part is a either non-multipart, or another multipart message
    # that contains further parts... Message is organized like a tree
    if part.get_content_type() == 'text/plain':
        print(part.get_payload())  # prints the raw text
However, for some reason, part.get_payload()
returns only a single character from the text.
I'm forced to use Python 2.7.13.
Upvotes: 0
Views: 1500
Reputation: 15
I was able to get what I wanted by changing BODY[1]
to BODY[1.1]
in the fetch
command:
#!/usr/bin/env python
import imaplib
import sys
from email.parser import HeaderParser
from email.header import decode_header
# Python 2 only: reload(sys) re-exposes setdefaultencoding(), which is then
# used to make UTF-8 the interpreter-wide default string encoding.
reload(sys)
sys.setdefaultencoding("utf-8")

conn = imaplib.IMAP4_SSL('host')
try:
    conn.login('username', 'password')
    #conn.select('Inbox', readonly=True)
    conn.select('Inbox')
    # search() returns (status, [space-separated message numbers]).
    status, unseen = conn.search(None, '(UNSEEN)')
    if status == 'OK' and unseen[0]:
        # Only the last message number in the search result is fetched.
        latest_id = unseen[0].split()[-1]
        # BODY[1.1] addresses the first sub-part nested inside part 1.
        # NOTE(review): this assumes part 1 is itself multipart (e.g.
        # multipart/alternative); confirm behaviour for single-part mails.
        data = conn.fetch(latest_id, '(BODY[1.1] BODY[HEADER.FIELDS (SUBJECT FROM)])')
        # fetch() returns (status, response), so the response list is data[1].
        body_data = data[1][0][1]
        header_data = data[1][1][1]
        print(body_data) #Now "body_data" contains only the "text/plain" section for messages nested as described above
        # ...
finally:
    # End the IMAP session even if login/select/search/fetch raised.
    conn.logout()
Upvotes: 1