Reputation: 53
I am working on a project where I get emails with a specific 'subject'. There are forwarded to me by users. The body consists of text but in the original email and no new text is entered above the forwarded line. There are also attachments to either of the part of the email.
I wrote the following code using python and IMAP and am able to store attachments and body only if the email is NEW and not a forwarded email.
def getAllEmails(username, password, subject, fromEmail, folderName):
mail = imaplib.IMAP4_SSL("imap.outlook.com")
mail.login(username, password)
print("Login success..........")
mail.select("inbox")
result, data = mail.search(None, 'SUBJECT', '"{}"'.format(subject))
inbox_item_list_subject = data[0].split()
result, data = mail.search(None, 'FROM', '"{}"'.format(fromEmail))
inbox_item_list_sender = data[0].split()
inbox_item_list = list(set(inbox_item_list_subject) & set(inbox_item_list_sender))
counter = 0
for item in inbox_item_list:
counter+=1
result2, email_data = mail.fetch(item,'(RFC822)')
raw_email = email_data[0][1].decode("utf-8")
email_message = email.message_from_string(raw_email)
#getting information about the mail like to, from,subject, date.
to_ = email_message['To']
from_ = email_message['From']
subject_ = email_message['Subject']
date_ = email_message['date']
# setting the format to save in text file.
to_ = "to: "
from_ = "from: " + from_ + str("\n")
date_ = "date: " + date_ + str("\n")
subject__ = "subject: " + subject_ + str("\n")
# accessing the subparts of email_message
for part in email_message.walk():
if part.get_content_maintype == 'multipart':
continue
content_type = part.get_content_type()
content_disposition = str(part.get("Content-Disposition"))
filename = part.get_filename()
ext = mimetypes.guess_extension(part.get_content_type())
# allowing pdf, jpg, png and doc format only
if ext == '.pdf' or ext == '.csv' or ext == '.png' or ext == '.docx' or ext == '.xlsx':
if filename:
save_path = os.path.join(os.getcwd(), folderName, subject_)
if not os.path.exists(save_path):
os.makedirs(save_path)
with open(os.path.join(save_path, filename), 'wb') as fp:
fp.write(part.get_payload(decode=True))
fp.close()
# getting the body part of the mail.
try:
body = part.get_payload(decode=True).decode()
except:
pass
# saving the required information in a file named as "textfile.txt".
if content_type == "text/plain" and "attachment" not in content_disposition:
save_path = os.path.join(os.getcwd(), folderName, subject_)
if not os.path.exists(save_path):
os.makedirs(save_path)
filename = "textfile.txt"
with open(os.path.join(save_path, filename), 'w+', encoding='utf-8') as fp:
fp.writelines(to_)
fp.writelines(from_)
fp.writelines(date_)
fp.writelines(subject__)
fp.writelines(body)
fp.close()
mail.close()
mail.logout()
I am looking to store body text and attachments even if it's a forward email??
Upvotes: 2
Views: 1839
Reputation: 196
Seems like you already have the part where you are extracting the attachments. Try this code to retrieve the body of a multipart email.
You may have to figure out how to merge your part with this one.
def getAll(username, password, folderName):
mail = imaplib.IMAP4_SSL("imap.outlook.com")
mail.login(username, password)
print("Login success..........")
mail.select("INBOX")
result, data = mail.search(None, '(FROM "[email protected]" SUBJECT "Subject-Name")')
for num in data[0].split():
h, d = mail.fetch(num, '(RFC822)')
raw_email = d[0][1].decode("utf-8")
message = email.message_from_string(raw_email)
email_from = str(make_header(decode_header(message['From'])))
subject = str(make_header(decode_header(message['Subject'])))
print("SUBJECT: "+ subject)
print("FROM: "+ email_from)
msg_encoding = 'iso-2022-jp'
if message.is_multipart() == False:
single = bytearray(message.get_payload(), msg_encoding)
body = single.decode(encoding = msg_encoding)
else:
multi = message.get_payload()[0]
body = multi.get_payload(decode=True).decode(encoding = msg_encoding)
body = re.sub('<[^<]+?>', '', body) # Remove special characters
print("Printing the body:" + body)
Upvotes: 2