Reputation: 27
I'm trying to write a script that finds emails from WeTransfer, downloads the attached files and deposits them to a directory on my computer. The code i have detects only one email from WeTransfer, says that the link is invalid and that it failed to extract download link. I'm stumped. Is my email parsing code somehow not correct?
import imaplib
import email
from email.header import decode_header
import requests
from bs4 import BeautifulSoup
import os
import shutil
from urllib.parse import urlparse, parse_qs
# Email credentials and IMAP server configuration for Yahoo Mail
EMAIL = 'xxxxxx.com'
PASSWORD = 'xxxxxx' # Use Yahoo account password or an App Password if you have 2FA enabled
IMAP_SERVER = 'imap.mail.yahoo.com'
IMAP_PORT = 993
# Define the folder to save the files
DOWNLOAD_DIR = '/home/JPW/Downloads'
ARCHIVE_DIR = '/home/JPW/archives'
def extract_download_link(we_transfer_url):
# Parse the WeTransfer URL
parsed_url = urlparse(we_transfer_url)
if parsed_url.hostname == 'wetransfer.com' and parsed_url.path.startswith('/downloads'):
# Extract the unique ID from the URL
unique_id = parsed_url.path.split('/')[-1]
# Construct the download URL using the unique ID
download_url = f"https://wetransfer.com/api/v4/transfers/{unique_id}/download"
return download_url
else:
print("Invalid WeTransfer URL. Only WeTransfer download links are supported.")
return None
def download_file(url, download_dir):
response = requests.get(url)
if response.status_code == 200:
filename = os.path.basename(url)
filepath = os.path.join(download_dir, filename)
with open(filepath, 'wb') as f:
f.write(response.content)
print("File downloaded successfully.")
return filepath
else:
print("Failed to download the file.")
return None
def move_file(src_path, dest_dir):
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)
shutil.move(src_path, dest_dir)
def get_latest_wetransfer_link():
mail = imaplib.IMAP4_SSL(IMAP_SERVER, IMAP_PORT)
mail.login(EMAIL, PASSWORD)
mail.select('inbox')
status, messages = mail.search(None, '(FROM "[email protected]")')
email_ids = messages[0].split()
if not email_ids:
print("No mails from Eugene found.")
return None
latest_email_id = email_ids[-1]
status, msg_data = mail.fetch(latest_email_id, '(RFC822)')
msg = email.message_from_bytes(msg_data[0][1])
mail.logout()
for part in msg.walk():
if part.get_content_type() == 'text/html':
html_content = part.get_payload(decode=True).decode()
soup = BeautifulSoup(html_content, 'html.parser')
all_links = soup.find_all('a', href=True)
for a in all_links:
if 'wetransfer.com/downloads' in a['href']:
print("WeTransfer link found:", a['href'])
return a['href']
print("No WeTransfer link found in the latest email.")
return None
if __name__ == "__main__":
link = get_latest_wetransfer_link()
if link:
download_link = extract_download_link(link) # Calling extract_download_link here
if download_link:
downloaded_file = download_file(download_link, DOWNLOAD_DIR)
if downloaded_file:
move_file(downloaded_file, ARCHIVE_DIR)
print(f"File moved to {ARCHIVE_DIR}")
else:
print("Failed to download the file.")
else:
print("Failed to extract download link.")
else:
print("No WeTransfer link found in the latest email.")
Upvotes: 0
Views: 93