Reputation: 635
I am already connected to the instance and I want to upload the files that are generated from my python script directly to S3. I have tried this:
import boto
s3 = boto.connect_s3()
bucket = s3.get_bucket('alexandrabucket')
from boto.s3.key import Key
key = bucket.new_key('s0').set_contents_from_string('some content')
but this is rather creating a new file s0 with the context "same content" while I want to upload the directory s0 to mybucket.
I had a look also to s3put but I didn't manage to get what I want.
Upvotes: 61
Views: 97174
Reputation: 101
Compressing the folder to .tar.gz
file might be useful if you need (like I did) to upload a folder just to download it later elsewhere:
import boto3
import tarfile
from pathlib import Path
s3 = boto3.resource('s3')
bucket = s3.Bucket('my-bucket')
folder_to_upload = Path('C:/Users/uploader/data')
# Create file: 'C:/Users/uploader/data.tar.gz'
tar_filename = folder_to_upload.with_suffix('.tar.gz')
with tarfile.open(tar_filename, "w:gz") as tar:
tar.add(folder_to_upload, arcname=folder_to_upload.name)
# Uploads the file to: 's3://my-bucket/targz-stuff/data.tar.gz'
s3_tar_path = f'targz-stuff/{tar_filename.name}'
bucket.upload_file(tar_filename, s3_tar_path)
# Remove file 'C:/Users/uploader/data.tar.gz'
tar_filename.unlink()
# Download file to 'C:/Users/downloader/data.tar.gz'
download_parent_folder = Path('C:/Users/downloader')
downloader_tar_filename = download_parent_folder / tar_filename.name
bucket.download_file(s3_tar_path, downloader_tar_filename)
# Extract and create folder 'C:/Users/downloader/data'
with tarfile.open(downloader_tar_filename) as f:
f.extractall(download_parent_folder)
# Remove file 'C:/Users/downloader/data.tar.gz'
downloader_tar_filename.unlink()
Upvotes: 0
Reputation: 51
This is my solution using pathlib
instead:
import boto3
from pathlib import Path
def upload_directory_to_s3(directory: str) -> None:
for path in Path(directory).rglob('*'):
if path.is_file():
boto3.client('s3').upload_file(file_name=path, bucket=bucket, object_name=f'{key}/{path}')
Upvotes: 0
Reputation: 156
Somehow the other snippets did not really work for me, this is a modification of the snippet from user 923227 that does.
This code copies all files in a directory and maintains the directory in S3, e.g.2023/01/file.jpg
will be in the bucket as 2023/01/file.jpg
.
import os
import sys
import boto3
client = boto3.client('s3')
local_path = "your-path/data"
bucketname = "bucket-name"
for path, dirs, files in os.walk(local_path):
for file in files:
file_s3 = os.path.normpath(path + '/' + file)
file_local = os.path.join(path, file)
print("Upload:", file_local, "to target:", file_s3, end="")
client.upload_file(file_local, bucketname, file_s3)
print(" ...Success")
Upvotes: 3
Reputation: 1
Simply running terminal commands using os module with F string works
import os
ActualFolderName = "FolderToBeUploadedOnS3"
os.system(f'aws s3 cp D:\<PathToYourFolder>\{ActualFolderName} s3://<BucketName>/{ActualFolderName}/ --recursive')
Upvotes: 0
Reputation: 2830
The s3fs package provides nice functionalities to handle such cases
s3_file = s3fs.S3FileSystem()
local_path = "some_dir_path/some_dir_path/"
s3_path = "bucket_name/dir_path"
s3_file.put(local_path, s3_path, recursive=True)
Upvotes: 12
Reputation: 2123
Updated @user 923227's answer to (1) include newer boto3 interface (2) work with nuances of windows double backslash (3) cleaner tqdm progress bar:
import os
from tqdm import tqdm
def upload_folder_to_s3(s3_client, s3bucket, input_dir, s3_path):
pbar = tqdm(os.walk(input_dir))
for path, subdirs, files in pbar:
for file in files:
dest_path = path.replace(input_dir, "").replace(os.sep, '/')
s3_file = f'{s3_path}/{dest_path}/{file}'.replace('//', '/')
local_file = os.path.join(path, file)
s3_client.upload_file(local_file, s3bucket, s3_file)
pbar.set_description(f'Uploaded {local_file} to {s3_file}')
print(f"Successfully uploaded {input_dir} to S3 {s3_path}")
Usage example:
s3_client = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
upload_folder_to_s3(s3_client, 'BUCKET-NAME', <local-directory>, <s3-directory>)
Upvotes: 2
Reputation: 3492
This solution does not use boto, but I think it could do what the OP wants.
It uses awscli and Python.
import os
class AwsCredentials:
def __init__(self, access_key: str, secret_key: str):
self.access_key = access_key
self.secret_key = secret_key
def to_command(self):
credentials = f'AWS_ACCESS_KEY_ID={self.access_key} AWS_SECRET_ACCESS_KEY={self.secret_key}'
return credentials
def sync_s3_bucket(credentials: AwsCredentials, source_path: str, bucket: str) -> None:
command = f'{credentials.to_command()} aws s3 sync {source_path} s3://{bucket}'
result = os.system(command)
assert result == 0, f'The s3 sync was not successful, error code: {result}'
Please consider getting the AWS credentials from a file or from the environment.
The documentation for the s3 sync
command is here.
Upvotes: 0
Reputation: 189
This is the code I used which recursively upload files from the specified folder to the specified s3 path. Just add S3 credential and bucket details in the script:
https://gist.github.com/hari116/4ab5ebd885b63e699c4662cd8382c314/
#!/usr/bin/python
"""Usage: Add bucket name and credentials
script.py <source folder> <s3 destination folder >"""
import os
from sys import argv
import boto3
from botocore.exceptions import NoCredentialsError
ACCESS_KEY = ''
SECRET_KEY = ''
host = ''
bucket_name = ''
local_folder, s3_folder = argv[1:3]
walks = os.walk(local_folder)
# Function to upload to s3
def upload_to_aws(bucket, local_file, s3_file):
"""local_file, s3_file can be paths"""
s3 = boto3.client('s3', aws_access_key_id=ACCESS_KEY,
aws_secret_access_key=SECRET_KEY)
print(' Uploading ' +local_file + ' as ' + bucket + '/' +s3_file)
try:
s3.upload_file(local_file, bucket, s3_file)
print(' '+s3_file + ": Upload Successful")
print(' ---------')
return True
except NoCredentialsError:
print("Credentials not available")
return False
"""For file names"""
for source, dirs, files in walks:
print('Directory: ' + source)
for filename in files:
# construct the full local path
local_file = os.path.join(source, filename)
# construct the full Dropbox path
relative_path = os.path.relpath(local_file, local_folder)
s3_file = os.path.join(s3_folder, relative_path)
# Invoke upload function
upload_to_aws(bucket_name, local_file, s3_file)
Upvotes: 3
Reputation: 451
Another method that did not exist when this question was first asked is to use python-rclone (https://github.com/ddragosd/python-rclone/blob/master/README.md).
This requires a download of rclone and a working rclone config. Commonly used for AWS (https://rclone.org/s3/) but can be used for other providers as well.
install('python-rclone')
import rclone
cfg_path = r'(path to rclone config file here)'
with open(cfg_path) as f:
cfg = f.read()
# Implementation
# Local file to cloud server
result = rclone.with_config(cfg).run_cmd(command="sync", extra_args=["/home/demodir/", "AWS test:dummydir/etc/"])
# Cloud server to cloud server
result = rclone.with_config(cfg).run_cmd(command="sync", extra_args=["Gdrive:test/testing/", "AWS test:dummydir/etc/"
This allows you to run a "sync" command similar to the AWS CLI within your python code by reading in the config file and mapping your output via kwargs (extra_args)
Upvotes: 0
Reputation: 2715
I built the function based on the feedback from @JDPTET, however,
os.path.normpath
def upload_folder_to_s3(s3bucket, inputDir, s3Path):
print("Uploading results to s3 initiated...")
print("Local Source:",inputDir)
os.system("ls -ltR " + inputDir)
print("Dest S3path:",s3Path)
try:
for path, subdirs, files in os.walk(inputDir):
for file in files:
dest_path = path.replace(inputDir,"")
__s3file = os.path.normpath(s3Path + '/' + dest_path + '/' + file)
__local_file = os.path.join(path, file)
print("upload : ", __local_file, " to Target: ", __s3file, end="")
s3bucket.upload_file(__local_file, __s3file)
print(" ...Success")
except Exception as e:
print(" ... Failed!! Quitting Upload!!")
print(e)
raise e
s3 = boto3.resource('s3', region_name='us-east-1')
s3bucket = s3.Bucket("<<s3bucket_name>>")
upload_folder_to_s3(s3bucket, "<<Local Folder>>", "<<s3 Path>>")
Upvotes: 5
Reputation: 1307
For reading file form folder we can use
import boto
from boto.s3.key import Key
keyId = 'YOUR_AWS_ACCESS_KEY_ID'
sKeyId='YOUR_AWS_ACCESS_KEY_ID'
bucketName='your_bucket_name'
conn = boto.connect_s3(keyId,sKeyId)
bucket = conn.get_bucket(bucketName)
for key in bucket.list():
print ">>>>>"+key.name
pathV = key.name.split('/')
if(pathV[0] == "data"):
if(pathV[1] != ""):
srcFileName = key.name
filename = key.name
filename = filename.split('/')[1]
destFileName = "model/data/"+filename
k = Key(bucket,srcFileName)
k.get_contents_to_filename(destFileName)
elif(pathV[0] == "nlu_data"):
if(pathV[1] != ""):
srcFileName = key.name
filename = key.name
filename = filename.split('/')[1]
destFileName = "model/nlu_data/"+filename
k = Key(bucket,srcFileName)
k.get_contents_to_filename(destFileName)
Upvotes: 1
Reputation: 742
You could do the following:
import os
import boto3
s3_resource = boto3.resource("s3", region_name="us-east-1")
def upload_objects():
try:
bucket_name = "S3_Bucket_Name" #s3 bucket name
root_path = 'D:/sample/' # local folder for upload
my_bucket = s3_resource.Bucket(bucket_name)
for path, subdirs, files in os.walk(root_path):
path = path.replace("\\","/")
directory_name = path.replace(root_path,"")
for file in files:
my_bucket.upload_file(os.path.join(path, file), directory_name+'/'+file)
except Exception as err:
print(err)
if __name__ == '__main__':
upload_objects()
Upvotes: 3
Reputation: 569
The following function can be used to upload directory to s3 via boto.
def uploadDirectory(path,bucketname):
for root,dirs,files in os.walk(path):
for file in files:
s3C.upload_file(os.path.join(root,file),bucketname,file)
Provide a path to the directory and bucket name as the inputs. The files are placed directly into the bucket. Alter the last variable of the upload_file() function to place them in "directories".
Upvotes: 46
Reputation: 45846
There is nothing in the boto
library itself that would allow you to upload an entire directory. You could write your own code to traverse the directory using os.walk
or similar and to upload each individual file using boto.
There is a command line utility in boto called s3put
that could handle this or you could use the AWS CLI tool which has a lot of features that allow you to upload entire directories or even sync the S3 bucket with a local directory or vice-versa.
Upvotes: 30