Reputation: 15
I am trying to upload the files inside a folder to an S3 bucket, but I cannot seem to upload all of them. Here is my code:
# Upload every file found in the sub-folders of raw_data/ to the
# 'book-reviews-analysis' bucket, using the sub-folder name ('malay' or
# 'english') as the S3 key prefix.
# NOTE(review): the indentation of this listing was lost when it was pasted.
try:
for folder in os.listdir('raw_data/'):
# os.listdir() returns bare entry names, not paths, so `files` below is
# only a file name such as 'politik_untuk_pemula.CSV'.
for files in os.listdir(f'raw_data/{folder}'):
if folder == 'malay':
upload_file_bucket = 'book-reviews-analysis'
upload_file_key = 'malay/' + str(files)
# NOTE(review): the first argument is the local file to read. Passing the
# bare name makes boto3 look for it relative to the current working
# directory instead of raw_data/<folder>/ -- this is the call that raises
# FileNotFoundError in the traceback below.
client.upload_file(files, upload_file_bucket, upload_file_key)
logger.info('--DONE UPLOADING FILE TO BUCKET--')
elif folder == 'english':
upload_file_bucket='book-reviews-analysis'
upload_file_key='english/' + str(files)
# Same bare-name problem as in the 'malay' branch.
client.upload_file(files,upload_file_bucket,upload_file_key)
logger.info('--DONE UPLOADING FILE TO BUCKET--')
# Only ClientError is handled here; as the traceback shows, the
# FileNotFoundError is not caught by this handler and terminates the script.
except ClientError as e:
print(e)
logger.error(e)
The weird thing is that it does upload a file in the english folder but not in the 'malay' folder. I got the following error and I am very certain that the file I want to upload is in that folder.
Traceback (most recent call last):
File "D:/personal project/Book review analysis/book_reviews_analysis/pipeline.py", line 64, in <module>
main()
File "D:/personal project/Book review analysis/book_reviews_analysis/pipeline.py", line 61, in main
upload_to_s3()
File "D:/personal project/Book review analysis/book_reviews_analysis/pipeline.py", line 42, in upload_to_s3
client.upload_file(files, upload_file_bucket, upload_file_key)
File "D:\personal project\Book review analysis\book_reviews_analysis\env\lib\site-packages\boto3\s3\inject.py", line 148, in upload_file
callback=Callback,
File "D:\personal project\Book review analysis\book_reviews_analysis\env\lib\site-packages\boto3\s3\transfer.py", line 288, in upload_file
future.result()
File "D:\personal project\Book review analysis\book_reviews_analysis\env\lib\site-packages\s3transfer\futures.py", line 103, in result
return self._coordinator.result()
File "D:\personal project\Book review analysis\book_reviews_analysis\env\lib\site-packages\s3transfer\futures.py", line 266, in result
raise self._exception
File "D:\personal project\Book review analysis\book_reviews_analysis\env\lib\site-packages\s3transfer\tasks.py", line 269, in _main
self._submit(transfer_future=transfer_future, **kwargs)
File "D:\personal project\Book review analysis\book_reviews_analysis\env\lib\site-packages\s3transfer\upload.py", line 585, in _submit
upload_input_manager.provide_transfer_size(transfer_future)
File "D:\personal project\Book review analysis\book_reviews_analysis\env\lib\site-packages\s3transfer\upload.py", line 244, in provide_transfer_size
self._osutil.get_file_size(transfer_future.meta.call_args.fileobj)
File "D:\personal project\Book review analysis\book_reviews_analysis\env\lib\site-packages\s3transfer\utils.py", line 247, in get_file_size
return os.path.getsize(filename)
File "C:\Users\aliff\AppData\Local\Programs\Python\Python37\lib\genericpath.py", line 50, in getsize
return os.stat(filename).st_size
FileNotFoundError: [WinError 2] The system cannot find the file specified: 'politik_untuk_pemula.CSV'
Process finished with exit code 1
Upvotes: 0
Views: 286
Reputation: 6780
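As @ewokx mentioned in their comment: `files` doesn't have the path. `os.listdir()` returns only file names, so `client.upload_file(files, ...)` looks for the file in the current working directory instead of in `raw_data/<folder>/`.
That said, try using `pathlib.Path`, like so: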
from pathlib import Path

# Root directory whose language sub-folders are mirrored into the bucket.
src_dir = Path("raw_data")
upload_file_bucket = 'book-reviews-analysis'
# Only these sub-folders are uploaded; the folder name doubles as the S3 key
# prefix, so no per-folder if/elif branch is needed.
folders_to_upload = {"malay", "english"}

# "*/*" yields the entries exactly one level below raw_data/. Each entry keeps
# its path relative to src_dir, so the upload no longer depends on the file
# happening to exist in the current working directory.
files_coll = src_dir.glob("*/*")
for one_file in files_coll:
    folder = one_file.parent.name
    # glob() also yields nested directories, which upload_file() cannot send.
    if folder not in folders_to_upload or not one_file.is_file():
        continue
    # S3 keys use forward slashes regardless of the local OS separator.
    upload_file_key = f'{folder}/{one_file.name}'
    # boto3 documents Filename as a str path, so convert the Path explicitly.
    client.upload_file(str(one_file), upload_file_bucket, upload_file_key)
    logger.info('--DONE UPLOADING FILE TO BUCKET--')
Upvotes: 1