Reputation: 889
In a jupyter notebook I am able to create two Pandas dataframes and export them to individual sheets of an Excel workbook with some additional formatting, including text-wrapping, frozen pane, bold headers and auto-filters.
Code below:
# MODULES
import pandas as pd
import numpy as np
from datetime import date
#CONSTRUCT DATAFRAME
df = pd.DataFrame({'Data': [10, 22, 31, 43, 57, 99, 65, 74, 88],
'Data2':[10, 22, 31, 43, 57, 99, 65, 74, 88],
'Data3':[10, 22, 31, 43, 57, 99, 65, 74, 88]})
#CONSTRUCT DATAFRAME
df2 = pd.DataFrame({'df2_Data': ['blue', 'yellow', 'purple', 'orange', 'green', 'brown', 'gray', 'white', 'red'],
'df2_Data2':['bike', 'car', 'bus', 'train', 'boat', 'truck', 'plane', 'scooter', 'skateboard'],
'df2_Data3':['chicken', 'cow', 'dog', 'crocodile', 'snake', 'pig', 'rat', 'mouse', 'monkey']})
#DATE FOR INCLUSION IN FILENAME
today = date.today()
d2 = today.strftime("%B %d, %Y")
writer = pd.ExcelWriter('ExcelExample{}.xlsx'.format(d2), engine='xlsxwriter')
sheets_in_writer=['Sheet1','sheet2']
data_frame_for_writer=[df, df2]
for i,j in zip(data_frame_for_writer,sheets_in_writer):
i.to_excel(writer,j,index=False)
### Assign WorkBook
workbook=writer.book
# Add a header format
header_format = workbook.add_format({'bold': True,'text_wrap': True,'size':10,
'valign': 'top','fg_color': '#c7e7ff','border': 1})
### Apply same format on each sheet being saved
for i,j in zip(data_frame_for_writer,sheets_in_writer):
for col_num, value in enumerate(i.columns.values):
writer.sheets[j].set_column(0, max_col - 1, 12)
writer.sheets[j].write(0, col_num, value, header_format)
writer.sheets[j].autofilter(0,0,0,i.shape[1]-1)
writer.sheets[j].freeze_panes(1,0)
writer.save()
This produces the following Excel workbook in which both/all sheets have the formatting applied.
I'm struggling to reproduce this functionality within an AWS lambda function due to my unfamiliarity with boto3
My attempt is as follows, which does save an Excel file to the specified destination but of course has none of the required formatting.
import boto3
import numpy as np
import pandas as pd
import io
from io import BytesIO
from io import StringIO
from datetime import date
def lambda_handler(event, context):
s3 = boto3.client('s3')
# STORE TODAY'S DATE
today = date.today()
d2 = today.strftime("%B %d, %Y")
print("d2 =", d2)
#SPECIFY OUTPUT EXCEL FILE NAME AND FILEPATH
bucket = 'brnddmn-s3'
filepath = 'output/'ExcelExample{}.xlsx'.format(d2)
# GENERATE AND EXPORT EXCEL FILE TO BUCKET
with io.BytesIO() as output:
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
df.to_excel(writer, sheet_name = 'Sheet1')
df2.to_excel(writer, sheet_name = 'Sheet2')
data = output.getvalue()
s3 = boto3.resource('s3')
s3.Bucket(bucket).put_object(Key=filepath, Body=data)
Any help greatly appreciated.
Upvotes: 4
Views: 5460
Reputation: 2694
AWS Lambdas are guaranteed to have 512mb /tmp space, so a potential solution that will help you develop locally is to change
writer = pd.ExcelWriter('ExcelExample{}.xlsx'.format(d2), engine='xlsxwriter')
to
output_file_name = "ExcelExample{}.xlsx".format(d2)
local_file_path = f"/tmp/{output_file_name}"
writer = pd.ExcelWriter(local_file_path, engine="xlsxwriter")
which then would allow you to use
import os
import boto3
bucket = "brnddmn-s3"
s3_file_path = f"/output/{output_file_name}"
boto3.resource("s3").Object(bucket, s3_file_path).upload_file(local_file_path)
os.remove(local_file_path)
That should be the simplest change to keep you going, assuming ~500mb is enough for each workbook.
Upvotes: 2
Reputation: 238957
I tried to verify the issue, but apart from syntax errors and missing definitions of data-frames, it all works as expected. The corrected example version that works is below.
The only other comment I can make is to make sure that you have pandas
and XlsxWriter
setup for you lambda, for example, in a lambda layer.
import boto3
import json
import io
from datetime import date
import pandas as pd
s3 = boto3.resource('s3')
def lambda_handler(event, context):
#CONSTRUCT DATAFRAME
df = pd.DataFrame({'Data': [10, 22, 31, 43, 57, 99, 65, 74, 88],
'Data2':[10, 22, 31, 43, 57, 99, 65, 74, 88],
'Data3':[10, 22, 31, 43, 57, 99, 65, 74, 88]})
#CONSTRUCT DATAFRAME
df2 = pd.DataFrame({'df2_Data': ['blue', 'yellow', 'purple', 'orange', 'green', 'brown', 'gray', 'white', 'red'],
'df2_Data2':['bike', 'car', 'bus', 'train', 'boat', 'truck', 'plane', 'scooter', 'skateboard'],
'df2_Data3':['chicken', 'cow', 'dog', 'crocodile', 'snake', 'pig', 'rat', 'mouse', 'monkey']})
today = date.today()
d2 = today.strftime("%B %d, %Y")
io_buffer = io.BytesIO()
writer = pd.ExcelWriter(io_buffer, engine='xlsxwriter')
sheets_in_writer=['Sheet1','sheet2']
data_frame_for_writer=[df, df2]
for i,j in zip(data_frame_for_writer,sheets_in_writer):
i.to_excel(writer,j,index=False)
### Assign WorkBook
workbook=writer.book
# Add a header format
header_format = workbook.add_format({'bold': True,'text_wrap': True,'size':10,
'valign': 'top','fg_color': '#c7e7ff','border': 1})
max_col=3
### Apply same format on each sheet being saved
for i,j in zip(data_frame_for_writer,sheets_in_writer):
for col_num, value in enumerate(i.columns.values):
writer.sheets[j].set_column(0, max_col - 1, 12)
writer.sheets[j].write(0, col_num, value, header_format)
writer.sheets[j].autofilter(0,0,0,i.shape[1]-1)
writer.sheets[j].freeze_panes(1,0)
writer.save()
bucket = 'brnddmn-s3'
filepath = 'output/ExcelExample{}.xlsx'.format(d2)
data = io_buffer.getvalue()
s3.Bucket(bucket).put_object(Key=filepath, Body=data)
return {
'statusCode': 200,
'body': json.dumps(filepath)
}
Upvotes: 3