Reputation: 51
Since days I am struggling with this problem: I have a large script where a function is exporting tables in an Excel workbook, each table into a different worksheet. Additionally, I want to give format to the worksheets using engine xlsxwriter. I use the instance with pd.ExcelWriter() as writer. This works fine for an Excel workbook with a single sheet: using to_excel the table is exported and immediately I use an ad hoc function I created to format it.
Code structure:
Global Excel_formatting function that gives format (input: table, sheet name, text strings)
Script function_tables function (input: dataframe, pathfile) that creates subtables from input dataframe, and uses instance pd.ExcelWriter to:
-- export the Excel worksheets
-- call Excel_formatting function to format the worksheets
At high level, the script calls function_tables
See below the complete code:
# Global function to format the output tables
def Excel_formatting(table_input, sheet_name_input, title_in, remark_in, start_row_input):
# Assign WorkBook and worksheet
workbook = writer.book
worksheet = writer.sheets[sheet_name_input]
start_column = 0
# Title and remark
worksheet.write(0, start_column, title_in,
workbook.add_format({'bold': True,
'color': '#8B0000',
'size': 18,
'align':'left'}))
worksheet.write(1, start_column+1, remark_in,
workbook.add_format({'italic': True,
'size': 11,
'align':'left'}))
# Format header (on top of existing header)
header_format = workbook.add_format({'bold': True,
'text_wrap': False,
'fg_color': '#FF8B8B',
'border': 1,
'align':'center'})
for col_num, value in enumerate(table_input.columns.values):
worksheet.write(start_row_input, col_num, value, header_format)
# Freeze panes / Can also be done with to_excel
worksheet.freeze_panes(start_row_input+1, 0)
# Set column width
end_column = len(table_input.columns)
worksheet.autofit()
# Add autofilter to header
worksheet.autofilter(start_row_input, 0, start_row_input, end_column-1)
# Add logo (if present, to avoid script error)
figure_path = 'Logo.JPG'
if (os.path.exists(figure_path) == True):
worksheet.insert_image(0, start_column+5, figure_path, {'x_scale': 0.1, 'y_scale': 0.08, 'decorative': True})
# End of function
return workbook.close()
def function_tables(x, filename):
# Here the function creates subtables from input dataframe
df = x
Table_1 = df.groupby(['Feature 1'])['Deviation'].sum().reset_index()
Table_2 = df.groupby(['Feature 2'])['Deviation'].sum().reset_index()
# ...
Table_N = df.groupby(['Feature N'])['Deviation'].sum().reset_index()
# Export tables adding new sheets to the same Excel workbook
with pd.ExcelWriter(filename, engine='xlsxwriter', mode='w') as writer:
start_row = 2
Table_1.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.1', startrow=start_row)
Table_2.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.2', startrow=start_row)
# ...
Table_N.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.N', startrow=start_row)
# Formatting the worksheets calling the global function
title_input_1 = 'Title for overview table 1'
remark_input_1 = 'Remark Table 1'
Excel_formatting(Table_2, 'Overview Feat.2', title_input_1, remark_input_1, start_row)
title_input_2 = 'Title for overview table 2'
remark_input_2 = 'Remark Table 2'
# ...
Excel_formatting(Table_2, 'Overview Feat.N', title_input_2, remark_input_2, start_row)
title_input_N = 'Title for overview table N'
remark_input_N = 'Remark Table N'
Excel_formatting(Table_1, 'Overview Feat.N', title_input_N, remark_input_N, start_row)
# Call section of script
function_tables(df_input, Path_filename)
I tried also openpyxl, a loop through the tables using a dictionary for the input or not having the formatting function as global but inside the writer instance but all failed, always giving me the same error:
worksheet = writer.sheets[sheet_name_input]
KeyError: 'Overview Feat.1'
It looks that it cannot find the sheetname. Any help? A poorsman alternative will be to create N Excel workbooks and then merged all them, but I prefer not to do so, it must be a more pythonic way to work this, right?
A million thanks!
Upvotes: 1
Views: 74
Reputation: 41574
There are a few issues in the code: the writer
object needs to be passed to the Excel_formatting()
function, the writer shouldn't be closed in that function, and there are some typos in the titles, captions and variable names.
Here is a working example with those issues fixed. I've added sample data frames, you can replace that with your groupby()
code.
import pandas as pd
import os
# Global function to format the output tables
def Excel_formatting(table_input, writer, sheet_name_input, title_in, remark_in, start_row_input):
# Assign WorkBook and worksheet
workbook = writer.book
worksheet = writer.sheets[sheet_name_input]
start_column = 0
# Title and remark
worksheet.write(0, start_column, title_in,
workbook.add_format({'bold': True,
'color': '#8B0000',
'size': 18,
'align': 'left'}))
worksheet.write(1, start_column + 1, remark_in,
workbook.add_format({'italic': True,
'size': 11,
'align': 'left'}))
# Format header (on top of existing header)
header_format = workbook.add_format({'bold': True,
'text_wrap': False,
'fg_color': '#FF8B8B',
'border': 1,
'align': 'center'})
for col_num, value in enumerate(table_input.columns.values):
worksheet.write(start_row_input, col_num, value, header_format)
# Freeze panes / Can also be done with to_excel
worksheet.freeze_panes(start_row_input + 1, 0)
# Set column width
end_column = len(table_input.columns)
worksheet.autofit()
# Add autofilter to header
worksheet.autofilter(start_row_input, 0, start_row_input, end_column - 1)
# Add logo (if present, to avoid script error)
figure_path = 'Logo.JPG'
if os.path.exists(figure_path):
worksheet.insert_image(0, start_column + 5, figure_path, {'x_scale': 0.1, 'y_scale': 0.08, 'decorative': True})
def function_tables(x, filename):
Table_1 = pd.DataFrame({'Data': [11, 12, 13, 14]})
Table_2 = pd.DataFrame({'Data': [11, 12, 13, 14]})
# ...
Table_N = pd.DataFrame({'Data': [11, 12, 13, 14]})
# Export tables adding new sheets to the same Excel workbook
with pd.ExcelWriter(filename, engine='xlsxwriter', mode='w') as writer:
start_row = 2
Table_1.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.1', startrow=start_row)
Table_2.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.2', startrow=start_row)
# ...
Table_N.to_excel(writer, index=True, header=True, sheet_name='Overview Feat.N', startrow=start_row)
# Formatting the worksheets calling the global function
title_input_1 = 'Title for overview table 1'
remark_input_1 = 'Remark Table 1'
Excel_formatting(Table_1, writer, 'Overview Feat.1', title_input_1, remark_input_1, start_row)
title_input_2 = 'Title for overview table 2'
remark_input_2 = 'Remark Table 2'
Excel_formatting(Table_2, writer, 'Overview Feat.2', title_input_2, remark_input_2, start_row)
title_input_N = 'Title for overview table N'
remark_input_N = 'Remark Table N'
Excel_formatting(Table_N, writer, 'Overview Feat.N', title_input_N, remark_input_N, start_row)
# Call section of script
function_tables(None, 'test.xlsx')
Output:
However, to make it more generic it would be best to handle the main function in a loop like this:
def function_tables(x, filename):
writer = pd.ExcelWriter(filename, engine='xlsxwriter')
Table_1 = pd.DataFrame({'Data': [11, 12, 13, 14]})
Table_2 = pd.DataFrame({'Data': [11, 12, 13, 14]})
# ...
Table_N = pd.DataFrame({'Data': [11, 12, 13, 14]})
# In a real case you would probably append() these in a loop.
dfs = [Table_1, Table_2, Table_N]
for i, df in enumerate(dfs, 1):
start_row = 2
df.to_excel(writer, index=True, header=True, sheet_name=f'Overview Feat.{i}', startrow=start_row)
# Formatting the worksheets calling the global function
title_input = f'Title for overview table {i}'
remark_input = f'Remark Table {i}'
Excel_formatting(df, writer, f'Overview Feat.{i}', title_input, remark_input, start_row)
writer.close()
Upvotes: 1