Reputation: 29
I have found a rather odd behaviour by chance, but could not come up with an explanation for it yet. Maybe someone has an explanation for it.
The code below produces the output in the picture. Seemingly is "Chart Ok" the one, which I am looking for, yet on a closer look (in Excel) we see that the data is not properly referenced (due to the values being seen as "general" instead of "number" format). Yet Excel is able to create the chart somewhat properly.
The actual proper way would be the chart next to it "Missing chart", however here Excel is not able to create the graph. The only difference in the code being the 's around the sheetname in create_chart_missing_chart around the values property in add_series.
Obviously the fix for this behaviour is to set the actual numbers to a number format in the dataframe. (I guess it's the best to do before the data is written into Excel)
I am using:
import pandas as pd
from xlsxwriter.utility import xl_rowcol_to_cell
def main():
df = pd.DataFrame({"text": ["some value1", "some value2", "sum of values"], "valeus": ["2", "4", "6"]})
with pd.ExcelWriter("test.xlsx") as writer:
create_worksheet(df, writer, "Some name with space")
return 0
def create_worksheet(df: pd.DataFrame, writer, sheet: str):
df.to_excel(writer, sheet, index=False, startrow=1, startcol=1, header=False)
wb = writer.book
ws = writer.sheets[sheet]
cell_format = wb.add_format({"bold": True, "border": True, "border_color": "white"})
cell_grid = wb.add_format({"border": True, "border_color": "white"})
ws.set_column("A:AA", 10, cell_grid)
ws.set_column("B:B", 25, cell_format)
chart_OK = create_chart_ok(wb, df, sheet)
chart_missing_values = create_chart_missing_values(wb, df, sheet)
chart_missing_chart = create_chart_missing_chart(wb, df, sheet)
chart_missing_x = create_chart_missing_x(wb, df, sheet)
# This creates a visible chart even if the values to display are stored in excel as string,
# the only odd thing is the missing cross reference between the chart and the data
ws.insert_chart("B6", chart_OK)
# This creates a chart, where instead of the values from B2 and B3 only "1" and "2" are shown in the legend
ws.insert_chart("B22", chart_missing_values)
# This would actually create a valid chart crossreferencing correctly to the data,
# if the values for the chart in C2 and C3 where handled by excel as numbers instead of "general"
ws.insert_chart("K6", chart_missing_chart)
# Here we have the missing chart, due to the values in C2 and C3 being seen as text
# additionally there is also missing values for the legend
ws.insert_chart("K22", chart_missing_x)
return 0
def create_chart_ok(wb, df: pd.DataFrame, sheet: str):
chart = wb.add_chart({'type': 'pie'})
chart.set_title({"name": "Chart OK", "name_font": {"bold": False, "size": 14}})
chart.add_series({
# Mind the missing ' characters around the sheet name in values
"values": "=" + str(sheet) + "!$C$2:" +
xl_rowcol_to_cell(len(df.index) - 1, 2, row_abs=True, col_abs=True),
# Mind the ' characters around the sheet name in values
"categories": "='" + str(sheet) + "'!$B$2:" +
xl_rowcol_to_cell(len(df.index) - 1, 1, row_abs=True, col_abs=True),
'points': [
{'fill': {'color': "9bbb59"}},
{'fill': {'color': '4f81bd'}},
],
})
return chart
def create_chart_missing_values(wb, df: pd.DataFrame, sheet: str):
chart = wb.add_chart({'type': 'pie'})
chart.set_title({"name": "Missing Values in legend", "name_font": {"bold": False, "size": 14}})
chart.add_series({
# Mind the missing ' characters around the sheet name in values
"values": "=" + str(sheet) + "!$C$2:" +
xl_rowcol_to_cell(len(df.index) - 1, 2, row_abs=True, col_abs=True),
# Mind the missing ' characters around the sheet name in categories
"categories": "=" + str(sheet) + "!$B$2:" +
xl_rowcol_to_cell(len(df.index) - 1, 1, row_abs=True, col_abs=True),
'points': [
{'fill': {'color': "9bbb59"}},
{'fill': {'color': '4f81bd'}},
],
})
return chart
def create_chart_missing_chart(wb, df: pd.DataFrame, sheet: str):
chart = wb.add_chart({'type': 'pie'})
chart.set_title({"name": "Missing chart", "name_font": {"bold": False, "size": 14}})
chart.add_series({
# Mind the ' characters around the sheet name in values
"values": "='" + str(sheet) + "'!$C$2:" +
xl_rowcol_to_cell(len(df.index) - 1, 2, row_abs=True, col_abs=True),
# Mind the ' characters around the sheet name in categories
"categories": "='" + str(sheet) + "'!$B$2:" +
xl_rowcol_to_cell(len(df.index) - 1, 1, row_abs=True, col_abs=True),
'points': [
{'fill': {'color': "9bbb59"}},
{'fill': {'color': '4f81bd'}},
],
})
return chart
def create_chart_missing_x(wb, df: pd.DataFrame, sheet: str):
chart = wb.add_chart({'type': 'pie'})
chart.set_title({"name": "Missing whatever", "name_font": {"bold": False, "size": 14}})
chart.add_series({
# Mind the ' characters around the sheet name in values
"values": "='" + str(sheet) + "'!$C$2:" +
xl_rowcol_to_cell(len(df.index) - 1, 2, row_abs=True, col_abs=True),
# Mind the missing ' characters around the sheet name in categories
"categories": "=" + str(sheet) + "!$B$2:" +
xl_rowcol_to_cell(len(df.index) - 1, 1, row_abs=True, col_abs=True),
'points': [
{'fill': {'color': "9bbb59"}},
{'fill': {'color': '4f81bd'}},
],
})
return chart
if __name__ == "__main__":
main()
Upvotes: 1
Views: 397
Reputation: 41644
Xlsxwriter charts odd behaviour with sheet names consisting of multiple words and writing numbers as text into excel. I have found a rather odd behaviour by chance, but could not come up with an explanation for it yet. Maybe someone has an explanation for it.
This odd behaviour is really just Excel's default behaviour when you try to create charts in the way you are trying to create them. If you replicated the same charts manually in Excel then you would get the same results. XlsxWriter is just creating the charts you tell it to create.
Some of the issues in the examples are:
To fix the first of these issues you can manually single quote the worksheet name or better still let XlsxWriter do it via the add_series()
list syntax. The second issue can be fixed by converting the string values to numbers or using the XlsxWriter strings_to_numbers
option to do it.
Changing the example to do both of these, and fix the colour names, would give you something like this:
import pandas as pd
def main():
df = pd.DataFrame({'text': ['some value1', 'some value2', 'sum of values'],
'values': ['2', '4', '6']})
options = {'options': {'strings_to_numbers': True}}
with pd.ExcelWriter('test.xlsx', engine='xlsxwriter',
engine_kwargs=options) as writer:
create_worksheet(df, writer, 'Some name with space')
return 0
def create_worksheet(df: pd.DataFrame, writer, sheet: str):
df.to_excel(writer, sheet, index=False, startrow=1, startcol=1,
header=False)
wb = writer.book
ws = writer.sheets[sheet]
cell_format = wb.add_format({'bold': True, 'border': True,
'border_color': 'white'})
cell_grid = wb.add_format({'border': True,
'border_color': 'white'})
ws.set_column('A:AA', 10, cell_grid)
ws.set_column('B:B', 25, cell_format)
chart_OK = create_chart_ok(wb, df, sheet)
ws.insert_chart('B6', chart_OK)
return 0
def create_chart_ok(wb, df: pd.DataFrame, sheet: str):
chart = wb.add_chart({'type': 'pie'})
chart.set_title({'name': 'Chart OK',
'name_font': {'bold': False, 'size': 14}})
first_row = 1
last_row = first_row + df.shape[0] - 2
chart.add_series({
'categories': [sheet, first_row, 1, last_row, 1],
'values': [sheet, first_row, 2, last_row, 2],
'points': [
{'fill': {'color': '#9bbb59'}},
{'fill': {'color': '#4f81bd'}},
],
})
return chart
if __name__ == '__main__':
main()
Output:
And finally, a better way to hide the worksheet gridlines would be to use the worksheet.hide_gridliens() method.
Upvotes: 1