Gergo Peltz
Gergo Peltz

Reputation: 29

Xlsxwriter charts odd behaviour with sheet names consisting of multiple words and writing numbers as text into excel

I have found a rather odd behaviour by chance, but could not come up with an explanation for it yet. Maybe someone has an explanation for it.

The code below produces the output in the picture. Seemingly is "Chart Ok" the one, which I am looking for, yet on a closer look (in Excel) we see that the data is not properly referenced (due to the values being seen as "general" instead of "number" format). Yet Excel is able to create the chart somewhat properly.

The actual proper way would be the chart next to it "Missing chart", however here Excel is not able to create the graph. The only difference in the code being the 's around the sheetname in create_chart_missing_chart around the values property in add_series.

Obviously the fix for this behaviour is to set the actual numbers to a number format in the dataframe. (I guess it's the best to do before the data is written into Excel)

I am using:

Resulting charts

import pandas as pd
from xlsxwriter.utility import xl_rowcol_to_cell


def main():
    df = pd.DataFrame({"text": ["some value1", "some value2", "sum of values"], "valeus": ["2", "4", "6"]})
    with pd.ExcelWriter("test.xlsx") as writer:
        create_worksheet(df, writer, "Some name with space")
    return 0


def create_worksheet(df: pd.DataFrame, writer, sheet: str):
    df.to_excel(writer, sheet, index=False, startrow=1, startcol=1, header=False)

    wb = writer.book
    ws = writer.sheets[sheet]
    cell_format = wb.add_format({"bold": True, "border": True, "border_color": "white"})
    cell_grid = wb.add_format({"border": True, "border_color": "white"})
    ws.set_column("A:AA", 10, cell_grid)
    ws.set_column("B:B", 25, cell_format)

    chart_OK = create_chart_ok(wb, df, sheet)
    chart_missing_values = create_chart_missing_values(wb, df, sheet)
    chart_missing_chart = create_chart_missing_chart(wb, df, sheet)
    chart_missing_x = create_chart_missing_x(wb, df, sheet)

    # This creates a visible chart even if the values to display are stored in excel as string, 
    # the only odd thing is the missing cross reference between the chart and the data
    ws.insert_chart("B6", chart_OK)
    
    # This creates a chart, where instead of the values from B2 and B3 only "1" and "2" are shown in the legend
    ws.insert_chart("B22", chart_missing_values)

    # This would actually create a valid chart crossreferencing correctly to the data, 
    # if the values for the chart in C2 and C3 where handled by excel as numbers instead of "general"
    ws.insert_chart("K6", chart_missing_chart)

    # Here we have the missing chart, due to the values in C2 and C3 being seen as text
    # additionally there is also missing values for the legend
    ws.insert_chart("K22", chart_missing_x)

    return 0


def create_chart_ok(wb, df: pd.DataFrame, sheet: str):
    chart = wb.add_chart({'type': 'pie'})
    chart.set_title({"name": "Chart OK", "name_font": {"bold": False, "size": 14}})
    chart.add_series({
        # Mind the missing ' characters around the sheet name in values
        "values": "=" + str(sheet) + "!$C$2:" +
                  xl_rowcol_to_cell(len(df.index) - 1, 2, row_abs=True, col_abs=True),
        # Mind the  ' characters around the sheet name in values
        "categories": "='" + str(sheet) + "'!$B$2:" +
                      xl_rowcol_to_cell(len(df.index) - 1, 1, row_abs=True, col_abs=True),
        'points': [
            {'fill': {'color': "9bbb59"}},
            {'fill': {'color': '4f81bd'}},
        ],
    })

    return chart


def create_chart_missing_values(wb, df: pd.DataFrame, sheet: str):
    chart = wb.add_chart({'type': 'pie'})
    chart.set_title({"name": "Missing Values in legend", "name_font": {"bold": False, "size": 14}})
    chart.add_series({
        # Mind the missing ' characters around the sheet name in values
        "values": "=" + str(sheet) + "!$C$2:" +
                  xl_rowcol_to_cell(len(df.index) - 1, 2, row_abs=True, col_abs=True),
        # Mind the missing ' characters around the sheet name in categories
        "categories": "=" + str(sheet) + "!$B$2:" +
                      xl_rowcol_to_cell(len(df.index) - 1, 1, row_abs=True, col_abs=True),
        'points': [
            {'fill': {'color': "9bbb59"}},
            {'fill': {'color': '4f81bd'}},
        ],
    })

    return chart


def create_chart_missing_chart(wb, df: pd.DataFrame, sheet: str):
    chart = wb.add_chart({'type': 'pie'})
    chart.set_title({"name": "Missing chart", "name_font": {"bold": False, "size": 14}})
    chart.add_series({
        # Mind the ' characters around the sheet name in values
        "values": "='" + str(sheet) + "'!$C$2:" +
                  xl_rowcol_to_cell(len(df.index) - 1, 2, row_abs=True, col_abs=True),
        # Mind the ' characters around the sheet name in categories
        "categories": "='" + str(sheet) + "'!$B$2:" +
                      xl_rowcol_to_cell(len(df.index) - 1, 1, row_abs=True, col_abs=True),
        'points': [
            {'fill': {'color': "9bbb59"}},
            {'fill': {'color': '4f81bd'}},
        ],
    })

    return chart


def create_chart_missing_x(wb, df: pd.DataFrame, sheet: str):
    chart = wb.add_chart({'type': 'pie'})
    chart.set_title({"name": "Missing whatever", "name_font": {"bold": False, "size": 14}})
    chart.add_series({
        # Mind the ' characters around the sheet name in values
        "values": "='" + str(sheet) + "'!$C$2:" +
                  xl_rowcol_to_cell(len(df.index) - 1, 2, row_abs=True, col_abs=True),
        # Mind the missing ' characters around the sheet name in categories
        "categories": "=" + str(sheet) + "!$B$2:" +
                      xl_rowcol_to_cell(len(df.index) - 1, 1, row_abs=True, col_abs=True),
        'points': [
            {'fill': {'color': "9bbb59"}},
            {'fill': {'color': '4f81bd'}},
        ],
    })

    return chart


if __name__ == "__main__":
    main()

Upvotes: 1

Views: 397

Answers (1)

jmcnamara
jmcnamara

Reputation: 41644

Xlsxwriter charts odd behaviour with sheet names consisting of multiple words and writing numbers as text into excel. I have found a rather odd behaviour by chance, but could not come up with an explanation for it yet. Maybe someone has an explanation for it.

This odd behaviour is really just Excel's default behaviour when you try to create charts in the way you are trying to create them. If you replicated the same charts manually in Excel then you would get the same results. XlsxWriter is just creating the charts you tell it to create.

Some of the issues in the examples are:

  1. Excel requires that worksheet names with spaces in them are single quoted. XlsxWriter warns about this.
  2. Chart "values" cannot be strings. They must be numbers.
  3. The are also issues with the color names that XlsxWriter warns about.

To fix the first of these issues you can manually single quote the worksheet name or better still let XlsxWriter do it via the add_series() list syntax. The second issue can be fixed by converting the string values to numbers or using the XlsxWriter strings_to_numbers option to do it.

Changing the example to do both of these, and fix the colour names, would give you something like this:

import pandas as pd


def main():
    df = pd.DataFrame({'text': ['some value1', 'some value2', 'sum of values'],
                       'values': ['2', '4', '6']})

    options = {'options': {'strings_to_numbers': True}}

    with pd.ExcelWriter('test.xlsx', engine='xlsxwriter',
                        engine_kwargs=options) as writer:
        create_worksheet(df, writer, 'Some name with space')

    return 0


def create_worksheet(df: pd.DataFrame, writer, sheet: str):
    df.to_excel(writer, sheet, index=False, startrow=1, startcol=1,
                header=False)

    wb = writer.book
    ws = writer.sheets[sheet]
    cell_format = wb.add_format({'bold': True, 'border': True,
                                 'border_color': 'white'})
    cell_grid = wb.add_format({'border': True,
                               'border_color': 'white'})
    ws.set_column('A:AA', 10, cell_grid)
    ws.set_column('B:B', 25, cell_format)

    chart_OK = create_chart_ok(wb, df, sheet)

    ws.insert_chart('B6', chart_OK)

    return 0


def create_chart_ok(wb, df: pd.DataFrame, sheet: str):
    chart = wb.add_chart({'type': 'pie'})
    chart.set_title({'name': 'Chart OK',
                     'name_font': {'bold': False, 'size': 14}})
    first_row = 1
    last_row = first_row + df.shape[0] - 2

    chart.add_series({
        'categories': [sheet, first_row, 1, last_row, 1],
        'values': [sheet, first_row, 2, last_row, 2],

        'points': [
            {'fill': {'color': '#9bbb59'}},
            {'fill': {'color': '#4f81bd'}},
        ],
    })

    return chart


if __name__ == '__main__':
    main()

Output:

enter image description here

And finally, a better way to hide the worksheet gridlines would be to use the worksheet.hide_gridliens() method.

Upvotes: 1

Related Questions