Reputation: 154
I want to append data from a Pandas DataFrame
to both an existing excel sheet and an excel table (ListObject
). I'm using openpyxl
to do so.
I'm now writing the code for writing to a sheet. I have the following:
def _append_to_excel_sheet(self
, data_to_write: pd.DataFrame
, excel_file: str
, sheet_name: str
, **kwargs
) -> bool:
try:
if Path(excel_file).exists():
# Load existing workbook
self.logger.debug(f"Appending {len(data_to_write)} rows to sheet {sheet_name} in {excel_file}")
with open (excel_file, "rb") as f:
wb = load_workbook(f
, read_only=False
, keep_vba=True
, data_only=False
, keep_links=True
, rich_text=True
)
self.logger.debug(wb.sheetnames)
ws = wb[sheet_name] if sheet_name in wb.sheetnames else wb.create_sheet(sheet_name)
# Find last row with data
last_row = ws.max_row
# Write new data
for idx, row in enumerate(data_to_write.values):
self.logger.debug(f"Appending row: {row} to row: {last_row + idx + 1}")
for col_idx, value in enumerate(row, 1):
ws.cell(row=last_row + idx + 1, column=col_idx, value=value)
self.logger.debug(f"New range: {ws.cell(row=last_row + 1, column=1).coordinate}:{ws.cell(row=last_row + len(data_to_write), column=len(data_to_write.columns)).coordinate}")
self.logger.debug(f"Saving to file {excel_file}")
wb.save(excel_file)
else:
# Create new file if it doesn't exist
self.logger.debug(f"Creating new file {excel_file} and writing {len(data_to_write)} rows to sheet {sheet_name}")
with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:
data_to_write.to_excel(
writer,
sheet_name=sheet_name,
index=False
)
except Exception as e:
self.logger.error(f"Failed to write to Excel: {str(e)}")
raise
finally:
wb.close()
When I run this code, the logger
object reaches the self.logger.debug(f"Saving to file {excel_file}")
line without raising any exception. Furthermore, I never see an Exception
raised: self.logger.error(f"Failed to write to Excel: {str(e)}")
is unreachable as far as my tests are concerned.
I have looked at openpyxl's documentation and several answers to similar questions in SO (this one and this one primarily) but haven't been able to find what I'm doing wrong in my code. The str
path I pass to the function is absolute.
I know I can use pandas to append a DataFrame to an existing sheet, and this would be an ideal solution but I also want this functionality with Excel tables.
Is there a way to enable verbose mode to see what openpyxl
is doing behind the scenes? Am I missing an edge case in which saving a workbook with the same name is forbidden? What alternatives can I look into if I'm unable to fix the issue I have?
Edit
To add more context on how I'm calling the code, the function is a method of a class called ExcelOutputHandler
. I'm calling it from a Unittest.TestCase
as follows:
from datetime import datetime
from pathlib import Path
import sys
import unittest
from src.core.types import City # this is a NamedTuple
from src.output_handler import ExcelOutputHandler
import logging
class test_output_handler(unittest.TestCase):
@classmethod
def setUpClass(cls) -> None:
cls.logger = logging.getLogger()
cls.logger.setLevel(logging.DEBUG)
cls.logger.addHandler(logging.StreamHandler(sys.stdout))
cls.test_dir = Path(__file__).parent
cls.xl_testfile = cls.test_dir / f"./output_history/{datetime.now().strftime("%Y-%m-%d %H-%M-%S")}.xlsx"
with open(cls.test_dir / "./test_worksheet.xlsx", "rb") as template_testfile:
with open(cls.xl_testfile, "wb+") as testfile:
testfile.write(template_testfile.read())
@classmethod
def tearDownClass(cls) -> None:
pass
def setUp(self) -> None:
pass
def tearDown(self) -> None:
pass
def test_write_to_sheet_overlay(self) -> None:
handler = ExcelOutputHandler(self.logger)
data = [
City('London', 'UK', 'EU', 'Rainy', 50, 'S', 5)
, City('Paris', 'FR', 'EU', 'Sunny', 10, 'A', 6)
, City('Berlin', 'DE', 'EU', 'Cold', 20, 'A', 3)
, City('Brussels', 'BE', 'EU', 'Cold', 10, 'B', 6)
, City('Lisbon', 'PT', 'EU', 'Sunny', 20, 'S+', 7)
, City('Oslo', 'NW', 'EU', 'Cold', 10, 'S', 3)
, City('Vienna', 'AT', 'EU', 'Cold', 10, 'A+', 8)
]
handler._append_to_excel_sheet(data, str(self.xl_testfile), "Sheet2")
handler._append_to_excel_sheet(data, str(self.xl_testfile), "Sheet2")
# Verify the results
import openpyxl
wb = openpyxl.load_workbook(str(self.xl_testfile))
ws = wb["Sheet2"]
# Get the number of rows with data (excluding header)
row_count = sum(1 for row in ws.iter_rows(min_row=2) if any(cell.value for cell in row))
# Assert we have twice the number of data rows
self.assertEqual(row_count, len(data) * 2,
f"Expected {len(data) * 2} rows but found {row_count}")
wb.close()
In the test, I expect the excel file to have 14 data rows, but it always fails with AssertionError: 0 != 14 : Expected 14 rows but found 0
. From this answer I assume the code called to set up the xl_testfile
will copy the test_worksheet
file and then allow the tests to access it.
Upvotes: 0
Views: 69
Reputation: 154
The code I've shared above was adapted for brevity and to not share confidential data. It turns out I opened a pd.ExcelWriter
before calling the function and never closed it. Removing that line allows the code to save the workbook as expected. I still don't know why it raised no error, I would have noticed the issue much sooner.
Upvotes: 0