Reputation: 21
I want to merge multiple excel files with multiple sheets respectively using python. I do not want to lose any formatting from the sheets. It should copy all sheets and just create a single excel file.
I'm able to merge only the first sheet and also all formatting is lost.
This is my code:
import os
import os.path
import xlrd
import xlsxwriter
file_name = input("merge")
merged_file_name = file_name + ".xls"
dest_book = xlsxwriter.Workbook('m.xls')
dest_sheet_1 = dest_book.add_worksheet()
dest_row = 1
temp = 0
path = input("C:\\test")
out = os.path.isdir("")
print(out)
print("File path: " + path)
for root,dirs, files in os.walk("C:\\test"):
for xlsfile in files:
print ("File in mentioned folder is: " + xlsfile)
temp_book = xlrd.open_workbook(os.path.join(root,xlsfile))
temp_sheet = temp_book.sheet_by_index(0)
if temp == 0:
for col_index in range(temp_sheet.ncols):
str = temp_sheet.cell_value(0, col_index)
dest_sheet_1.write(0, col_index, str)
temp = temp + 1
for row_index in range(1, temp_sheet.nrows):
for col_index in range(temp_sheet.ncols):
str = temp_sheet.cell_value(row_index, col_index)
dest_sheet_1.write(dest_row, col_index, str)
dest_row = dest_row + 1
dest_book.close()
book = xlrd.open_workbook("m.xls")
sheet = book.sheet_by_index(0)
print ("number of rows in destination file are: "), sheet.nrows
print ("number of columns in destination file are: "), sheet.ncols
Upvotes: 2
Views: 3611
Reputation: 1
Regarding to the error faced by Sachin Ingle,
(-2147352567, 'Exception occurred.', (0, 'Microsoft Excel', 'That name is already taken. Try a different one.', 'xlmain11.chm', 0, -2146827284), None) facing this error
It's probably because you have created a file of same name before. Try create it with different name.
The answer by Parfait on
new_wb.SaveAs(Filename='MasterMerge.xlsx', FileFormat=xlWorkbookDefault)
will make the file named "MasterMerge.xlsx" and probably you have created the file already.
And btw can add in xlapp.Quit() at finally: block to solve the in-use problem
I done some changes on Parfait answer (Thanks mate)
def merge_excel_files(filepath_list,filename,delete_original_files=False):
import os, errno
import win32com.client as win32
try:
# INITIALIZE EXCEL COM APP
xlapp = win32.gencache.EnsureDispatch('Excel.Application')
# ASSIGN CONSTANTS
xlPasteValues = -4163; lPasteFormats = -4122; xlWorkbookDefault = 51
# CREATE NEW WOKRBOOK (PROMPTS IF EXISTS)
new_wb = xlapp.Workbooks.Add()
new_wb.SaveAs(Filename=filename, FileFormat=xlWorkbookDefault)
# Gain filename in a directory
# xl_files = [f for f in os.listdir(path) if f.endswith('.xls') or f.endswith('.xlsx')]
for wb in filepath_list:
xlwb = xlapp.Workbooks.Open(wb)
# LOOP THROUGH EVERY WORKSHEET, COPYING TO NEW WORKSHEET
for xlsh in xlwb.Worksheets:
new_sh = new_wb.Worksheets.Add()
new_sh.Name = xlsh.Name
new_wb.Save()
new_sh.Move(After=new_wb.Worksheets(new_wb.Worksheets.Count))
xlsh.Cells.Copy(new_sh.Cells)
new_sh = None
xlwb.Close(False)
xlwb = None
# REMOVNIG DEFAULT SHEET AND LAUNCHING TO SCREEN
new_wb.Worksheets('Sheet1').Delete()
new_wb.Save()
# xlapp.Visible = True
except Exception as e:
print(e)
finally:
# Close the Excel file since done writing
xlapp.Quit()
# RELEASE RESOURCES
xlsh = None; new_sh = None;
xlwb = None; new_wb = None; xlapp = None
# Delete the initial file
if delete_original_files:
for count,x in enumerate(filepath_list):
print(f"Deleting the {count+1}/{len(filepath_list)} original file(s)...")
try:
os.remove(x)
except OSError as e:
# No such file or directory
if e.errno != errno.ENOENT:
raise
else:
# If there's no exception
print(f"Deleted {x}")
## Merge Excel files into one workbook with keeping the sheets and styling/formatting
# => https://stackoverflow.com/questions/51986517/merge-two-excel-files-with-multiple-sheet-without-losing-formatting
# => https://stackoverflow.com/questions/44593705/how-to-copy-over-an-excel-sheet-to-another-workbook-in-python/44596301#44596301 [openpyxl (Can't keep formatting), pywin32, xlwings]
# => https://stackoverflow.com/questions/56687602/copy-excel-sheet-from-one-worksheet-to-another-in-python/56688138#56688138 [xlwings]
## Solve file in use problem with pywin32 solution from questions/51986517
# => https://stackoverflow.com/questions/6337595/python-win32-com-closing-excel-workbook/6338030
## Basic Python: Pythonic way to delete a files, running code if try statements were successful
# => https://stackoverflow.com/questions/10840533/most-pythonic-way-to-delete-a-file-which-may-not-exist
# => https://stackoverflow.com/questions/2792568/running-code-if-try-statements-were-successful-in-python
## Research on openpyxl copy_worksheet(); Conclusion: it can only copy and paste sheet within same workbook. =(
# => https://stackoverflow.com/questions/44593705/how-to-copy-over-an-excel-sheet-to-another-workbook-in-python/44596301
# => https://openpyxl.readthedocs.io/en/latest/tutorial.html?highlight=copy_worksheet#manipulating-a-workbook-in-memory
Upvotes: 0
Reputation: 107652
Since you require Excel specific needs like formatting, consider directly interfacing to the Excel object library with a COM interface. Of course this assumes you have Excel installed on machine. For Windows, Python can run COM with the win32com
library and this connects beyond Excel but to most Windows apps and objects including Notepad, Paint, even ADODB.
Essentially, this mirrors VBA (which does a similar interface to the Excel object library) using Workbooks.Add, Sheets.Add, Range.Copy, and other methods. All other APIs such as xlrd
and xlwriter
do not directly use Excel methods and hence why you lose formatting even graphics but not data.
import os
import win32com.client as win32
path = input("C:\\test")
file_name = input("merge")
merged_file_name = file_name + ".xlsx"
try:
# INITIALIZE EXCEL COM APP
xlapp = win32.gencache.EnsureDispatch('Excel.Application')
# ASSIGN CONSTANTS
xlPasteValues = -4163; lPasteFormats = -4122; xlWorkbookDefault = 51
# CREATE NEW WOKRBOOK (PROMPTS IF EXISTS)
new_wb = xlapp.Workbooks.Add()
new_wb.SaveAs(Filename='MasterMerge.xlsx', FileFormat=xlWorkbookDefault)
# LOOP THROUGH WORKBOOKS
xl_files = [f for f in os.listdir(path) if f.endswith('.xls') or f.endswith('.xlsx')]
for wb in xl_files:
xlwb = xlapp.Workbooks.Open(os.path.join(path, wb))
# LOOP THROUGH EVERY WORKSHEET, COPYING TO NEW WORKSHEET
for xlsh in xlwb.Worksheets:
new_sh = new_wb.Worksheets.Add()
new_sh.Name = xlsh.Name
new_wb.Save()
new_sh.Move(After=new_wb.Worksheets(new_wb.Worksheets.Count))
xlsh.Cells.Copy(new_sh.Cells)
new_sh = None
xlwb.Close(False)
xlwb = None
# REMOVNIG DEFAULT SHEET AND LAUNCHING TO SCREEN
new_wb.Worksheets('Sheet1').Delete()
new_wb.Save()
xlapp.Visible = True
except Exception as e:
print(e)
finally:
# RELEASE RESOURCES
xlsh = None; new_sh = None;
xlwb = None; new_wb = None; xlapp = None
Upvotes: 3