Reputation: 103
I'm trying to read data from the excel file that has merged_cells_range... but the output is not my goal. Pls help me out
import openpyxl
wb = openpyxl.load_workbook('book1.xlsx')
sheet = wb.get_sheet_by_name('info')
all_data=[]
print(sheet.merged_cells.ranges)
for row_index in range(1,sheet.max_row+1):
row=[]
for col_index in range(1,sheet.max_column+1):
vals = sheet.cell(row_index,col_index).value
if vals =='':
for crange in sheet.merged_cells.ranges:
rlo,rhi,clo,chi = crange
if rlo<=row_index and row_index<rhi and clo<=col_index and col_index<chi:
vals = sheet.cell(rlo,clo).value
print(vals)
break
row.append(vals)
all_data.append(row)
print(all_data)
for row in all_data:
sheet.append(row)
wb.save('bbbb.xlsx')
I desired to get output: [['06B', 'Daewoo BC 212', 80, 1373], ['06C', 'Daewoo BC 212', 80, 1020], ['06D', 'Transinco B60KL', 60, 1061], ['06D', 'Transinco B60KL', 60, 19], ['06E', 'Daewoo BC 212', 80, 1020], ['06E', 'Daewoo BC 212', 60, 1061], ['06E', 'Daewoo BC 212', 60, 19]] but results is:
[['06B', 'Daewoo BC 212', 80, 1373], ['06C', 'Daewoo BC 212', 80, 1020], ['06D', 'Transinco B60KL', 60, 1061], [None, None, 60, 19], ['06E', 'Daewoo BC 212', 80, 1020], [None, None, 60, 1061], [None, None, 60, 19]]
Upvotes: 1
Views: 13305
Reputation: 414
I think this is more clear than the existed answer.
from openpyxl.cell import MergedCell
def parser_merged_cell(sheet: openpyxl.Workbook, row, col):
cell = sheet.cell(row=row, column=col)
if isinstance(cell, MergedCell):
for merged_range in sheet.merged_cells.ranges:
if cell.coordinate in merged_range:
# return the left top cell
cell = sheet.cell(row=merged_range.min_row, column=merged_range.min_col)
break
return cell.value
Upvotes: 5
Reputation: 103
I amend my code and it's work.
import openpyxl
from openpyxl.utils import range_boundaries
wb = openpyxl.load_workbook('book1.xlsx')
sheet = wb.get_sheet_by_name('info')
all_data=[]
for row_index in range(1,sheet.max_row+1):
row=[]
for col_index in range(1,sheet.max_column+1):
vals = sheet.cell(row_index,col_index).value
if vals == None:
for crange in sheet.merged_cells:
clo,rlo,chi,rhi = crange.bounds
top_value = sheet.cell(rlo,clo).value
if rlo<=row_index and row_index<=rhi and clo<=col_index and col_index<=chi:
vals = top_value
print(vals)
break
row.append(vals)
all_data.append(row)
print(all_data)
for row in all_data:
sheet.append(row)
wb.save('bbbb.xlsx')
Upvotes: 5
Reputation: 5740
There You go:
=^..^=
import openpyxl
from openpyxl import Workbook
# load data
raw_data = openpyxl.load_workbook('data.xlsx')
select_sheet = raw_data['Sheet1']
# collect data from rows
valid_row = []
data = []
for row in select_sheet.iter_rows(max_row=select_sheet.max_row, max_col=select_sheet.max_column):
# get cell values
row_data = [cell.value for cell in row]
# handle merged cells
new_row_data = [0]*select_sheet.max_column
if None in row_data:
new_row_data[0] = valid_row[0]
new_row_data[1] = valid_row[1]
new_row_data[2] = row_data[2]
new_row_data[3] = row_data[3]
data.append(new_row_data)
else:
data.append(row_data)
# storage valid row
if None not in row_data:
valid_row = row_data
# save data
book = Workbook()
new_sheet = book.active
for row in data:
new_sheet.append(row)
book.save('new_data.xlsx')
Input:
0 1 2 3
0 B 212 80 1.2
1 C 212 80 1.3
2 D B60 60 1.4
3 None None 60 1.5
4 E 212 80 1.6
5 None None 60 1.7
6 None None 60 1.8
Output:
0 1 2 3
0 B 212 80 1.2
1 C 212 80 1.3
2 D B60 60 1.4
3 D B60 60 1.5
4 E 212 80 1.6
5 E 212 60 1.7
6 E 212 60 1.8
Upvotes: 1