Reputation: 789
I am trying to write into excel a pivot view of the following table. I am reading from index sheet of existing workbook(input.xlsx) and filtering for ID in excel DB(db1.xlsx) and trying to print pivot view of those dynamically in the input.xlsx.
Index sheet of the input workbook:-
db1.xlsx:-
ID NAME SEGMENT LEVEL PARAMETER VALUE REMARKS
11 NAME2 set1 L ball 32000 GREEN
11 NAME2 set1 M ball 30000 GREEN
11 NAME2 set1 H ball 29000 GREEN
11 NAME2 set1 L egg 68 GREEN
11 NAME2 set1 M egg 67 GREEN
11 NAME2 set1 H egg 62 GREEN
11 NAME2 set1 L tin 67667 GREEN
11 NAME2 set1 M tin 60852 GREEN
11 NAME2 set1 H tin 50434 GREEN
11 NAME2 set1 L rat 6 GREEN
11 NAME2 set1 M rat 5 GREEN
11 NAME2 set1 H rat 4 GREEN
11 NAME2 set1 L pen 3 GREEN
11 NAME2 set1 M pen 2 GREEN
11 NAME2 set1 H pen 1 GREEN
11 NAME2 set2 L ball 2000 GREEN
11 NAME2 set2 M ball 2000 GREEN
11 NAME2 set2 H ball 2000 GREEN
11 NAME2 set2 L egg 67 GREEN
11 NAME2 set2 M egg 67 GREEN
11 NAME2 set2 H egg 66 GREEN
11 NAME2 set2 L tin 11078 GREEN
11 NAME2 set2 M tin 10633 GREEN
11 NAME2 set2 H tin 10271 GREEN
11 NAME2 set2 L rat 5 GREEN
11 NAME2 set2 M rat 4 GREEN
11 NAME2 set2 H rat 3 GREEN
11 NAME2 set2 L pen 4 GREEN
11 NAME2 set2 M pen 3 GREEN
11 NAME2 set2 H pen 2 GREEN
11 NAME2 set1 L ball 32000 TSS
11 NAME2 set1 M ball 30000 TSS
11 NAME2 set1 H ball 29000 TSS
11 NAME2 set1 L egg 68 TSS
11 NAME2 set1 M egg 67 TSS
11 NAME2 set1 H egg 62 TSS
11 NAME2 set1 L tin 100 TSS
11 NAME2 set1 M tin 100 TSS
11 NAME2 set1 H tin 100 TSS
11 NAME2 set1 L rat 6 TSS
11 NAME2 set1 M rat 5 TSS
11 NAME2 set1 H rat 4 TSS
11 NAME2 set1 L pen 1 TSS
11 NAME2 set1 M pen 1 TSS
11 NAME2 set1 H pen 1 TSS
11 NAME2 set2 L ball 2000 TSS
11 NAME2 set2 M ball 2000 TSS
11 NAME2 set2 H ball 2000 TSS
11 NAME2 set2 L egg 67 TSS
11 NAME2 set2 M egg 67 TSS
11 NAME2 set2 H egg 66 TSS
11 NAME2 set2 L tin 100 TSS
11 NAME2 set2 M tin 100 TSS
11 NAME2 set2 H tin 100 TSS
11 NAME2 set2 L rat 5 TSS
11 NAME2 set2 M rat 4 TSS
11 NAME2 set2 H rat 3 TSS
11 NAME2 set2 L pen 1 TSS
11 NAME2 set2 M pen 1 TSS
11 NAME2 set2 H pen 1 TSS
11 NAME2 set1 NOT APPLICABLE max 800 GREEN
11 NAME2 set2 NOT APPLICABLE max 300 GREEN
11 NAME2 set1 NOT APPLICABLE max 1300 TSS
11 NAME2 set2 NOT APPLICABLE max 500 TSS
Code I wrote to get pivot view :-
from openpyxl import load_workbook
from openpyxl.styles import Alignment
import pandas as pd,os,sys
os.chdir(r'')
def fetchDatabaseRowsAsDataFrame(country_id, remark, database_table_name):
database_table_df = pd.read_excel(database_table_name)
return database_table_df.query('ID == ' + str(country_id) + ' and REMARKS == "' + str(remark) + '"')
wb = load_workbook('input.xlsx')
try:
index_sheet = wb['Index']
input_df = pd.read_excel('input.xlsx', sheet_name= 'Index')
for ind in input_df.index:
wb.create_sheet(str(input_df['ID'][ind]))
current_sheet = wb[str(input_df['ID'][ind])]
current_sheet.cell(1,1).value = "Index sheet"
current_sheet.cell(1,1).hyperlink = "input.xlsx#Index!A1"
current_sheet.cell(1,1).style = "Hyperlink"
heading_row = 3
start_row = heading_row + 2
remarks_array = ['REMARKS1', 'REMARKS2', 'REMARKS3']
db_array = ['db1.xlsx', 'db1.xlsx', 'db1.xlsx']
empty = True
for i in range(len(remarks_array)):
db_rows = fetchDatabaseRowsAsDataFrame(input_df['ID'][ind], input_df[remarks_array[i]][ind], db_array[i])
if (db_rows is not None) and (len(db_rows) > 0):
empty = False
current_sheet.cell(heading_row-1, 3).value = remarks_array[i] + "-" + input_df[remarks_array[i]][ind]
for ind_db_rows in db_rows.index:
if pd.isnull(db_rows['LEVEL'][ind_db_rows]) or db_rows['LEVEL'][ind_db_rows] == 'NOT APPLICABLE':
db_rows.at[ind_db_rows, 'LEVEL'] = 'NA'
pivot_db_rows = pd.pivot_table(db_rows, values = 'VALUE', index=['SEGMENT','PARAMETER'], columns = 'LEVEL').reset_index()
pivot_columns = list(pivot_db_rows)[2:]
pivot_segments = pivot_db_rows['SEGMENT'].unique()
pivot_parameters = pivot_db_rows['PARAMETER'].unique()
pivot_parameter_column_index = {}
initial_pivot_segment_column_index = 4
paramater_column_map = {}
for pivot_parameter_index in range(len(pivot_parameters)):
current_sheet.merge_cells(start_row=heading_row,start_column=initial_pivot_segment_column_index,end_row=heading_row,end_column=initial_pivot_segment_column_index+len(pivot_columns)-1)
current_sheet.cell(heading_row,initial_pivot_segment_column_index).value = pivot_parameters[pivot_parameter_index]
current_sheet.cell(heading_row,initial_pivot_segment_column_index).alignment = Alignment(horizontal='center')
pivot_parameter_column_index[pivot_parameters[pivot_parameter_index]] = initial_pivot_segment_column_index
for pivot_column_index in range(len(pivot_columns)):
current_sheet.cell(heading_row+1,initial_pivot_segment_column_index+pivot_column_index).value = pivot_columns[pivot_column_index]
initial_pivot_segment_column_index = initial_pivot_segment_column_index+len(pivot_columns)
current_sheet.cell(heading_row+1,3).value = 'SEGMENT'
for pivot_segment_index in range(len(pivot_segments)):
current_sheet.cell(heading_row+2+pivot_segment_index,3).value=pivot_segments[pivot_segment_index]
pivot_dictionary = {}
for ind_pivot_db_rows in pivot_db_rows.index:
if pivot_db_rows['SEGMENT'][ind_pivot_db_rows] not in pivot_dictionary:
pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]] = {}
if pivot_db_rows['PARAMETER'][ind_pivot_db_rows] not in pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]]:
pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]][pivot_db_rows['PARAMETER'][ind_pivot_db_rows]] = {}
for pivot_column_index in range(len(pivot_columns)):
pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]][pivot_db_rows['PARAMETER'][ind_pivot_db_rows]][pivot_columns[pivot_column_index]] = pivot_db_rows[pivot_columns[pivot_column_index]][ind_pivot_db_rows]
for pivot_segment_index in range(len(pivot_segments)):
for pivot_parameter_index in range(len(pivot_parameters)):
for pivot_column_index in range(len(pivot_columns)):
if pivot_segments[pivot_segment_index] in pivot_dictionary and pivot_parameters[pivot_parameter_index] in pivot_dictionary[pivot_segments[pivot_segment_index]] and pivot_columns[pivot_column_index] in pivot_dictionary[pivot_segments[pivot_segment_index]][pivot_parameters[pivot_parameter_index]]:
current_sheet.cell(heading_row+2+pivot_segment_index, pivot_parameter_column_index[pivot_parameters[pivot_parameter_index]] + pivot_column_index).value = pivot_dictionary[pivot_segments[pivot_segment_index]][pivot_parameters[pivot_parameter_index]][pivot_columns[pivot_column_index]]
heading_row = heading_row + len(pivot_segments) + 4
if empty:
wb.remove(current_sheet)
wb.save('input.xlsx')
wb.close()
except Exception as e:
print("Exception occured "+str(e))
wb.save('input.xlsx')
wb.close()
I am unable to drop the columns which are empty. I also want the parameters which have NA level for whichever ID applicable to come as the first column in the pivot view table as shown in expected output.
Upvotes: 1
Views: 217
Reputation: 195553
Try:
df = df.replace("NOT APPLICABLE", "")
x = df[df.REMARKS.eq("GREEN")].pivot("SEGMENT", ["PARAMETER", "LEVEL"], "VALUE")
x = x.reindex(
pd.MultiIndex.from_tuples(
sorted(x.columns, key=lambda k: (k[1] != "", k[0], k[1]))
),
axis=1,
)
print(x)
Prints:
max ball egg pen rat tin
H L M H L M H L M H L M H L M
SEGMENT
set1 800 29000 32000 30000 62 68 67 1 3 2 4 6 5 50434 67667 60852
set2 300 2000 2000 2000 66 67 67 2 4 3 3 5 4 10271 11078 10633
Upvotes: 2