Scope
Scope

Reputation: 789

How to drop null/empty columns in a Pivot df while writing to excel

I am trying to write into excel a pivot view of the following table. I am reading from index sheet of existing workbook(input.xlsx) and filtering for ID in excel DB(db1.xlsx) and trying to print pivot view of those dynamically in the input.xlsx.

Index sheet of the input workbook:-Index

db1.xlsx:-

ID  NAME SEGMENT    LEVEL   PARAMETER   VALUE   REMARKS
11  NAME2   set1    L       ball    32000   GREEN
11  NAME2   set1    M       ball    30000   GREEN
11  NAME2   set1    H       ball    29000   GREEN
11  NAME2   set1    L       egg     68      GREEN
11  NAME2   set1    M       egg     67      GREEN
11  NAME2   set1    H       egg     62      GREEN
11  NAME2   set1    L       tin     67667   GREEN
11  NAME2   set1    M       tin     60852   GREEN
11  NAME2   set1    H       tin     50434   GREEN
11  NAME2   set1    L       rat     6       GREEN
11  NAME2   set1    M       rat     5       GREEN
11  NAME2   set1    H       rat     4       GREEN
11  NAME2   set1    L       pen     3       GREEN
11  NAME2   set1    M       pen     2       GREEN
11  NAME2   set1    H       pen     1       GREEN
11  NAME2   set2    L       ball    2000    GREEN
11  NAME2   set2    M       ball    2000    GREEN
11  NAME2   set2    H       ball    2000    GREEN
11  NAME2   set2    L       egg     67      GREEN
11  NAME2   set2    M       egg     67      GREEN
11  NAME2   set2    H       egg     66      GREEN
11  NAME2   set2    L       tin     11078   GREEN
11  NAME2   set2    M       tin     10633   GREEN
11  NAME2   set2    H       tin     10271   GREEN
11  NAME2   set2    L       rat     5       GREEN
11  NAME2   set2    M       rat     4       GREEN
11  NAME2   set2    H       rat     3       GREEN
11  NAME2   set2    L       pen     4       GREEN
11  NAME2   set2    M       pen     3       GREEN
11  NAME2   set2    H       pen     2       GREEN
11  NAME2   set1    L       ball    32000   TSS
11  NAME2   set1    M       ball    30000   TSS
11  NAME2   set1    H       ball    29000   TSS
11  NAME2   set1    L       egg     68      TSS
11  NAME2   set1    M       egg     67      TSS
11  NAME2   set1    H       egg     62      TSS
11  NAME2   set1    L       tin     100     TSS
11  NAME2   set1    M       tin     100     TSS
11  NAME2   set1    H       tin     100     TSS
11  NAME2   set1    L       rat     6       TSS
11  NAME2   set1    M       rat     5       TSS
11  NAME2   set1    H       rat     4       TSS
11  NAME2   set1    L       pen     1       TSS
11  NAME2   set1    M       pen     1       TSS
11  NAME2   set1    H       pen     1       TSS
11  NAME2   set2    L       ball    2000    TSS
11  NAME2   set2    M       ball    2000    TSS
11  NAME2   set2    H       ball    2000    TSS
11  NAME2   set2    L       egg     67      TSS
11  NAME2   set2    M       egg     67      TSS
11  NAME2   set2    H       egg     66      TSS
11  NAME2   set2    L       tin     100     TSS
11  NAME2   set2    M       tin     100     TSS
11  NAME2   set2    H       tin     100     TSS
11  NAME2   set2    L       rat     5       TSS
11  NAME2   set2    M       rat     4       TSS
11  NAME2   set2    H       rat     3       TSS
11  NAME2   set2    L       pen     1       TSS
11  NAME2   set2    M       pen     1       TSS
11  NAME2   set2    H       pen     1       TSS
11  NAME2   set1    NOT APPLICABLE  max 800 GREEN
11  NAME2   set2    NOT APPLICABLE  max 300 GREEN
11  NAME2   set1    NOT APPLICABLE  max 1300 TSS
11  NAME2   set2    NOT APPLICABLE  max 500 TSS

Code I wrote to get pivot view :-

from openpyxl import load_workbook
from openpyxl.styles import Alignment
import pandas as pd,os,sys


os.chdir(r'')


def fetchDatabaseRowsAsDataFrame(country_id, remark, database_table_name):
    database_table_df = pd.read_excel(database_table_name)
    
    return database_table_df.query('ID == ' + str(country_id) + ' and REMARKS == "' + str(remark) + '"') 

wb = load_workbook('input.xlsx')
try:
    index_sheet = wb['Index']
    input_df = pd.read_excel('input.xlsx', sheet_name= 'Index')
    for ind in input_df.index:
        wb.create_sheet(str(input_df['ID'][ind]))
        current_sheet = wb[str(input_df['ID'][ind])]
        current_sheet.cell(1,1).value = "Index sheet"
        current_sheet.cell(1,1).hyperlink = "input.xlsx#Index!A1"
        current_sheet.cell(1,1).style = "Hyperlink"
        heading_row = 3
        start_row = heading_row + 2
        
        remarks_array = ['REMARKS1', 'REMARKS2', 'REMARKS3']
        
        db_array = ['db1.xlsx', 'db1.xlsx', 'db1.xlsx']
        empty = True
        
        for i in range(len(remarks_array)):
            
            db_rows = fetchDatabaseRowsAsDataFrame(input_df['ID'][ind], input_df[remarks_array[i]][ind], db_array[i])
            
            if (db_rows is not None) and  (len(db_rows) > 0):
                empty = False
                current_sheet.cell(heading_row-1, 3).value = remarks_array[i] + "-" + input_df[remarks_array[i]][ind] 
                for ind_db_rows in db_rows.index:
                    if pd.isnull(db_rows['LEVEL'][ind_db_rows]) or db_rows['LEVEL'][ind_db_rows] == 'NOT APPLICABLE':
                        db_rows.at[ind_db_rows, 'LEVEL'] = 'NA'
                
                pivot_db_rows = pd.pivot_table(db_rows, values = 'VALUE', index=['SEGMENT','PARAMETER'], columns = 'LEVEL').reset_index()
                pivot_columns = list(pivot_db_rows)[2:]
                pivot_segments = pivot_db_rows['SEGMENT'].unique()
                pivot_parameters = pivot_db_rows['PARAMETER'].unique()
                pivot_parameter_column_index = {}
                initial_pivot_segment_column_index = 4
                paramater_column_map = {}
                
                for pivot_parameter_index in range(len(pivot_parameters)):
                    current_sheet.merge_cells(start_row=heading_row,start_column=initial_pivot_segment_column_index,end_row=heading_row,end_column=initial_pivot_segment_column_index+len(pivot_columns)-1)
                    
                    current_sheet.cell(heading_row,initial_pivot_segment_column_index).value = pivot_parameters[pivot_parameter_index]
                    current_sheet.cell(heading_row,initial_pivot_segment_column_index).alignment = Alignment(horizontal='center')
                    pivot_parameter_column_index[pivot_parameters[pivot_parameter_index]] = initial_pivot_segment_column_index
                    for pivot_column_index in range(len(pivot_columns)):
                        current_sheet.cell(heading_row+1,initial_pivot_segment_column_index+pivot_column_index).value = pivot_columns[pivot_column_index]
                    
                    initial_pivot_segment_column_index = initial_pivot_segment_column_index+len(pivot_columns)
                
                current_sheet.cell(heading_row+1,3).value = 'SEGMENT'
                
                for pivot_segment_index in range(len(pivot_segments)):
                    current_sheet.cell(heading_row+2+pivot_segment_index,3).value=pivot_segments[pivot_segment_index]
                
                pivot_dictionary = {}
                for ind_pivot_db_rows in pivot_db_rows.index:
                    if pivot_db_rows['SEGMENT'][ind_pivot_db_rows] not in pivot_dictionary:
                        pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]] = {}
                    if pivot_db_rows['PARAMETER'][ind_pivot_db_rows] not in pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]]:
                        pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]][pivot_db_rows['PARAMETER'][ind_pivot_db_rows]] = {}
                    
                    for pivot_column_index in range(len(pivot_columns)):
                        pivot_dictionary[pivot_db_rows['SEGMENT'][ind_pivot_db_rows]][pivot_db_rows['PARAMETER'][ind_pivot_db_rows]][pivot_columns[pivot_column_index]] = pivot_db_rows[pivot_columns[pivot_column_index]][ind_pivot_db_rows]
                
                
                for pivot_segment_index in range(len(pivot_segments)):
                    for pivot_parameter_index in range(len(pivot_parameters)):
                        for pivot_column_index in range(len(pivot_columns)):
                            if pivot_segments[pivot_segment_index] in pivot_dictionary and pivot_parameters[pivot_parameter_index] in pivot_dictionary[pivot_segments[pivot_segment_index]] and pivot_columns[pivot_column_index] in pivot_dictionary[pivot_segments[pivot_segment_index]][pivot_parameters[pivot_parameter_index]]:
                                current_sheet.cell(heading_row+2+pivot_segment_index, pivot_parameter_column_index[pivot_parameters[pivot_parameter_index]] + pivot_column_index).value = pivot_dictionary[pivot_segments[pivot_segment_index]][pivot_parameters[pivot_parameter_index]][pivot_columns[pivot_column_index]]
                
                heading_row = heading_row + len(pivot_segments) + 4
        if empty:
            wb.remove(current_sheet)
                    
                    
        
    wb.save('input.xlsx')
    wb.close()

except Exception as e:
    print("Exception occured "+str(e))
    wb.save('input.xlsx')
    wb.close()

Output :- Output

Expected Output :- Expected Output

I am unable to drop the columns which are empty. I also want the parameters which have NA level for whichever ID applicable to come as the first column in the pivot view table as shown in expected output.

Upvotes: 1

Views: 217

Answers (1)

Andrej Kesely
Andrej Kesely

Reputation: 195553

Try:

df = df.replace("NOT APPLICABLE", "")
x = df[df.REMARKS.eq("GREEN")].pivot("SEGMENT", ["PARAMETER", "LEVEL"], "VALUE")

x = x.reindex(
    pd.MultiIndex.from_tuples(
        sorted(x.columns, key=lambda k: (k[1] != "", k[0], k[1]))
    ),
    axis=1,
)
print(x)

Prints:

         max   ball               egg         pen       rat          tin              
                  H      L      M   H   L   M   H  L  M   H  L  M      H      L      M
SEGMENT                                                                               
set1     800  29000  32000  30000  62  68  67   1  3  2   4  6  5  50434  67667  60852
set2     300   2000   2000   2000  66  67  67   2  4  3   3  5  4  10271  11078  10633

Upvotes: 2

Related Questions