Reputation: 17
import pandas as pd
concordance_region = pd.DataFrame(
{
"country 1": pd.Series([1, 0], index=["region a", "region b"]),
"country 2": pd.Series([0, 1], index=["region a", "region b"]),
"country 3": pd.Series([0, 1], index=["region a", "region b"]),
}
)
display(concordance_region)
country_index = concordance_region.columns
region_index = concordance_region.index
sector_index = ['sector a','sector b']
country_sector = pd.MultiIndex.from_product([country_index, sector_index], names=["country", "sector"])
region_sector = pd.MultiIndex.from_product([region_index, sector_index], names=["region", "sector"])
concordance_region_expanded = pd.DataFrame([[1,0,0,0,0,0],[0,1,0,0,0,0],[0,0,1,0,1,0],[0,0,0,1,0,1]], index=region_sector, columns=country_sector)
display(concordance_region_expanded)
I want to achieve the above expansion without hard-coding the number.
An option is that:
concordance_region_extended = pd.DataFrame(index=region_sector, columns=country_sector)
for region in region_index:
for sector_1 in sector_index:
for country in country_index:
for sector_2 in sector_index:
if sector_1 == sector_2 and concordance_region.loc[region, country] == 1:
concordance_region_expanded.loc[(region, sector_1),(country, sector_2)] = 1
concordance_region_expanded = concordance_region_expanded.fillna(value=0).infer_objects(copy=False)
concordance_region_expanded
But I think the above code is neither efficient nor elegant.
Any way to solve the above problem?
Upvotes: 1
Views: 86
Reputation: 13212
Code
use np.kron and identity matrix (identity matrix can be created with np.eye.)
import pandas as pd
import numpy as np
# taken from questioner's code
sector_index = ['sector a', 'sector b']
country_sector = pd.MultiIndex.from_product(
[country_index, sector_index], names=["country", "sector"])
region_sector = pd.MultiIndex.from_product(
[region_index, sector_index], names=["region", "sector"])
# start answer
n = len(sector_index)
out = pd.DataFrame(
np.kron(concordance_region.values, np.eye(n)),
index=region_sector, columns=country_sector, dtype='int'
)
out
Upvotes: 4
Reputation: 28649
summary of logic: convert the multindexes to dataframes, cross join, map concordance_region to get the integers, do a boolean check based on your condition and unstack:
# you could possibly build a MultiIndex
# of the three fields('region', 'country', 'sector')
# - that way the cartesian join is done in one step,
# within the MultiIndex creation
cc = country_sector.to_frame(index=False)
rr=region_sector.to_frame(index=False)
mapped=(concordance_region
.stack(future_stack=True)
.rename_axis(index=['region','country'])
.rename('integers')
)
# this captures your conditional logic
where_clause = (lambda df: df.integers
.where(df.sector_x.eq(df.sector_y))
.fillna(0)
.astype(int)
)
(cc
.merge(rr,how='cross')
.set_index(['region','country'])
.join(mapped)
.assign(integers = where_clause)
.set_index(['sector_x','sector_y'], append=True)
.unstack(['country','sector_x'])
.droplevel(axis='columns', level=0)
)
country country 1 country 2 country 3
sector_x sector a sector b sector a sector b sector a sector b
region sector_y
region a sector a 1 0 0 0 0 0
sector b 0 1 0 0 0 0
region b sector a 0 0 1 0 1 0
sector b 0 0 0 1 0 1
Upvotes: 0