Reputation: 17152
let's say I have a dataframe that looks like this:
df = pd.DataFrame({'A': range(5), 'B': range(5)}, index=list('abcde'))
df
Out[92]:
A B
a 0 0
b 1 1
c 2 2
d 3 3
e 4 4
Assuming that this dataframe already exists, how can I simply add a level 'C' to the column index so I get this:
df
Out[92]:
A B
C C
a 0 0
b 1 1
c 2 2
d 3 3
e 4 4
I saw SO answers like this one: "python/pandas: how to combine two dataframes into one with hierarchical column index?", but that concatenates different dataframes instead of adding a column level to an already existing dataframe.
Upvotes: 108
Views: 97543
Reputation: 168
I present a one-liner that offers flexibility, readability and performance/scalability:
# Convert the existing column labels to a frame, put a constant "C" column next
# to it (one "C" per existing column), and rebuild the column index from the
# combined frame so that "C" becomes the innermost level.
df.columns = pd.MultiIndex.from_frame(pd.concat(
[df.columns.to_frame(index=False),
pd.Series(["C"] * len(df.columns))],
axis=1))
print(df)
# A B
# C C
# a 0 0
# b 1 1
# c 2 2
# d 3 3
# e 4 4
Upvotes: 0
Reputation: 6642
I like it explicit (using `MultiIndex`) and chain-friendly (`.set_axis`):
# Pair every existing column label with 'C' and use the resulting MultiIndex
# as the column axis (axis=1); the expression can be used inside a method chain.
df.set_axis(pd.MultiIndex.from_product([df.columns, ['C']]), axis=1)
This is particularly convenient when merging DataFrames with different numbers of column levels, where pandas (1.4.2) raises a FutureWarning (`FutureWarning: merging between different levels is deprecated and will be removed ...`):
import pandas as pd
df1 = pd.DataFrame({'A': range(5), 'B': range(5)}, index=list('abcde'))
df2 = pd.DataFrame(range(10, 15),
index=list('abcde'),
columns=pd.MultiIndex.from_tuples([("C", "x")]))
# df1:
A B
a 0 0
b 1 1
# df2:
C
x
a 10
b 11
# merge while giving df1 another column level:
pd.merge(df1.set_axis(pd.MultiIndex.from_product([df1.columns, ['']]), axis=1),
df2,
left_index=True, right_index=True)
# result:
A B C
x
a 0 0 10
b 1 1 11
Upvotes: 8
Reputation: 294478
Option 1: `set_index` and `T`
# Transpose so the column labels become the row index, append a constant 'C'
# level (one 'C' per original column) to that index, then transpose back.
df.T.set_index(np.repeat('C', df.shape[1]), append=True).T
Option 2: `pd.concat`, `keys`, and `swaplevel`
# Concatenating the single frame along the columns with keys=['C'] puts 'C' on
# top of the existing labels; swaplevel(0, 1, 1) then swaps the two column
# levels (the last argument is the axis) so that 'C' ends up underneath.
pd.concat([df], axis=1, keys=['C']).swaplevel(0, 1, 1)
A B
C C
a 0 0
b 1 1
c 2 2
d 3 3
e 4 4
Upvotes: 34
Reputation: 3467
Only use this line:
# Assign a list of two equal-length label sequences: the existing labels and
# one 'C' per column. Each sequence supplies one level of the new column index.
df.columns = [df.columns, ['C'] * len(df.columns)]
Upvotes: 0
Reputation: 21938
As suggested by @StevenG himself, a better answer:
# For the single-level columns of the frame in the question: a plain Index has
# no ``.levels`` attribute, so build the product directly from the existing
# column labels and the new level's single label 'C'.
df.columns = pd.MultiIndex.from_product([df.columns, ['C']])
print(df)
# A B
# C C
# a 0 0
# b 1 1
# c 2 2
# d 3 3
# e 4 4
Upvotes: 155
Reputation: 13778
The accepted and other high-scoring answers don't cope with a multi-index df. I wrote this function to add a value or a list of values at a specific level with a custom name:
from typing import Iterable
def add_level(df, vals, name='', level=0):
    """Return a copy of ``df`` with one extra level inserted into its columns.

    Works for frames whose columns are a plain ``Index`` or a ``MultiIndex``,
    whether or not the existing levels are named.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is left unmodified.
    vals : scalar or iterable
        Label(s) of the new level. A scalar (strings and bytes count as
        scalars) is used for every column. Any other iterable is stretched
        with ``np.repeat`` so that each label covers
        ``n_columns // len(vals)`` consecutive columns.
    name : hashable, default ''
        Name given to the new level.
    level : int, default 0
        Position at which the new level is inserted (``list.insert``
        semantics, so 0 puts it on top).

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` carrying the extended column index.

    Raises
    ------
    AssertionError
        If ``vals`` is an empty iterable or its length does not divide the
        number of columns.
    """
    cols = df.columns
    n_cols = cols.shape[0]

    # Strings are iterable, but here they are meant as a single label.
    if isinstance(vals, (str, bytes)) or not isinstance(vals, Iterable):
        labels = np.repeat(vals, n_cols)
    else:
        # Materialise first so lists, tuples, Series and Index all work.
        vals = list(vals)
        assert vals and n_cols % len(vals) == 0, 'cols.shape[0] must be divisible by len(vals)'
        labels = np.repeat(vals, n_cols // len(vals))

    # Work on per-level arrays instead of a frame keyed by level names, so
    # unnamed (None) or duplicated level names cannot clash with ``name``.
    arrays = [cols.get_level_values(i) for i in range(cols.nlevels)]
    new_names = list(cols.names)
    arrays.insert(level, labels)
    new_names.insert(level, name)

    df1 = df.copy()
    df1.columns = pd.MultiIndex.from_arrays(arrays, names=new_names)
    return df1
Upvotes: 0
Reputation: 368
I haven't found an exhaustive way to do it, so here it is:
def add_multindex_level(
    data: pd.DataFrame,
    keys: Union[Any, List[Any]],
    level: int = 0,
    axis: int = 0,
    name: Union[str, None] = None,
    inplace: bool = False,
) -> Union[pd.DataFrame, None]:
    """Insert a new level into the row index or the column index of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index (``axis=0``) or columns (``axis=1``) are extended.
    keys : scalar or list-like
        Label(s) of the new level. A scalar (any non list-like value, strings
        included) is repeated for every entry; a list-like must contain
        exactly one label per entry of the extended index.
    level : int, default 0
        Where the new level goes. For a ``MultiIndex`` it follows slice
        semantics (``labels[:level] + (key,) + labels[level:]``); for a
        single-level index 0 means "before" and anything else "after".
    axis : {0, 1, 'index', 'columns'}, default 0
        Which axis to extend.
    name : str, optional
        Name of the new level.
    inplace : bool, default False
        Modify ``data`` itself and return ``None`` instead of a deep copy.

    Returns
    -------
    pandas.DataFrame or None
        The extended deep copy, or ``None`` when ``inplace`` is true.

    Raises
    ------
    ValueError
        If ``axis`` is not recognised, or if a list-like ``keys`` does not
        match the length of the index being extended.
    """
    # Resolve the axis once so the index that is read and the index that is
    # written are always the same one.
    if axis in (1, 'columns'):
        on_columns = True
    elif axis in (0, 'index'):
        on_columns = False
    else:
        raise ValueError(f"No axis named {axis!r}; use 0/'index' or 1/'columns'")
    to_promote = data.columns if on_columns else data.index

    # Broadcast any scalar, not only strings.
    if pd.api.types.is_list_like(keys):
        keys = list(keys)
    else:
        keys = [keys] * len(to_promote)
    if len(keys) != len(to_promote):
        raise ValueError(
            "Keys must be a value or array-like matching the length of the index to extend"
        )

    # Slot the new level occupies among the existing ones; slicing a range
    # clips out-of-range and negative values the way tuple slicing does.
    if isinstance(to_promote, pd.MultiIndex):
        position = len(range(to_promote.nlevels)[:level])
    else:
        position = 1 if level else 0

    # Build from per-level arrays: keeps labels and names aligned and also
    # works when the index is empty.
    arrays = [to_promote.get_level_values(i) for i in range(to_promote.nlevels)]
    new_names = list(to_promote.names)
    arrays.insert(position, keys)
    new_names.insert(position, name)
    new_index = pd.MultiIndex.from_arrays(arrays, names=new_names)

    data_ = data if inplace else data.copy(deep=True)
    if on_columns:
        data_.columns = new_index
    else:
        data_.index = new_index
    return None if inplace else data_
>>> source
a b c
0 0 5 0
1 1 6 1
2 0 9 4
>>> add_multindex_level(source, ['x','y','z'], level=1, axis=1)
a b c
x y z
0 0 5 0
1 1 6 1
2 0 9 4
>>> add_multindex_level(source, ['x','y','z'], level=0, axis=1)
x y z
a b c
0 0 5 0
1 1 6 1
2 0 9 4
>>> add_multindex_level(source, 'A', level=0, axis=1)
A
a b c
0 0 5 0
1 1 6 1
2 0 9 4
>>> add_multindex_level(source, 'A', level=0, axis=0)
a b c
A 0 0 5 0
A 1 1 6 1
A 2 0 9 4
Upvotes: 0
Reputation: 903
I have a dedicated function for this. It is less elegant, but more flexible. The advantages: it works with both `Index` and `MultiIndex`.
Best regards.
def addLevel(index, value='', name=None, n=1, onTop=False):
    """Add extra dummy levels to index.

    Parameters
    ----------
    index : pandas.Index or pandas.MultiIndex
        Index to extend; it is not modified.
    value : scalar, default ''
        Label used for every entry of each added level.
    name : hashable, optional
        Name given to each added level.
    n : int, default 1
        Number of identical levels to add.
    onTop : bool, default False
        Put the new levels before the existing ones instead of after them.

    Returns
    -------
    pandas.MultiIndex
        A new index with the existing levels plus ``n`` constant levels.
        Existing labels and ``value`` keep their own types (they are not
        coerced to a common dtype).
    """
    # MultiIndex is a subclass of Index, so one check covers both.
    assert isinstance(index, pd.Index)

    # Keep every level as its own array so each level retains its dtype
    # instead of being funnelled through one homogeneous NumPy array.
    existing = [index.get_level_values(i) for i in range(index.nlevels)]
    names = list(index.names)
    added = [[value] * len(index) for _ in range(n)]
    addName = [name] * n

    if onTop:
        arrays = added + existing
        names = addName + names
    else:
        arrays = existing + added
        names = names + addName
    return pd.MultiIndex.from_arrays(arrays, names=names)
# Demo frame with two plain columns 'A' and 'B'.
df = pd.DataFrame(index=list('abc'), data={'A': range(3), 'B': range(3)})
# Each call below replaces df.columns with the index returned by addLevel,
# so the levels accumulate from one call to the next.
# One unnamed level 'C' after the existing labels (onTop defaults to False).
df.columns = addLevel(df.columns, value='C')
# One level 'D' named 'D-name'.
df.columns = addLevel(df.columns, value='D', name='D-name')
# Two identical unnamed levels 'E2' in a single call (n=2).
df.columns = addLevel(df.columns, value='E2', n=2)
# One level 'Top' named 'OnTop', placed before all existing levels.
df.columns = addLevel(df.columns, value='Top', name='OnTop', onTop=True)
# One level named 'Number' whose label is given as the integer 1.
df.columns = addLevel(df.columns, value=1, name='Number')
print(df)
## OnTop Top
## A B
## C C
## D-name D D
## E2 E2
## E2 E2
## Number 1 1
## a 0 0
## b 1 1
## c 2 2
Upvotes: 0
Reputation: 176
Another method, but using a list comprehension of tuples as the arg to pandas.MultiIndex.from_tuples():
# Pair every existing column label with 'C' and build the two-level column
# index from those (label, 'C') tuples.
df.columns = pd.MultiIndex.from_tuples([(col, 'C') for col in df.columns])
df
A B
C C
a 0 0
b 1 1
c 2 2
d 3 3
e 4 4
Upvotes: 0
Reputation: 71600
You could just assign the columns like:
>>> df.columns = [df.columns, ['C', 'C']]
>>> df
A B
C C
a 0 0
b 1 1
c 2 2
d 3 3
e 4 4
>>>
Or for unknown length of columns:
>>> df.columns = [df.columns.get_level_values(0), np.repeat('C', df.shape[1])]
>>> df
A B
C C
a 0 0
b 1 1
c 2 2
d 3 3
e 4 4
>>>
Upvotes: 16
Reputation: 299
A solution which adds a name to the new level and is easier on the eyes than other answers already presented:
# Add a helper column holding the new level's label for every row.
df['newlevel'] = 'C'
# Move that column into the row index next to the existing index, then unstack
# it into the columns; the new column level keeps the name 'newlevel'.
df = df.set_index('newlevel', append=True).unstack('newlevel')
print(df)
# A B
# newlevel C C
# a 0 0
# b 1 1
# c 2 2
# d 3 3
# e 4 4
Upvotes: 18
Reputation: 359
Another way for a MultiIndex (inserting 'E' between the two existing levels):
# Rebuild the column index from (first label, 'E', second label) tuples, one
# per existing two-level column key.
df.columns = pd.MultiIndex.from_tuples(
    [(key[0], 'E', key[1]) for key in df.columns]
)
A B
E E
C D
a 0 0
b 1 1
c 2 2
d 3 3
e 4 4
Upvotes: 9