pandas mask indexing misshaped DataFrame

Question

Is there a built-in way in pandas to accomplish this?

I'd prefer to avoid pd.concat([...], 1).all(1) style methods, as the dataset I'm working with has missing data points.

main.py

import pandas as pd
import numpy as np
import numpy.typing as npt

def _index_mask(index_a: pd.Index, index_b: pd.Index) -> npt.NDArray[np.bool_]:
    """Return a boolean mask over ``index_b`` marking labels found in ``index_a``.

    Args:
        index_a: Index whose labels act as the set of allowed values.
        index_b: Index to be masked; the result has one entry per label here.

    Returns:
        A boolean array with the same length as ``index_b``; an entry is
        ``True`` where the corresponding label of ``index_b`` also occurs in
        ``index_a``.
    """
    # Testing membership against index_a directly gives the same mask as first
    # narrowing index_a to the shared labels, without the extra isin pass.
    return index_b.isin(index_a)


def mask_b(a: pd.DataFrame, b: pd.DataFrame) -> pd.DataFrame:
    """Filter the rows of ``b`` with the mask built from both frames' indexes.

    Args:
        a: Frame whose index is passed as the first argument to ``_index_mask``.
        b: Frame to filter; its index is passed as the second argument.

    Returns:
        ``b`` indexed by the boolean mask that ``_index_mask`` produces.
    """
    keep_rows = _index_mask(a.index, b.index)
    return b[keep_rows]


if __name__ == '__main__':
    # Demo: two frames whose row labels only partially overlap.
    labels_a = ["A", "B", "C", "D", "E"]
    labels_b = ["F", "G", "H", "C", "D", "E",  "I", "J"]

    frame_a = pd.DataFrame(data=np.arange(10).reshape(5, 2), index=labels_a)
    frame_b = pd.DataFrame(data=np.arange(16).reshape(8, 2), index=labels_b)

    # Filter frame_b against frame_a's index and show the result.
    x = mask_b(frame_a, frame_b)
    print(x)

edit

I forgot to mention that I also need to perform the inverse operation on frame_a.

def _index_mask(index_a: pd.Index, index_b: pd.Index) -> tuple[npt.NDArray[np.bool_],npt.NDArray[np.bool_]]:
    """Build membership masks for two indexes, one in each direction.

    Args:
        index_a: First index.
        index_b: Second index.

    Returns:
        A pair ``(mask_a, mask_b)``: ``mask_a`` has one entry per label of
        ``index_a`` and is ``True`` where that label occurs in ``index_b``;
        ``mask_b`` is the same test for ``index_b`` against ``index_a``.
    """
    a_in_b = index_a.isin(index_b)
    b_in_a = index_b.isin(index_a)
    return a_in_b, b_in_a


# Keep only the rows of each frame whose label also appears in the other frame.
# NOTE(review): `mask_b` is a plain variable here; if this snippet shares a
# module with a function of the same name, this assignment rebinds that name.
mask_a, mask_b = _index_mask(frame_a.index, frame_b.index)
frame_a = frame_a[mask_a]
frame_b = frame_b[mask_b]
# Index.equals checks the labels and their order in one call and returns a
# plain bool. An elementwise `==` raises ValueError instead of failing the
# assertion when the filtered indexes differ in length (e.g. duplicated labels
# on one side only).
assert frame_b.index.equals(frame_a.index)

result

pandas mask indexing misshaped DataFrame

main.py

edit

result

Answers (1)

Related Questions