user29263305
user29263305

Reputation: 11

Altair: Manually created boxplot doesn't react correctly to selection filter

I need to build a boxplot chart whose data I can filter with a dropdown menu. Because of this issue https://github.com/vega/altair/issues/2255, I build the boxplot chart manually.
My problem is that when I select an option directly, the chart is correct, but when I first select another option and then this option, the chart is wrong.
I believe the problem is connected to how I calculate lower_whisker and upper_whisker, especially the use of min_value and max_value.

import altair as alt
import pandas as pd

# Example data: two tasks ('A' and 'B') with five integer values each.
data = pd.DataFrame({
    'task': ['A'] * 5 + ['B'] * 5,
    'value': list(range(10)),
})

# Dropdown offering "All" plus every task present in the data.
tasks = ["All", *data["task"].unique()]
dropdown = alt.binding_select(options=tasks, name="Dropdown Menu: ")
select = alt.selection_point(fields=['task'], bind=dropdown, value="All")

# Aggregate operations and calculate expressions shared by several layers.
Q1 = "q1(value)"
Q3 = "q3(value)"
MEDIAN = "median(value)"
MIN = "min(value)"
MAX = "max(value)"
IQR_EXPR = 'datum.q3_value - datum.q1_value'
# Whisker ends sit 1.5 * IQR beyond the quartiles, clamped to the observed
# minimum / maximum.
LOWER_WHISKER_EXPR = 'max(datum.q1_value - 1.5 * datum.IQR, datum.min_value)'
UPPER_WHISKER_EXPR = 'min(datum.q3_value + 1.5 * datum.IQR, datum.max_value)'

# Every layer starts from the same data; each transform/mark/encode call
# returns a new chart, so the base itself is never modified.
base = alt.Chart(data)
tick_style = {'color': 'black', 'size': 20}

# Vertical line of the boxplot, running from lower to upper whisker.
whisker_rule = (
    base
    .transform_aggregate(q1_value=Q1, q3_value=Q3, max_value=MAX, min_value=MIN)
    .transform_calculate(
        IQR=IQR_EXPR,
        lower_whisker=LOWER_WHISKER_EXPR,
        upper_whisker=UPPER_WHISKER_EXPR,
    )
    .mark_rule(color='black')
    .encode(y='lower_whisker:Q', y2='upper_whisker:Q')
)

# Body of the boxplot: a bar spanning the first to the third quartile.
box_body = (
    base
    .transform_aggregate(q1_value=Q1, q3_value=Q3)
    .mark_bar(size=20)
    .encode(y=alt.Y('q1_value:Q'), y2='q3_value:Q')
)

# Median line.
median_tick = (
    base
    .transform_aggregate(q2_value=MEDIAN)
    .mark_tick(**tick_style)
    .encode(y='q2_value:Q')
)

# Horizontal cap at the lower whisker.
lower_cap = (
    base
    .transform_aggregate(q1_value=Q1, q3_value=Q3, min_value=MIN)
    .transform_calculate(IQR=IQR_EXPR, lower_whisker=LOWER_WHISKER_EXPR)
    .mark_tick(**tick_style)
    .encode(y="lower_whisker:Q")
)

# Horizontal cap at the upper whisker.
upper_cap = (
    base
    .transform_aggregate(q1_value=Q1, q3_value=Q3, max_value=MAX)
    .transform_calculate(IQR=IQR_EXPR, upper_whisker=UPPER_WHISKER_EXPR)
    .mark_tick(**tick_style)
    .encode(y="upper_whisker:Q")
)

# Layer the five pieces and give the shared y axis a single title.
chart = (whisker_rule + box_body + median_tick + lower_cap + upper_cap).encode(
    y=alt.Y(title="value")
)

# Keep every row when "All" is selected, otherwise only rows of the chosen
# task. The filter is attached to the layered chart as a whole.
task_filter = f"{select.name}.task=='All' || {select.name}.task==datum.task "
chart2 = chart.add_params(select).transform_filter(task_filter)

Screenshots: B if chosen directly; B if A is chosen first.

When I just calculate the lower and upper whiskers as q1 - 1.5 * IQR and q3 + 1.5 * IQR, I don't have this issue.

Upvotes: 0

Views: 30

Answers (0)

Related Questions