Reputation: 11
I need to build a boxplot chart whose data can be filtered through a dropdown menu. Because of this issue, https://github.com/vega/altair/issues/2255, I build the boxplot manually from separate layers.
My problem is that the result depends on the order of selections: if I select an option directly, the chart is correct, but if I first select a different option and then switch to that same option, the chart is wrong.
I believe the problem is connected to how I calculate lower_whisker and upper_whisker, especially the use of min_value and max_value.
import altair as alt
import pandas as pd
# Example data: two tasks ("A" and "B") with five consecutive integers each.
data = pd.DataFrame(
    {
        "task": ["A"] * 5 + ["B"] * 5,
        "value": list(range(10)),
    }
)

# Dropdown options: a catch-all "All" entry followed by every task label.
tasks = ["All", *data["task"].unique()]
dropdown = alt.binding_select(options=tasks, name="Dropdown Menu: ")
select = alt.selection_point(fields=["task"], bind=dropdown, value="All")

# Expression strings shared by several layers. They are defined once so the
# whisker rule and the two whisker caps cannot drift apart.
# Each whisker sits at the 1.5 * IQR fence, clamped to the observed min/max.
IQR_EXPR = "datum.q3_value - datum.q1_value"
LOWER_WHISKER_EXPR = "max(datum.q1_value - 1.5 * datum.IQR, datum.min_value)"
UPPER_WHISKER_EXPR = "min(datum.q3_value + 1.5 * datum.IQR, datum.max_value)"

# Vertical line of the boxplot, spanning lower to upper whisker.
whisker_rule = (
    alt.Chart(data)
    .transform_aggregate(
        q1_value="q1(value)",
        q3_value="q3(value)",
        max_value="max(value)",
        min_value="min(value)",
    )
    .transform_calculate(
        IQR=IQR_EXPR,
        lower_whisker=LOWER_WHISKER_EXPR,
        upper_whisker=UPPER_WHISKER_EXPR,
    )
    .mark_rule(color="black")
    .encode(
        y="lower_whisker:Q",
        y2="upper_whisker:Q",
    )
)

# Body of the boxplot: a bar from the first to the third quartile.
box_body = (
    alt.Chart(data)
    .transform_aggregate(
        q1_value="q1(value)",
        q3_value="q3(value)",
    )
    .mark_bar(size=20)
    .encode(
        y=alt.Y("q1_value:Q"),
        y2="q3_value:Q",
    )
)

# Median line.
median_tick = (
    alt.Chart(data)
    .transform_aggregate(
        q2_value="median(value)",
    )
    .mark_tick(color="black", size=20)
    .encode(
        y="q2_value:Q",
    )
)

# Horizontal cap at the lower whisker.
lower_whisker_tick = (
    alt.Chart(data)
    .transform_aggregate(
        q1_value="q1(value)",
        q3_value="q3(value)",
        min_value="min(value)",
    )
    .transform_calculate(
        IQR=IQR_EXPR,
        lower_whisker=LOWER_WHISKER_EXPR,
    )
    .mark_tick(color="black", size=20)
    .encode(
        y="lower_whisker:Q",
    )
)

# Horizontal cap at the upper whisker.
upper_whisker_tick = (
    alt.Chart(data)
    .transform_aggregate(
        q1_value="q1(value)",
        q3_value="q3(value)",
        max_value="max(value)",
    )
    .transform_calculate(
        IQR=IQR_EXPR,
        upper_whisker=UPPER_WHISKER_EXPR,
    )
    .mark_tick(color="black", size=20)
    .encode(
        y="upper_whisker:Q",
    )
)

# Layer order is kept from the original: rule, box, median, then the caps.
chart = (
    whisker_rule
    + box_body
    + median_tick
    + lower_whisker_tick
    + upper_whisker_tick
)

# Give the y axis a single title instead of the per-layer field names.
chart = chart.encode(
    y=alt.Y(title="value"),
)

# Attach the dropdown and filter on it: every row passes when "All" is
# selected, otherwise only rows whose task equals the selected task.
param_name = select.name
chart2 = chart.add_params(
    select,
).transform_filter(
    f"{param_name}.task=='All' || {param_name}.task==datum.task "
)
Left image: task B when chosen directly. Right image: task B when task A is chosen first.
When I simply calculate the lower and upper whiskers as q1 - 1.5*IQR and q3 + 1.5*IQR (without clamping to min_value and max_value), I don't have this issue.
Upvotes: 0
Views: 30