Ahmad Abdallah
Ahmad Abdallah

Reputation: 50

Is there a way to overlay scatterplot over grouped boxplots so they aren't offset using plotly graph_objects?

I'm trying to get the scatter points to lie on top of their respective boxplots so that they act as outlier points. Since plotly's `graph_objects.Box` doesn't have a way of passing in precalculated outliers, I've been trying to do it this way. For the purposes of this project, I don't want plotly to calculate the outliers itself. Is there any way to accomplish this by moving the scatter points, or is there perhaps a feature of `go.Box` I overlooked that can do this?

import plotly.graph_objects as go

def create_multiple_boxplots(summary_stats_list, labels, types, title="Multiple Boxplots"):
    """Show grouped box plots built from precomputed summary statistics.

    For every entry of ``summary_stats_list`` one ``go.Box`` trace is added
    and, if the entry lists any outliers, one ``go.Scatter`` trace of
    open-circle markers at the same x label. The figure is displayed with
    ``fig.show()``; nothing is returned.

    Args:
        summary_stats_list: Dicts with the keys ``'Min'``, ``'Q1'``,
            ``'Median'``, ``'Q3'``, ``'Max'`` and ``'Mean'``. An optional
            ``'Outliers'`` key holds the precomputed outlier values.
        labels: x-axis category of each entry.
        types: Group name of each entry; it selects the colour and the
            legend group, so it must be a key of the colour map below
            (``"Rainy"`` or ``"Sunny"``).
        title: Figure title.

    Raises:
        KeyError: If a required statistic is missing from an entry or a
            type is not in the colour map.
    """
    fig = go.Figure()

    color_map = {"Rainy": "blue", "Sunny": "green"}
    # Types that already own a legend entry, so each type is listed exactly
    # once no matter how the inputs are ordered.
    types_in_legend = set()

    for stats, label, type_ in zip(summary_stats_list, labels, types):
        fig.add_trace(go.Box(
            name=type_,
            q1=[stats['Q1']],
            median=[stats['Median']],
            q3=[stats['Q3']],
            lowerfence=[stats['Min']],
            upperfence=[stats['Max']],
            mean=[stats['Mean']],
            boxpoints='all' if 'Outliers' in stats else False,
            jitter=0.3,
            pointpos=-1.8,
            marker=dict(color=color_map[type_]),  # Assign color based on type
            legendgroup=type_,
            showlegend=type_ not in types_in_legend,
            x=[label],
        ))
        types_in_legend.add(type_)

        # 'Outliers' is optional (see boxpoints above), so do not index it
        # directly; len() is used so list-like containers work as well.
        outliers = stats.get('Outliers', [])
        if len(outliers) == 0:
            continue

        # Add outlier points separately, as their own marker-only trace.
        fig.add_trace(go.Scatter(
            x=[label] * len(outliers),
            y=outliers,
            mode='markers',
            marker=dict(color=color_map[type_], size=8, symbol='circle-open'),
            name=f"Outliers - {type_}",
            legendgroup=type_,
            showlegend=False
        ))

    fig.update_layout(title=title, yaxis_title="Value", boxmode='group')

    fig.show()

# Example input: one precomputed summary (with its outliers) per box
data_summaries = [
    {
        "Min": 5, "Q1": 10, "Median": 15, "Q3": 20, "Max": 25,
        "Mean": 16,
        "Outliers": [2, 27],
    },
    {
        "Min": 6, "Q1": 11, "Median": 16, "Q3": 21, "Max": 26,
        "Mean": 17,
        "Outliers": [3, 28],
    },
    {
        "Min": 4, "Q1": 9, "Median": 14, "Q3": 19, "Max": 24,
        "Mean": 15,
        "Outliers": [1, 26],
    },
    {
        "Min": 7, "Q1": 12, "Median": 17, "Q3": 22, "Max": 27,
        "Mean": 18,
        "Outliers": [4, 29],
    },
]

# Parallel to data_summaries: x-axis category and group of each summary
labels = ["Happy", "Happy", "Sad", "Sad"]
types = ["Rainy", "Sunny", "Rainy", "Sunny"]

create_multiple_boxplots(
    summary_stats_list=data_summaries,
    labels=labels,
    types=types,
)

Boxplot with incorrectly aligned scatterplot points

Upvotes: 1

Views: 39

Answers (1)

r-beginners
r-beginners

Reputation: 35230

The desired output can be obtained by removing `boxmode='group'` from the layout and making each label unique. Each box then gets its own x-axis category, so the box-and-whisker plot and its scatter points share the same x position.

import plotly.graph_objects as go

def create_multiple_boxplots(summary_stats_list, labels, types, title="Multiple Boxplots"):
    """Show box plots with precomputed outliers overlaid as scatter markers.

    For every entry of ``summary_stats_list`` one ``go.Box`` trace is added
    and, if the entry lists any outliers, one ``go.Scatter`` trace of
    open-circle markers at the same x label. The layout does not set
    ``boxmode``, so callers are expected to pass a distinct label per box
    for the markers to sit on their own box. The figure is displayed with
    ``fig.show()``; nothing is returned.

    Args:
        summary_stats_list: Dicts with the keys ``'Min'``, ``'Q1'``,
            ``'Median'``, ``'Q3'``, ``'Max'`` and ``'Mean'``. An optional
            ``'Outliers'`` key holds the precomputed outlier values.
        labels: x-axis category of each entry.
        types: Group name of each entry; it selects the colour and the
            legend group, so it must be a key of the colour map below
            (``"Rainy"`` or ``"Sunny"``).
        title: Figure title.

    Raises:
        KeyError: If a required statistic is missing from an entry or a
            type is not in the colour map.
    """
    fig = go.Figure()

    color_map = {"Rainy": "blue", "Sunny": "green"}
    # Remember which types already have a legend entry so every type gets
    # exactly one, independent of the order of the inputs.
    legend_done = set()

    for stats, label, type_ in zip(summary_stats_list, labels, types):
        color = color_map[type_]
        first_of_type = type_ not in legend_done
        legend_done.add(type_)

        fig.add_trace(go.Box(
            name=type_,
            q1=[stats['Q1']],
            median=[stats['Median']],
            q3=[stats['Q3']],
            lowerfence=[stats['Min']],
            upperfence=[stats['Max']],
            mean=[stats['Mean']],
            boxpoints='all' if 'Outliers' in stats else False,
            jitter=0.3,
            pointpos=-1.8,
            marker=dict(color=color),  # Assign color based on type
            legendgroup=type_,
            showlegend=first_of_type,
            x=[label],
        ))

        # 'Outliers' may be absent (boxpoints above already allows for
        # that); only add the marker trace when there is something to plot.
        outliers = stats.get('Outliers', [])
        if len(outliers) > 0:
            fig.add_trace(go.Scatter(
                x=[label] * len(outliers),
                y=outliers,
                mode='markers',
                marker=dict(color=color, size=8, symbol='circle-open'),
                name=f"Outliers - {type_}",
                legendgroup=type_,
                showlegend=False
            ))

    # boxmode is deliberately left at its default here (no 'group').
    fig.update_layout(title=title, yaxis_title="Value")

    fig.show()

# Sample summaries: fences, quartiles, mean and outliers for four boxes
data_summaries = [
    {
        "Min": 5,
        "Q1": 10,
        "Median": 15,
        "Q3": 20,
        "Max": 25,
        "Mean": 16,
        "Outliers": [2, 27],
    },
    {
        "Min": 6,
        "Q1": 11,
        "Median": 16,
        "Q3": 21,
        "Max": 26,
        "Mean": 17,
        "Outliers": [3, 28],
    },
    {
        "Min": 4,
        "Q1": 9,
        "Median": 14,
        "Q3": 19,
        "Max": 24,
        "Mean": 15,
        "Outliers": [1, 26],
    },
    {
        "Min": 7,
        "Q1": 12,
        "Median": 17,
        "Q3": 22,
        "Max": 27,
        "Mean": 18,
        "Outliers": [4, 29],
    },
]

# update: every label is unique, so each box has its own x-axis category
labels = ["Happy", "Happy_", "Sad", "Sad_"]
types = ["Rainy", "Sunny", "Rainy", "Sunny"]

create_multiple_boxplots(
    data_summaries,
    labels=labels,
    types=types,
)

enter image description here

To overlay the scatter plot while keeping `boxmode='group'`, assign each box and its scatter trace to the same `offsetgroup`; the scatter points are then drawn at the center of their box-and-whisker plot.

import plotly.graph_objects as go

def create_multiple_boxplots(summary_stats_list, labels, types, title="Multiple Boxplots"):
    """Show grouped box plots with outlier markers sharing each box's offset group.

    For every entry of ``summary_stats_list`` one ``go.Box`` trace is added
    and, if the entry lists any outliers, one ``go.Scatter`` trace of
    open-circle markers at the same x label. Both traces are given the same
    ``offsetgroup`` (the entry's type); the intent is that, with
    ``boxmode='group'``, the markers are drawn on their own box. The figure
    is displayed with ``fig.show()``; nothing is returned.

    Args:
        summary_stats_list: Dicts with the keys ``'Min'``, ``'Q1'``,
            ``'Median'``, ``'Q3'``, ``'Max'`` and ``'Mean'``. An optional
            ``'Outliers'`` key holds the precomputed outlier values.
        labels: x-axis category of each entry.
        types: Group name of each entry; it selects the colour, the legend
            group and the offset group, so it must be a key of the colour
            map below (``"Rainy"`` or ``"Sunny"``).
        title: Figure title.

    Raises:
        KeyError: If a required statistic is missing from an entry or a
            type is not in the colour map.
    """
    fig = go.Figure()

    color_map = {"Rainy": "blue", "Sunny": "green"}
    # Types that already own a legend entry, so each type is listed once.
    types_in_legend = set()

    for stats, label, type_ in zip(summary_stats_list, labels, types):
        # Use the type itself as the offset group. A fixed list of group
        # names would tie the function to one input length and order (zip
        # would silently drop any extra entries).
        offset = type_

        fig.add_trace(go.Box(
            name=type_,
            q1=[stats['Q1']],
            median=[stats['Median']],
            q3=[stats['Q3']],
            lowerfence=[stats['Min']],
            upperfence=[stats['Max']],
            mean=[stats['Mean']],
            boxpoints='all' if 'Outliers' in stats else False,
            jitter=0.3,
            pointpos=-1.8,
            marker=dict(color=color_map[type_]),  # Assign color based on type
            legendgroup=type_,
            showlegend=type_ not in types_in_legend,
            x=[label],
            offsetgroup=offset,
        ))
        types_in_legend.add(type_)

        # 'Outliers' is optional (see boxpoints above), so do not index it
        # directly; skip the marker trace when there is nothing to plot.
        outliers = stats.get('Outliers', [])
        if len(outliers) == 0:
            continue

        fig.add_trace(go.Scatter(
            x=[label] * len(outliers),
            y=outliers,
            xaxis='x',
            yaxis='y',
            offsetgroup=offset,  # same group as the box above
            mode='markers',
            marker=dict(color=color_map[type_], size=8, symbol='circle-open'),
            name=f"Outliers - {type_}",
            legendgroup=type_,
            showlegend=False
        ))

    fig.update_layout(title=title, yaxis_title="Value", boxmode='group')
    fig.show()

# Demo data: precomputed statistics for two moods under two weather types
data_summaries = [
    {"Min": 5, "Q1": 10, "Median": 15, "Q3": 20, "Max": 25,
     "Mean": 16, "Outliers": [2, 27]},
    {"Min": 6, "Q1": 11, "Median": 16, "Q3": 21, "Max": 26,
     "Mean": 17, "Outliers": [3, 28]},
    {"Min": 4, "Q1": 9, "Median": 14, "Q3": 19, "Max": 24,
     "Mean": 15, "Outliers": [1, 26]},
    {"Min": 7, "Q1": 12, "Median": 17, "Q3": 22, "Max": 27,
     "Mean": 18, "Outliers": [4, 29]},
]

# Labels repeat here: boxes sharing a label are grouped side by side
labels = ["Happy", "Happy", "Sad", "Sad"]
types = ["Rainy", "Sunny", "Rainy", "Sunny"]

create_multiple_boxplots(data_summaries, labels, types=types)

enter image description here

Upvotes: 1

Related Questions