Sam
Sam

Reputation: 55

Iterate through columns to generate barplots while using groupby

This is my dataframe:

# Sample frame: 18 rows, each with a machine label, a bin label and `speed` / `Temp` readings.
# NOTE(review): entries 2-5 of 'machine' are spelled 'machine a' (with a space) while entries 0-1
# are 'machine_a'; grouping by 'machine' will treat them as two different machines - looks like a
# typo, confirm against the real data.
data = {'machine': ['machine_a', 'machine_a', 'machine a', 'machine a', 'machine a', 'machine a', 'machine_b', 'machine_b', 'machine_b', 'machine_b', 'machine_b', 'machine_b', 'machine_c', 'machine_c', 'machine_c', 'machine_c', 'machine_c', 'machine_c'], 'bin': ['(0, 200]', '(200, 400]', '(400, 600]', '(600, 800]', '(800, 1000]', '(1000, 1200]', '(0, 200]', '(200, 400]', '(400, 600]', '(600, 800]', '(800, 1000]', '(1000, 1200]', '(0, 200]', '(200, 400]', '(400, 600]', '(600, 800]', '(800, 1000]', '(1000, 1200]'], 'speed': [10, 0, 20, 0, 20, 10, 5, 0, 40, 10, 20, 10, 5, 25, 0, 10, 5, 10], 'Temp': [0, 0, 0, 20, 20, 0, 35, 0, 0, 20, 0, 70, 30, 0, 0, 5, 0, 25]}
df = pd.DataFrame(data)

      machine           bin  speed  Temp
0   machine_a      (0, 200]     10     0
1   machine_a    (200, 400]      0     0
2   machine a    (400, 600]     20     0
3   machine a    (600, 800]      0    20
4   machine a   (800, 1000]     20    20
5   machine a  (1000, 1200]     10     0
6   machine_b      (0, 200]      5    35
7   machine_b    (200, 400]      0     0
8   machine_b    (400, 600]     40     0
9   machine_b    (600, 800]     10    20
10  machine_b   (800, 1000]     20     0
11  machine_b  (1000, 1200]     10    70
12  machine_c      (0, 200]      5    30
13  machine_c    (200, 400]     25     0
14  machine_c    (400, 600]      0     0
15  machine_c    (600, 800]     10     5
16  machine_c   (800, 1000]      5     0
17  machine_c  (1000, 1200]     10    25

I want to create separate bar plots for the `speed` and `Temp` columns, with the `bin` column on the x-axis, and I want to do this for each machine.

so far I've created a for loop to iterate over the last two columns

import seaborn as sns
import matplotlib.pyplot as plt

# One figure per value column: df.columns[2:] skips the first two columns ('machine', 'bin').
for column in df.columns[2:]: 
    sns.set()
    fig, ax = plt.subplots()
    sns.set(style="ticks")
    # The whole frame is passed with x taken from the 'bin' column; nothing here splits by machine.
    sns.barplot(x = df.bin, y=column, data=df)
    sns.despine(offset=10, trim=True) 
    # Resize after drawing; (22, 14) is width x height in inches.
    fig.set_size_inches(22,14)

This creates 2 bar plots: 1 for speed and 1 for temp. How do I make it so that I get 6 bar plots (2 for each machine)? Essentially, how do I use groupby in this case?

Complete Synthetic Data

import pandas as pd
import numpy as np

# Dimensions of the synthetic data set.
N_MACHINES = 50
BINS_PER_MACHINE = 109
BIN_WIDTH = 200

# Interval labels '(0, 200]', '(200, 400]', ... for one machine, then repeated for every machine.
bin_labels = [f'({lo}, {lo + BIN_WIDTH}]' for lo in range(0, BINS_PER_MACHINE * BIN_WIDTH, BIN_WIDTH)]
bins = bin_labels * N_MACHINES

# Each machine name appears once per bin, machine by machine.
machines = []
for idx in range(N_MACHINES):
    machines.extend([f'machine_{idx}'] * BINS_PER_MACHINE)

# Seed before drawing so the readings are reproducible; speed is drawn before temp.
n_rows = len(machines)
np.random.seed(365)
speed = np.random.randint(0, 40, size=n_rows)
temp = np.random.choice([0, 30, 70], size=n_rows)

df = pd.DataFrame({'machine': machines, 'bin': bins, 'speed': speed, 'Temp': temp})

df.head()
     machine          bin  speed  Temp
0  machine_0     (0, 200]     18    30
1  machine_0   (200, 400]     33    70
2  machine_0   (400, 600]     27    30
3  machine_0   (600, 800]      5    30
4  machine_0  (800, 1000]     34    30

df.tail()
         machine             bin  speed  Temp
5445  machine_49  (20800, 21000]      6     0
5446  machine_49  (21000, 21200]     20    30
5447  machine_49  (21200, 21400]     14     0
5448  machine_49  (21400, 21600]     38    30
5449  machine_49  (21600, 21800]     24    70

Upvotes: 2

Views: 1049

Answers (3)

Trenton McKinney
Trenton McKinney

Reputation: 62523

  • Given the existing data as described in the comments and with the updated synthetic data set.
  • Using seaborn.barplot. seaborn is a high-level API for matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# reshape to long form: 'speed' and 'Temp' are stacked into 'variable' / 'value' columns
df_long = df.melt(id_vars=['machine', 'bin'])

# split the long frame into one group per machine
dfmg = df_long.groupby('machine')

Option 1

  • Create a single plot figure for each machine, which is saved with the machine name
    • this saves 50 plots, 1 for each machine
# One figure per machine: both variables side by side (hue), saved as '<machine>.png'.
for mach, data in dfmg:
    fig, ax = plt.subplots(figsize=(20, 5))
    sns.barplot(data=data, x='bin', y='value', hue='variable', ax=ax)
    ax.tick_params(axis='x', labelrotation=90)
    ax.set_title(mach)
    ax.legend(loc='upper left')
    fig.tight_layout()
    fig.savefig(f'{mach}.png')
    plt.show()
    # release the figure; otherwise one figure per machine stays open on non-interactive backends
    plt.close(fig)

enter image description here

enter image description here

  • Separate plots for 'Temp' and 'speed'
# One figure per machine with two stacked axes: 'Temp' on top, 'speed' below, sharing the x-axis.
for mach, data in dfmg:
    fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(20, 5), sharex=True)

    # select data (names chosen so they do not overwrite any existing `temp` / `speed` variables)
    temp_rows = data[data.variable.eq('Temp')]
    speed_rows = data[data.variable.eq('speed')]

    # plot data
    # without seaborn: ax1.bar(x='bin', height='value', data=temp_rows)
    sns.barplot(data=temp_rows, x='bin', y='value', ax=ax1)
    # without seaborn: ax2.bar(x='bin', height='value', data=speed_rows)
    sns.barplot(data=speed_rows, x='bin', y='value', ax=ax2)

    # the top axes shares its x-axis with the bottom one, so drop its label and tick marks
    ax1.set(xlabel=None, ylabel='Temp')
    ax1.tick_params(bottom=False)
    ax2.set(ylabel='Speed')
    ax2.set_xticklabels(ax2.get_xticklabels(), rotation=90, ha='center')  # ax2.tick_params('x', rotation=90) - without seaborn

    fig.suptitle(mach)
    fig.tight_layout()
    fig.savefig(f'{mach}.png')
    plt.show()
    # release the figure; otherwise one figure per machine stays open on non-interactive backends
    plt.close(fig)

enter image description here

enter image description here

Option 2

  • Alternatively, create a single figure with all plots on the figure
    • this saves 1 figure with all 50 machines
  • The plot of each machine on the figure in this section, looks the same as the previous plots with orange and blue hue.
# One tall figure with one row of axes per machine, saved as a single image.
machine_groups = df.melt(id_vars=['machine', 'bin']).groupby('machine')
n_machines = machine_groups.ngroups

# squeeze=False always returns a 2-D array, so a single machine works too;
# the height scales with the number of machines (5 per row -> 250 for 50 machines).
fig, axes = plt.subplots(nrows=n_machines, ncols=1, figsize=(20, 5 * n_machines), squeeze=False)
axes = axes[:, 0]  # back to a 1-D array of axes, one per machine

for ax, (mach, data) in zip(axes, machine_groups):
    sns.barplot(data=data, x='bin', y='value', hue='variable', ax=ax)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha='center')
    ax.set(title=mach)
    ax.legend(loc='upper left')
fig.tight_layout()
fig.savefig('machines_all.png')
plt.show()
  • seaborn.catplot can create an equivalent, single figure-level plot with one line
import seaborn as sns

# long form: both value columns stacked so they can be mapped to `hue`
dfm = df.melt(id_vars=['machine', 'bin'])

# one facet per machine, stacked vertically (col_wrap=1), each 4 high with a 5:1 aspect
facet_options = dict(col='machine', col_wrap=1, height=4, aspect=5)
p = sns.catplot(data=dfm, x='bin', y='value', hue='variable', kind='bar', **facet_options)

# rotate the bin labels, then write the whole grid to disk
p.set_xticklabels(rotation=90)
p.savefig("facet_plot.png")

Upvotes: 1

mozway
mozway

Reputation: 262359

Here is a version using groupby:

# long form: one row per (machine, bin, variable)
df2 = df.melt(id_vars=['machine', 'bin'])

# grid layout: one column per machine, one row per variable
COLS = df2['machine'].nunique()
ROWS = df2['variable'].nunique()

# squeeze=False keeps `axes` 2-D even when there is a single machine or a single variable
fig, axes = plt.subplots(ncols=COLS,
                         nrows=ROWS,
                         squeeze=False,
                         )

# groupby yields groups sorted by machine, then variable, so the running index maps to
# (column, row) via divmod; this relies on every machine having every variable, which
# melt produces because each input row contributes one entry per value column
for i, ((machine, variable), d) in enumerate(df2.groupby(['machine', 'variable'])):
    col, row = divmod(i, ROWS)
    ax = axes[row][col]
    ax.bar(d['bin'], d['value'])
    if col == 0:
        # label the variable once, on the left-most column
        ax.set_ylabel(variable)
    if row == ROWS - 1:
        # label the machine once, on the bottom row
        ax.set_xlabel(machine)

output:

manual groupby

Upvotes: 0

mozway
mozway

Reputation: 262359

You can use seaborn.catplot directly. You just need to melt the y-columns first:

import seaborn as sns

# long form: 'speed' and 'Temp' stacked into 'variable' / 'value'
melted = df.melt(id_vars=['machine', 'bin'])

# one facet per machine, bars coloured by variable
sns.catplot(data=melted, x='bin', y='value', hue='variable', col='machine', kind='bar')

output:

seaborn catplot

Or to have the variables as rows:

import seaborn as sns

# long form: 'speed' and 'Temp' stacked into 'variable' / 'value'
melted = df.melt(id_vars=['machine', 'bin'])

# grid of facets: machines across the columns, variables down the rows, all bars in black
grid_layout = dict(col='machine', row='variable')
sns.catplot(data=melted, x='bin', y='value', kind='bar', color='k', **grid_layout)

output:

seaborn catplot rows

Or to split the variables:

import seaborn as sns

# long form: 'speed' and 'Temp' stacked into 'variable' / 'value'
melted = df.melt(id_vars=['machine', 'bin'])

# one facet per variable, bars coloured by machine
sns.catplot(data=melted, x='bin', y='value', col='variable', hue='machine', kind='bar')

seaborn catplot split variables

Upvotes: 2

Related Questions