Reputation: 55
This is my dataframe:
# Sample frame: six 200-wide bins per machine, each with a speed and a Temp reading.
# NOTE(review): rows 2-5 are labelled 'machine a' (with a space), so they form a group
# separate from 'machine_a' -- presumably a typo; confirm before grouping by machine.
bin_labels = ['(0, 200]', '(200, 400]', '(400, 600]', '(600, 800]', '(800, 1000]', '(1000, 1200]']
data = {
    'machine': ['machine_a'] * 2 + ['machine a'] * 4 + ['machine_b'] * 6 + ['machine_c'] * 6,
    'bin': bin_labels * 3,
    'speed': [
        10, 0, 20, 0, 20, 10,
        5, 0, 40, 10, 20, 10,
        5, 25, 0, 10, 5, 10,
    ],
    'Temp': [
        0, 0, 0, 20, 20, 0,
        35, 0, 0, 20, 0, 70,
        30, 0, 0, 5, 0, 25,
    ],
}
df = pd.DataFrame(data)
machine bin speed Temp
0 machine_a (0, 200] 10 0
1 machine_a (200, 400] 0 0
2 machine a (400, 600] 20 0
3 machine a (600, 800] 0 20
4 machine a (800, 1000] 20 20
5 machine a (1000, 1200] 10 0
6 machine_b (0, 200] 5 35
7 machine_b (200, 400] 0 0
8 machine_b (400, 600] 40 0
9 machine_b (600, 800] 10 20
10 machine_b (800, 1000] 20 0
11 machine_b (1000, 1200] 10 70
12 machine_c (0, 200] 5 30
13 machine_c (200, 400] 25 0
14 machine_c (400, 600] 0 0
15 machine_c (600, 800] 10 5
16 machine_c (800, 1000] 5 0
17 machine_c (1000, 1200] 10 25
I want to create separate bar plots for the columns speed and Temp, with the bin column on the x-axis, and I want to do this for each machine.
So far I've created a for loop that iterates over the last two columns:
import seaborn as sns
import matplotlib.pyplot as plt
# Draw one bar plot per measurement column (every column after 'machine' and 'bin').
for measure in df.columns[2:]:
    sns.set()  # start from seaborn's default theme before the figure is created
    fig, ax = plt.subplots()
    sns.set(style="ticks")
    # No `ax` is passed, so the bars land on the current axes, i.e. the ones just created.
    # All machines share these axes: nothing here splits the rows by machine.
    sns.barplot(data=df, x=df["bin"], y=measure)
    sns.despine(offset=10, trim=True)
    fig.set_size_inches(22, 14)
This creates 2 bar plots: one for speed and one for Temp. How do I change it so that I get 6 bar plots (2 for each machine)? Essentially, how do I use groupby in this case?
import pandas as pd
import numpy as np
# Larger synthetic frame: 50 machines, each with the same 109 consecutive 200-wide bins.
one_machine_bins = [f'({n*200}, {(n+1)*200}]' for n in range(109)]
bins = one_machine_bins * 50
machines = [name for name in (f'machine_{n}' for n in range(50)) for _ in range(109)]

# Seed the global NumPy generator so the sampled values are reproducible.
np.random.seed(365)
n_rows = len(machines)
speed = np.random.randint(0, 40, size=n_rows)
temp = np.random.choice([0, 30, 70], size=n_rows)

df = pd.DataFrame(
    {
        'machine': machines,
        'bin': bins,
        'speed': speed,
        'Temp': temp,
    }
)
df.head()
machine bin speed Temp
0 machine_0 (0, 200] 18 30
1 machine_0 (200, 400] 33 70
2 machine_0 (400, 600] 27 30
3 machine_0 (600, 800] 5 30
4 machine_0 (800, 1000] 34 30
# Show the last rows of the frame (the final machine's highest bins).
df.tail()
machine bin speed Temp
5445 machine_49 (20800, 21000] 6 0
5446 machine_49 (21000, 21200] 20 30
5447 machine_49 (21200, 21400] 14 0
5448 machine_49 (21400, 21600] 38 30
5449 machine_49 (21600, 21800] 24 70
Upvotes: 2
Views: 1049
Reputation: 62523
Use `seaborn.barplot`. `seaborn` is a high-level API for `matplotlib`.
import matplotlib.pyplot as plt
import seaborn as sns
# melt the dataframe into a long form and group by the machine
# Reshape to long form (one row per machine/bin/measurement), then split by machine.
long_df = df.melt(id_vars=['machine', 'bin'])
dfmg = long_df.groupby('machine')

# One figure per machine; 'speed' and 'Temp' are drawn side by side via hue.
for mach, group in dfmg:
    fig, ax = plt.subplots(figsize=(20, 5))
    sns.barplot(data=group, x='bin', y='value', hue='variable', ax=ax)
    plt.xticks(rotation=90, ha='center')  # acts on the current axes, i.e. `ax`
    ax.set_title(mach)
    ax.legend(loc='upper left')
    fig.tight_layout()
    # save before show(): the figure may be discarded once the window is closed
    fig.savefig(f'{mach}.png')
    plt.show()
'Temp' and 'speed' on separate subplots:
# One figure per machine: 'Temp' on the upper axes, 'speed' on the lower, sharing the x-axis.
# `dfmg` is the long-form frame grouped by machine.
for mach, data in dfmg:
    fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(20, 5), sharex=True)

    # split the long-form rows by measurement
    is_temp = data['variable'] == 'Temp'
    is_speed = data['variable'] == 'speed'
    temp = data[is_temp]
    speed = data[is_speed]

    # draw each measurement on its own axes
    # (without seaborn: ax1.bar(data=temp, x='bin', height='value'), likewise for ax2)
    sns.barplot(data=temp, x='bin', y='value', ax=ax1)
    sns.barplot(data=speed, x='bin', y='value', ax=ax2)

    # the upper axes drops its x label and bottom tick marks; the shared x-axis
    # is labelled once, on the lower axes
    ax1.set(xlabel=None, ylabel='Temp')
    ax1.tick_params(bottom=False)
    ax2.set(ylabel='Speed')
    # without seaborn: ax2.tick_params('x', rotation=90)
    ax2.set_xticklabels(ax2.get_xticklabels(), rotation=90, ha='center')

    fig.suptitle(mach)
    fig.tight_layout()
    fig.savefig(f'{mach}.png')
    plt.show()
# Draw every machine in a single tall figure, one row of axes per machine.
machine_groups = df.melt(id_vars=['machine', 'bin']).groupby('machine')
n_machines = machine_groups.ngroups

# Scale the height with the number of machines (5 in per row, i.e. 250 in for 50 machines)
# instead of hard-coding it, and keep `axes` 2-D with squeeze=False so that indexing
# also works when there is only a single machine.
fig, axes = plt.subplots(nrows=n_machines, ncols=1, figsize=(20, 5 * n_machines), squeeze=False)

for ax, (mach, data) in zip(axes[:, 0], machine_groups):
    sns.barplot(data=data, x='bin', y='value', hue='variable', ax=ax)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha='center')
    ax.set(title=mach)
    ax.legend(loc='upper left')

fig.tight_layout()
fig.savefig('machines_all.png')
plt.show()
`seaborn.catplot` can create an equivalent, single figure-level plot with one line:
import seaborn as sns
# melt the dataframe into a long form
# long-form frame: one row per (machine, bin, measurement)
dfm = df.melt(id_vars=['machine', 'bin'])

# figure-level plot: one facet per machine, stacked in a single column (col_wrap=1)
p = sns.catplot(
    data=dfm,
    kind='bar',
    x='bin',
    y='value',
    hue='variable',
    col='machine',
    col_wrap=1,
    height=4,
    aspect=5,
)
p.set_xticklabels(rotation=90)
p.savefig("facet_plot.png")
Upvotes: 1
Reputation: 262359
Here is a version using `groupby`:
# Long form: one row per (machine, bin, variable), so each (machine, variable) pair is one group.
df2 = df.melt(id_vars=['machine', 'bin'])

COLS = df2['machine'].nunique()   # one column of axes per machine
ROWS = df2['variable'].nunique()  # one row of axes per melted variable

# squeeze=False keeps `axes` 2-D even with a single machine or a single variable,
# so the axes[row][col] indexing below always works.
fig, axes = plt.subplots(ncols=COLS, nrows=ROWS, squeeze=False)

# groupby yields groups sorted by (machine, variable), so consecutive groups fill one
# column (one machine) from top to bottom before moving on to the next column.
# This relies on every machine having every variable, which melt guarantees.
for i, (group, d) in enumerate(df2.groupby(['machine', 'variable'])):
    col, row = divmod(i, ROWS)
    ax = axes[row][col]
    ax.bar(d['bin'], d['value'])
    if col == 0:
        # first column only: label the y-axis with the variable name
        ax.set_ylabel(group[1])
    if row == ROWS - 1:
        # bottom row only: label the x-axis with the machine name
        ax.set_xlabel(group[0])
output:
Upvotes: 0
Reputation: 262359
You can use `seaborn.catplot` directly. You just need to melt the y-columns first:
import seaborn as sns
# Melt 'speed' and 'Temp' into variable/value pairs, then draw one facet per machine
# with the two variables distinguished by hue.
long_df = df.melt(id_vars=['machine', 'bin'])
sns.catplot(data=long_df, kind='bar', x='bin', y='value', hue='variable', col='machine')
output:
Or to have the variables as rows:
import seaborn as sns
# Facet grid with one column per machine and one row per melted variable;
# all bars share a single colour since hue is not used.
long_df = df.melt(id_vars=['machine', 'bin'])
sns.catplot(
    data=long_df,
    kind='bar',
    x='bin',
    y='value',
    row='variable',
    col='machine',
    color='k',
)
output:
Or to split the variables:
import seaborn as sns
# One facet per melted variable; within each facet the machines are compared via hue.
long_df = df.melt(id_vars=['machine', 'bin'])
sns.catplot(data=long_df, kind='bar', x='bin', y='value', hue='machine', col='variable')
Upvotes: 2