Reputation: 2533
So the question is:
Can I plot a histogram in Plotly where all values above some threshold are grouped into a single bin?
The desired output:
But using the standard Plotly `Histogram` class, I was only able to get this output:
import pandas as pd
from plotly import graph_objs as go
from plotly.offline import init_notebook_mode, iplot
# Switch Plotly's offline module into notebook mode before plotting.
init_notebook_mode()

# Sample data described as (value, number of occurrences) pairs.
occurrences = [
    (1, 10), (2, 9),
    (3.1, 4), (3.6, 4),
    (4, 7), (5, 6), (6, 5), (7, 4), (8, 3),
    (9, 2), (10, 1),
    (111.2, 2), (222.3, 2), (333.4, 1),  # <- I want to group them into one bin "> 10"
]
test_df = pd.DataFrame(
    {'values': [value for value, count in occurrences for _ in range(count)]}
)

# Histogram with explicit bins of width 1 from 0 to 11 (auto-binning off).
histogram_trace = go.Histogram(
    x=test_df['values'],
    xbins=dict(start=0, end=11, size=1),
    autobinx=False,
)
data = [histogram_trace]
layout = go.Layout(title='values')

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='basic histogram')
Upvotes: 7
Views: 5406
Reputation: 1
An alternative to the above option is the following:
import numpy as np
# Initialize the values that you want in the histogram.
values = [7, 8, 8, 8, 9, 10, 10, 11, 12, 13, 14]
# Initialize the maximum x-axis value that you want.
maximum_value = 11
# Cap every value at the maximum in one vectorized call, so that everything
# above the threshold lands in the same bin as the threshold itself.
clipped_values = np.minimum(values, maximum_value)
# Plot the histogram.
fig = go.Figure()
fig.add_trace(
    go.Histogram(
        x=clipped_values,
        xbins={"size": 1},
    )
)
fig.show()
Upvotes: 0
Reputation: 2533
So after spending some time I found a solution myself, using `numpy.histogram` and a Plotly `Bar` chart.
Leaving it here in case anyone faces the same problem.
def _bin_with_overflow(series, end):
    """Count *series* into unit-wide bins up to *end* plus one overflow bin.

    Returns a ``(labels, counts)`` pair: one label per bin and the matching
    counts as produced by ``np.histogram``.
    """
    size = 1
    # Floor instead of truncating so a negative fractional minimum (e.g. -1.5)
    # still lies inside the first bin, and never start past `end` so that
    # data lying entirely above `end` still yields the overflow bin.
    start = min(int(np.floor(series.min())), end)
    largest_value = series.max()

    edges = list(range(start, end + size, size))
    # Close the overflow bin at the data maximum, or one step past `end`
    # when nothing exceeds it.
    edges.append(largest_value if largest_value > end else end + size)

    counts, edges = np.histogram(series, bins=edges)
    labels = [
        '{} - {}'.format(low, high) if high <= end else '> {}'.format(low)
        for low, high in zip(edges[:-1], edges[1:])
    ]
    return labels, counts


def plot_bar_with_outliers(series, name, end):
    """Plot a histogram-like bar chart with all large values in one last bar.

    Values are counted in bins of width 1 from the floored minimum of
    *series* up to *end*; everything from *end* upwards is collected in a
    single final bar.

    Args:
        series: Numeric data exposing ``.min()`` and ``.max()`` (a pandas
            Series in the example usage).
        name: Text used as the chart title.
        end: Integer edge at which the overflow bar starts.

    Note:
        The final bar is labelled ``'> x'`` but, because it is the last bin
        passed to ``np.histogram``, it also counts values equal to ``x``.
    """
    labels, counts = _bin_with_overflow(series, end)

    # Plotting the graph
    data = [go.Bar(x=labels, y=counts)]
    layout = go.Layout(title=name)
    fig = go.Figure(data=data, layout=layout)
    iplot(fig, filename='basic histogram')
# Example: chart the question's sample data, collecting values from 11 upwards in one final bar.
plot_bar_with_outliers(test_df['values'], 'values', end=11)
Upvotes: 11