Reputation: 19776
Sometimes I get histograms that look like below:
I see the peaks loud and clear, but not much else; is there a way to drop the "bin outliers" from a histogram so that the rest of the distribution can be seen better?
Upvotes: 0
Views: 330
Reputation: 19776
This can be accomplished by simply setting ylim; however, this discards the peak information. To retain it, we can include it via annotations, as follows:
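1. Fetch the histogram heights, `N`, and positions, `bins`.
2. Set the upper y-limit to the tallest height in `N` that remains once the peaks to clip are excluded.
3. Annotate the plot with the (position, height) of each clipped peak.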
All combined and an example below; I used your exact data for comparison, since you are me.
import numpy as np
import matplotlib.pyplot as plt
# Fixed seed so the demo histograms are reproducible.
np.random.seed(0)
data = np.random.randn(100, 100) ** 3

# Zero out progressively more leading rows (first 50, then 95 of the 100);
# all the zeros land in a single bin, producing one dominant peak per plot.
for zeroed_rows in (50, 95):
    data[:zeroed_rows] = 0
    # An empty (non-None) `annot_kw` requests the annotation with defaults.
    hist_visible(data, peaks_to_clip=3, bins=500, annot_kw={})
Function:
def hist_visible(data, peaks_to_clip=1, bins=200, annot_kw=None):
    """Plot a histogram with its tallest bins clipped so the rest is visible.

    The upper y-limit is set to the height of the tallest bin that remains
    after excluding the ``peaks_to_clip`` tallest bins. Since clipping hides
    the true height of those bins, they can be reported in a text annotation
    as ``(bin position, count)`` lines, tallest first. The figure is shown
    with ``plt.show()`` before returning.

    Parameters
    ----------
    data : array-like
        Values to histogram; converted to an array and flattened.
    peaks_to_clip : int, optional
        Number of tallest bins to clip. If it is not smaller than the number
        of bins it is reduced so that one bin always stays unclipped.
    bins : int or sequence, optional
        Forwarded unchanged to ``plt.hist``.
    annot_kw : dict or None, optional
        ``None`` disables the annotation. A dict (possibly empty) enables it;
        its entries are passed to ``plt.annotate`` and override the built-in
        defaults. The dict itself is never modified.

    Raises
    ------
    ValueError
        If ``peaks_to_clip`` is negative.
    """
    def _annotate(peaks_info, annot_kw):
        defaults = dict(weight='bold', fontsize=13, color='r',
                        xy=(.85, .85), xycoords='axes fraction')
        # Merge into a fresh dict: caller-supplied keys win, and the
        # caller's dict stays untouched.
        merged = dict(defaults)
        merged.update(annot_kw)
        txt = '\n'.join("({:.2f}, {})".format(pos, int(height))
                        for pos, height in peaks_info)
        plt.annotate(txt, **merged)

    # Validate before drawing anything so a bad call leaves no half-made plot.
    if peaks_to_clip < 0:
        raise ValueError(
            "peaks_to_clip must be non-negative, got {}".format(peaks_to_clip))

    N, bins, _ = plt.hist(np.asarray(data).ravel(), bins=bins)

    # Keep at least one bin unclipped; otherwise there is no height left to
    # use as the y-limit (this used to surface as an IndexError).
    n_clip = min(peaks_to_clip, N.size - 1)

    # Bin indices ordered by descending height. Ranking indices (instead of
    # looking heights up by value) keeps equally tall peaks distinct; the
    # stable sort lists tied bins left to right.
    order = np.argsort(-N, kind='stable')
    peaks_info = [[bins[idx], N[idx]] for idx in order[:n_clip]]
    lower_max = N[order[n_clip]]

    # A (0, 0) range is degenerate; leave matplotlib's autoscaled limits when
    # every remaining bin is empty.
    if lower_max > 0:
        plt.ylim(0, lower_max)

    if annot_kw is not None and peaks_info:
        _annotate(peaks_info, annot_kw)
    plt.show()
Upvotes: 1