Reputation: 169
I'm trying to calculate the mean, standard deviation, median, first quartile and third quartile of the lognormal distribution that I fit to my histogram. So far I've only been able to calculate the mean, standard deviation and median, based on the formulas I found on Wikipedia, but I don't know how to calculate the first quartile and the third quartile. How could I calculate in Python the first quartile and the third quartile, based on the lognormal distribution?
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import lognorm
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
#from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True
from scipy.optimize import curve_fit
x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)
frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)
plt.rcParams["figure.figsize"] = [18,8]
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')
def fun(y, mu, sigma):
return 1.0/(np.sqrt(2.0*np.pi)*sigma*y)*np.exp(-(np.log(y)-mu)**2/(2.0*sigma*sigma))
step = 0.1
xx = x-step*0.99
nrm = np.sum(frequencia_relativa*step) # normalization integral
print(nrm)
frequencia_relativa /= nrm # normalize frequences histogram
print(np.sum(frequencia_relativa*step)) # check normalizatio
params, extras = curve_fit(fun, xx, frequencia_relativa)
print(params)
axes = f.add_subplot(111, frameon=False)
ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
ax.axvspan(0.243, 1.481, label='Média $\pm$ desvio padrão', ymin=0.0, ymax=1.0, alpha=0.2, color='Plum') #lognormal distribution
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
ax2.xaxis.tick_bottom()
ax.tick_params(labeltop='off') # don't put tick labels at the top
ax2.yaxis.tick_right()
ax.bar(xx, height=frequencia_relativa, label='Frequência relativa normalizada do tamanho triangular', alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax2.bar(xx, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
xxx = np.linspace (0.001, 8, 1000)
ax.plot(xxx, fun(xxx, params[0], params[1]), "r-", label='Distribuição log-normal', linewidth=3)
ax2.plot(xxx, fun(xxx, params[0], params[1]), "r-", linewidth=3)
ax.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.xaxis.set_ticks(np.arange(7.0, 8.5, 0.5))
ax2.xaxis.set_major_formatter(tkr.FormatStrFormatter('%0.1f'))
plt.subplots_adjust(wspace=0.04)
ax.set_xlim(0,2.5)
ax.set_ylim(0,1.4)
ax2.set_xlim(7.0,8.0)
def func(x, pos): # formatter function takes tick label and tick position
s = str(x)
ind = s.index('.')
return s[:ind] + ',' + s[ind+1:] # change dot to comma
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax2.xaxis.set_major_formatter(x_format)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)
d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)
kwargs.update(transform=ax2.transAxes) # switch to the bottom axes
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
ax2.tick_params(labelright=False)
ax.tick_params(labeltop=False)
ax.tick_params(axis='x', which='major', pad=15)
ax2.tick_params(axis='x', which='major', pad=15)
ax2.set_yticks([])
f.text(0.5, -0.04, 'Tamanho lateral do triângulo ($\mu m$)', ha='center', fontsize=22)
f.text(-0.02, 0.5, 'Frequência relativa normalizada', va='center', rotation='vertical', fontsize=22)
ax.axvline(0.862, color='k', linestyle='-', linewidth=1.3) #lognormal distribution
ax.axvline(0.243, color='k', linestyle='--', linewidth=1) #lognormal distribution
ax.axvline(1.481, color='k', linestyle='--', linewidth=1) #lognormal distribution
f.legend(loc=9,
bbox_to_anchor=(.77,.99),
labelspacing=1.5,
numpoints=1,
columnspacing=0.2,
ncol=1, fontsize=18,
frameon=False)
ax.text(0.86*0.63, 1.4*0.92, 'tamanho = (0,86 $\pm$ 0,62) $\mu m$', fontsize=20) #Excel
mu = params[0]
sigma = params[1]
# calculate mean value
print(np.exp(mu + sigma*sigma/2.0))
# calculate stddev
print(np.sqrt((np.exp(sigma*sigma)-1)*np.exp(mu+sigma*sigma/2.0)))
# calculate median value
print(np.exp(mu))
f.tight_layout()
#plt.show()
plt.savefig('output.png', dpi=500, bbox_inches='tight')
Upvotes: 1
Views: 979
Reputation: 20080
Welcome back
Ok, for log-normal distribution one could compute quantiles using expression in the page in the wiki
import numpy as np
from scipy import special
SQRT2 = 1.41421356237
def LNquantile(μ, σ, p):
"""
Compute quantile function for log-normal distribution
"""
nq = SQRT2 * special.erfinv(2.0*p - 1.0) # N(0,1) normal quantile
t = μ + σ * nq # N(μ, σ) quantile
return np.exp(t) # LN(μ, σ) quantile
μ = 0.0
σ = 1.0
q1 = LNquantile(μ, σ, 0.25)
q3 = LNquantile(μ, σ, 0.75)
print(q1)
print(q3)
prints reasonable
0.5094162838640294
1.9630310841553595
You could put any values and compare with online calculator at https://planetcalc.com/7263/
Upvotes: 1
Reputation: 1151
Given a log-normal distribution, we want to compute its quantiles. Furthermore, the parameters of the log-normal distribution are estimated from data.
The script below uses OpenTURNS to create the distribution using the LogNormal
class. It takes as inputs arguments the mean and standard deviation of the underlying normal distribution. Then we can use the computeQuantile()
method to compute the quantiles.
import openturns as ot
distribution = ot.LogNormal(-0.33217492, 0.6065058)
mean = distribution.getMean()
std = distribution.getStandardDeviation()
print("Mean=", mean, ", Std=", std)
q25 = distribution.computeQuantile(0.25)
q75 = distribution.computeQuantile(0.75)
print("Quantiles = ", q25, q75)
Notice that the constant values in the previous script can be replaced by whatever estimates, e.g. with your estimation procedure based on fitting the PDF and the histogram.
The previous script prints:
Mean= [0.862215] , Std= [0.574926]
Quantiles = [0.476515] [1.07994]
We can plot the PDF with the script:
import openturns.viewer as otv
graph = distribution.drawPDF()
otv.View(graph)
which plots:
Upvotes: 1