Carmen González
Carmen González

Reputation: 169

Calculation of the first and third quartile

I'm trying to calculate the mean, standard deviation, median, first quartile and third quartile of the lognormal distribution that I fit to my histogram. So far I've only been able to calculate the mean, standard deviation and median, based on the formulas I found on Wikipedia, but I don't know how to calculate the first quartile and the third quartile. How could I calculate in Python the first quartile and the third quartile, based on the lognormal distribution?

import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import lognorm
import matplotlib.ticker as tkr
import scipy, pylab
import locale
import matplotlib.gridspec as gridspec
#from scipy.stats import lognorm
locale.setlocale(locale.LC_NUMERIC, "de_DE")
plt.rcParams['axes.formatter.use_locale'] = True

from scipy.optimize import curve_fit

x=np.asarray([0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 1.00, 1.10, 1.20, 1.30, 1.40,
   1.50, 1.60, 1.70, 1.80, 1.90, 2.00, 2.10, 2.20, 2.30, 2.40, 2.50, 2.60, 2.70, 2.80,
   2.90, 3.00, 3.10, 3.20, 3.30, 3.40, 3.50, 3.60, 3.70, 3.80, 3.90, 4.00, 4.10, 4.20,
   4.30, 4.40, 4.50, 4.60, 4.70, 4.80, 4.90, 5.00, 5.10, 5.20, 5.30, 5.40, 5.50, 5.60,
   5.70, 5.80, 5.90, 6.00, 6.10, 6.20, 6.30, 6.40, 6.50, 6.60, 6.70, 6.80, 6.90, 7.00,
   7.10, 7.20, 7.30, 7.40, 7.50, 7.60, 7.70, 7.80, 7.90, 8.00], dtype=np.float64)

frequencia_relativa=np.asarray([0.000, 0.000, 0.038, 0.097, 0.091, 0.118, 0.070, 0.124, 0.097, 0.059, 0.059, 0.048, 0.054, 0.043,
                     0.032, 0.005, 0.027, 0.016, 0.005, 0.000, 0.005, 0.000, 0.005, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000,
                     0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.000, 0.005, 0.000, 0.000], dtype=np.float64)

plt.rcParams["figure.figsize"] = [18,8]
f, (ax,ax2) = plt.subplots(1,2, sharex=True, sharey=True, facecolor='w')

def fun(y, mu, sigma):
    return 1.0/(np.sqrt(2.0*np.pi)*sigma*y)*np.exp(-(np.log(y)-mu)**2/(2.0*sigma*sigma))

step = 0.1

xx = x-step*0.99

nrm = np.sum(frequencia_relativa*step) # normalization integral
print(nrm)

frequencia_relativa /= nrm # normalize frequences histogram

print(np.sum(frequencia_relativa*step)) # check normalizatio

params, extras = curve_fit(fun, xx, frequencia_relativa)

print(params)

axes = f.add_subplot(111, frameon=False)

ax.spines['top'].set_color('none')
ax2.spines['top'].set_color('none')
gs = gridspec.GridSpec(1,2,width_ratios=[3,1])
ax = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
ax.axvspan(0.243, 1.481, label='Média $\pm$ desvio padrão', ymin=0.0, ymax=1.0, alpha=0.2, color='Plum') #lognormal distribution
ax.yaxis.tick_left()
ax.xaxis.tick_bottom()
ax2.xaxis.tick_bottom()
ax.tick_params(labeltop='off') # don't put tick labels at the top
ax2.yaxis.tick_right()
ax.bar(xx, height=frequencia_relativa, label='Frequência relativa normalizada do tamanho triangular', alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")
ax2.bar(xx, height=frequencia_relativa, alpha=0.5, width=0.1, align='edge', edgecolor='black', hatch="///")

xxx = np.linspace (0.001, 8, 1000)
ax.plot(xxx, fun(xxx, params[0], params[1]), "r-", label='Distribuição log-normal', linewidth=3)
ax2.plot(xxx, fun(xxx, params[0], params[1]), "r-", linewidth=3)

ax.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'major', labelsize = 18)
ax2.tick_params(axis = 'both', which = 'minor', labelsize = 18)
ax2.xaxis.set_ticks(np.arange(7.0, 8.5, 0.5))
ax2.xaxis.set_major_formatter(tkr.FormatStrFormatter('%0.1f'))

plt.subplots_adjust(wspace=0.04)
ax.set_xlim(0,2.5)
ax.set_ylim(0,1.4)
ax2.set_xlim(7.0,8.0)
def func(x, pos):  # formatter function takes tick label and tick position
    s = str(x)
    ind = s.index('.')
    return s[:ind] + ',' + s[ind+1:]   # change dot to comma
x_format = tkr.FuncFormatter(func)
ax.xaxis.set_major_formatter(x_format)
ax2.xaxis.set_major_formatter(x_format)
# hide the spines between ax and ax2
ax.spines['right'].set_visible(False)
ax2.spines['left'].set_visible(False)

d = .015 # how big to make the diagonal lines in axes coordinates
# arguments to pass plot, just so we don't keep repeating them
kwargs = dict(transform=ax.transAxes, color='k', clip_on=False)
ax.plot((1-d/3,1+d/3), (-d,+d), **kwargs)
ax.plot((1-d/3,1+d/3),(1-d,1+d), **kwargs)

kwargs.update(transform=ax2.transAxes)  # switch to the bottom axes
ax2.plot((-d,+d), (1-d,1+d), **kwargs)
ax2.plot((-d,+d), (-d,+d), **kwargs)
ax2.tick_params(labelright=False)
ax.tick_params(labeltop=False)
ax.tick_params(axis='x', which='major', pad=15)
ax2.tick_params(axis='x', which='major', pad=15)
ax2.set_yticks([])
f.text(0.5, -0.04, 'Tamanho lateral do triângulo ($\mu m$)', ha='center', fontsize=22)
f.text(-0.02, 0.5, 'Frequência relativa normalizada', va='center', rotation='vertical', fontsize=22)

ax.axvline(0.862, color='k', linestyle='-', linewidth=1.3) #lognormal distribution
ax.axvline(0.243, color='k', linestyle='--', linewidth=1)  #lognormal distribution
ax.axvline(1.481, color='k', linestyle='--', linewidth=1)  #lognormal distribution
f.legend(loc=9, 
          bbox_to_anchor=(.77,.99),
          labelspacing=1.5,
          numpoints=1,
          columnspacing=0.2,
          ncol=1, fontsize=18,
          frameon=False)

ax.text(0.86*0.63, 1.4*0.92, 'tamanho = (0,86 $\pm$ 0,62) $\mu m$', fontsize=20) #Excel
mu = params[0]
sigma = params[1]

# calculate mean value
print(np.exp(mu + sigma*sigma/2.0))

# calculate stddev
print(np.sqrt((np.exp(sigma*sigma)-1)*np.exp(mu+sigma*sigma/2.0)))

# calculate median value
print(np.exp(mu))

f.tight_layout()
#plt.show()
plt.savefig('output.png', dpi=500, bbox_inches='tight')

Graph: enter image description here

Upvotes: 1

Views: 979

Answers (2)

Severin Pappadeux
Severin Pappadeux

Reputation: 20080

Welcome back

Ok, for log-normal distribution one could compute quantiles using expression in the page in the wiki

import numpy as np
from scipy import special

SQRT2 = 1.41421356237

def LNquantile(μ, σ, p):
    """
    Compute quantile function for log-normal distribution
    """
    nq = SQRT2 * special.erfinv(2.0*p - 1.0) # N(0,1) normal quantile
    t = μ +  σ * nq # N(μ, σ) quantile
    return np.exp(t) # LN(μ, σ) quantile

μ = 0.0
σ = 1.0

q1 = LNquantile(μ, σ, 0.25)
q3 = LNquantile(μ, σ, 0.75)

print(q1)
print(q3)

prints reasonable

0.5094162838640294
1.9630310841553595

You could put any values and compare with online calculator at https://planetcalc.com/7263/

Upvotes: 1

Michael Baudin
Michael Baudin

Reputation: 1151

Given a log-normal distribution, we want to compute its quantiles. Furthermore, the parameters of the log-normal distribution are estimated from data.

The script below uses OpenTURNS to create the distribution using the LogNormal class. It takes as inputs arguments the mean and standard deviation of the underlying normal distribution. Then we can use the computeQuantile()method to compute the quantiles.

import openturns as ot

distribution = ot.LogNormal(-0.33217492, 0.6065058)
mean = distribution.getMean()
std = distribution.getStandardDeviation()
print("Mean=", mean, ", Std=", std)
q25 = distribution.computeQuantile(0.25)
q75 = distribution.computeQuantile(0.75)
print("Quantiles = ", q25, q75)

Notice that the constant values in the previous script can be replaced by whatever estimates, e.g. with your estimation procedure based on fitting the PDF and the histogram.

The previous script prints:

Mean= [0.862215] , Std= [0.574926]
Quantiles =  [0.476515] [1.07994]

We can plot the PDF with the script:

import openturns.viewer as otv
graph = distribution.drawPDF()
otv.View(graph)

which plots:

LogNormal

Upvotes: 1

Related Questions