Reputation: 11
First of all, I want to say that I am not very familiar with statistics, so I apologize in advance if I have misrepresented or misinterpreted any facts.
I have continuous data x and I want to choose a distribution that fits the data.
I used fitdistrplus in R, and from the Cullen and Frey graph it seems that my data are close to a logistic distribution.
Furthermore, I used the software Easy Fit; its goodness-of-fit summary indicates that the data are close to a lognormal (3P) distribution (the Lognormal (3P) was ranked in 1st place by both the Kolmogorov-Smirnov test and the chi-squared test).
With the parameters that Easy Fit reported for the 3-parameter lognormal distribution (σ = 0.03676, μ = 4.1258, γ = -61.826), I wanted to plot the probability density function and the cumulative distribution function in R.
I tried the functions dlnorm3(x,shape=1,scale=1,thres=0,log=FALSE) and plnorm3(q,shape=1,scale=1,thres=0,lower.tail=TRUE,log.p=FALSE) from the package ‘FAdist’.
I interpreted the three parameters (σ = 0.03676, μ = 4.1258, γ = -61.826) as (shape = sdlog = σ = 0.03676, scale = meanlog = μ = 4.1258, threshold = γ = -61.826).
In R I got the following probability density function plot with the three parameters (σ = 0.03676, μ = 4.1258, γ = -61.826):
# Import the sample from the worksheet "Sheet1" of Tab.xlsx.
# The sheet is selected by name explicitly (the second positional argument of
# read.xlsx2 is `sheetIndex`), and the function is namespace-qualified so the
# call does not depend on the xlsx package having been attached beforehand.
dat1 <- xlsx::read.xlsx2("Tab.xlsx", sheetName = "Sheet1")
# read.xlsx2 returns text columns by default; on R < 4.0 these may be turned
# into factors. Going through as.character() guarantees that the cell values
# themselves are converted, never the internal factor level codes.
dat1$x <- as.numeric(as.character(dat1$x))
# Quick sanity checks on the imported data: structure and histogram.
str(dat1)
hist(dat1$x)
# Plain numeric vector used by all subsequent steps.
x <- dat1$x
# > x
# [1] 2 1 -1 -7 1 -2 -2 -1 0 0 2 -1 -1 -2 -2 3 1 -1 3 -6 -1 1 -2 2 -2 4 0 -1 5 -2 3 0 0 -4 1 4 -2 -1 -1 -2
# [41] -4 0 2 1 2 0 3 0 3 1 1 -4 1 -2 2 -2 -1 4 2 0 -2 -2 -2 0 1 2 2 2 0 -3 0 1 -3 4 3 0 3 4 -2 0
# [81] -6 -2 1 3 -4 -1 -2 -2 1 0 3 2 -3 -3 -4 1 0 -1 -1 1 0 0 3 1 2 0 -1 0 1 1 0 2 -3 2 -1 2 -1 -4 -1 -2
# [121] 1 1 3 0 4 0 -1 2 1 -1 -3 -2 -3 0 0 -1 0 0 -2 -6 -2 -2 3 -1 5 4 -2 1 -1 -1 -1 -4 0 -2 0 1 -1 -1 -1 4
# [161] 1 0 5 -4 -1 0 -1 1 1 1 0 2 3 -2 -4 -2 1 -1 0 -1 3 1 1 -1 -1 -1 6 -1 -4 -3 0 -1 -2 -6 -1 1 -1 0 3 -1
# [201] -1 -2 0 2 -3 -1 1 -2 2 3 0 4 1 -3 1 -2 1 -4 0 -1 3 0 1 0 2 -3 2 1 3 2 -3 4 1 0 2 0 -1 1 0 2
# [241] 3 3 3 4 1 2 -3 1 -6 -3 6 3 2 -3 3 -1 0 0 1 0 4 3 1 5 1 -3 1 0 -2 -1 0 9 1 4 -1 -2 -1 2 -4 0
# [281] -1 -4 -2 -2 0 1 -1 0 3 -2 -4 2 -2 0 4 -2 -4 2 -2 -1 -8 0 6 -1 -3 2 -2 -1 2 -1 0 -2 2 -3 4 -2 1 2 1 2
# [321] -1 1 -3 -1 0 5 5 -2 -1 1 0 -1 1 1 3 3 2 -1 3 -2 -1 4 1 6 -1 1 7 2 -1 1 0 2 2 1 1 4 -2 -1 2 0
# [361] 0 -2 3 1 0 0 3 1 3 0 0 2 -3 0 -3 5 -1 1 2 -1 0 0 2 -2 1 -3 0 2 -7 1 -3 3 -1 3 3 -1 -1 6 -1 3
# [401] 0 -1 -3 0 2 2 -5 1 -1 0 -1 2 2 -1 0 0 1 -1 2 0 -1 0 -1 -1 -1 1 0 -1 -3 5 0 4 -4 -2 -1 10 3 0 1 -1
# [441] 2 0 3 -2 -6 0 1 1 -1 1 1 1 1 2 -1 0 0 -1 1 1 2 0 0 0 1 1 1 1 0 -3 0 4 0 0 1 0 5 -1 3 -2
# [481] 0 -2 -2 -1 -1 0 0 2 1 3 -2 -3 1 -1 -2 0 5 -1 4 -1 -2 0 0 1 -1 1 -2 2 0 -1 -1 0 -2 -3 -1 -1 2 -3 -1 2
# [521] 1 2 -3 -2 0 0 0 -2 -3 -1 -1 -1 -2 0 -1 -1 0 0 -1 0 3 0 1 -1 -1 -2 1 0 -1 2 -4 2 1 -1 2 3 0 -1 0 0
# [561] 0 0 3 3 -1 0 -2 -4 3 3 1 0 -3 -4 0 0 2 4 4 0 -3 -1 0 0 2 0 -2 0 3 0 1 2 0 0 3 0 1 0 -4 0
# [601] 2 3 1 1 0 2 1 4 3 3 -2 -2 -1 -1 1 1 1 -1 -1 0 3 3 -1 0 2 4 1 1 1 -1 1 0 -5 1 1 1 0 -2 -2 -1
# [641] 0 0 -2 1 0 2 2 -6 6 -3 0 -6 3 1 0 2 1 -2 -5 1 0 3 -1 2 0 2 1 1 2 -4 2 -1 2 0 -2 1 -1 -2 0 -2
# [681] -1 0 5 1 0 -2 -3 0
library(fitdistrplus)
library(logspline)
## Step (1) Plot
# Cullen and Frey graph of the sample, treated as continuous data.
descdist(data = x, discrete = FALSE)
# Same graph with 1000 bootstrap replicates; the seed is fixed so that the
# bootstrapped points are reproducible from one run to the next.
set.seed(1)
descdist(data = x, discrete = FALSE, boot = 1000)
# Cullen and Frey graph: Theoretical distribution "logistic"
## Step (2) Fit
max(x)
# Fit each candidate distribution once and keep the fitted objects.
logis_ <- fitdist(x, "logis")
normal_ <- fitdist(x, "norm")
# Diagnostic plots for each fitted distribution.
plot(logis_)
plot(normal_)
## Step (3) Estimate parameters
print(logis_)
print(normal_)
summary(logis_)
summary(normal_)
## Step (4) Compare the candidate fits
# Goodness-of-fit statistics and information criteria side by side, so the
# choice between the candidates rests on numbers rather than on the plots only.
gofstat(list(logis_, normal_), fitnames = c("logis", "norm"))
##################################################################################################################
# Plot the probability density function of the 3-parameter log-normal
# distribution with the parameters reported by the software Easy Fit
# (σ = 0.03676, μ = 4.1258, γ = -61.826).
sdlog_fit <- 0.03676    # σ: standard deviation of log(X - γ)
meanlog_fit <- 4.1258   # μ: mean of log(X - γ)
thres_fit <- -61.826    # γ: threshold (lower bound of the support)

# The support starts at γ, but practically all of the probability mass lies
# around γ + exp(μ), i.e. close to 0. A grid just above γ (such as -61 to -60)
# only shows the numerically-zero left tail. The grid is therefore derived
# from the distribution itself: between its 0.01% and 99.99% quantiles.
tail_p <- 1e-4
x_limits <- thres_fit +
  qlnorm(c(tail_p, 1 - tail_p), meanlog = meanlog_fit, sdlog = sdlog_fit)
x1 <- seq(from = x_limits[1], to = x_limits[2], length.out = 501)

# Density of the shifted log-normal, written with base R only: this equals
# FAdist::dlnorm3(x1, shape = sdlog_fit, scale = meanlog_fit, thres = thres_fit).
# The matching cumulative distribution function is
# plnorm(q - thres_fit, meanlog = meanlog_fit, sdlog = sdlog_fit).
y <- dlnorm(x1 - thres_fit, meanlog = meanlog_fit, sdlog = sdlog_fit)

# Plot the log-normal distribution
plot(x1, y, type = "l", xlab = "x", ylab = "Density",
     main = "Log-Normal Distribution")
The plot looks a bit strange to me and I am not sure if I am on the right path. My aim is to use the cumulative distribution function to calculate probabilities for different values of x. But I am not sure whether the 3-parameter lognormal distribution is the best fit for my data.
Thanks in advance for your help.
Data in dput format:
# Sample data as an integer vector literal (dput format), so the analysis can
# be reproduced without the Excel file.
x <- c(2L, 1L, -1L, -7L, 1L, -2L, -2L, -1L, 0L, 0L, 2L, -1L, -1L,
-2L, -2L, 3L, 1L, -1L, 3L, -6L, -1L, 1L, -2L, 2L, -2L, 4L, 0L,
-1L, 5L, -2L, 3L, 0L, 0L, -4L, 1L, 4L, -2L, -1L, -1L, -2L, -4L,
0L, 2L, 1L, 2L, 0L, 3L, 0L, 3L, 1L, 1L, -4L, 1L, -2L, 2L, -2L,
-1L, 4L, 2L, 0L, -2L, -2L, -2L, 0L, 1L, 2L, 2L, 2L, 0L, -3L,
0L, 1L, -3L, 4L, 3L, 0L, 3L, 4L, -2L, 0L, -6L, -2L, 1L, 3L, -4L,
-1L, -2L, -2L, 1L, 0L, 3L, 2L, -3L, -3L, -4L, 1L, 0L, -1L, -1L,
1L, 0L, 0L, 3L, 1L, 2L, 0L, -1L, 0L, 1L, 1L, 0L, 2L, -3L, 2L,
-1L, 2L, -1L, -4L, -1L, -2L, 1L, 1L, 3L, 0L, 4L, 0L, -1L, 2L,
1L, -1L, -3L, -2L, -3L, 0L, 0L, -1L, 0L, 0L, -2L, -6L, -2L, -2L,
3L, -1L, 5L, 4L, -2L, 1L, -1L, -1L, -1L, -4L, 0L, -2L, 0L, 1L,
-1L, -1L, -1L, 4L, 1L, 0L, 5L, -4L, -1L, 0L, -1L, 1L, 1L, 1L,
0L, 2L, 3L, -2L, -4L, -2L, 1L, -1L, 0L, -1L, 3L, 1L, 1L, -1L,
-1L, -1L, 6L, -1L, -4L, -3L, 0L, -1L, -2L, -6L, -1L, 1L, -1L,
0L, 3L, -1L, -1L, -2L, 0L, 2L, -3L, -1L, 1L, -2L, 2L, 3L, 0L,
4L, 1L, -3L, 1L, -2L, 1L, -4L, 0L, -1L, 3L, 0L, 1L, 0L, 2L, -3L,
2L, 1L, 3L, 2L, -3L, 4L, 1L, 0L, 2L, 0L, -1L, 1L, 0L, 2L, 3L,
3L, 3L, 4L, 1L, 2L, -3L, 1L, -6L, -3L, 6L, 3L, 2L, -3L, 3L, -1L,
0L, 0L, 1L, 0L, 4L, 3L, 1L, 5L, 1L, -3L, 1L, 0L, -2L, -1L, 0L,
9L, 1L, 4L, -1L, -2L, -1L, 2L, -4L, 0L, -1L, -4L, -2L, -2L, 0L,
1L, -1L, 0L, 3L, -2L, -4L, 2L, -2L, 0L, 4L, -2L, -4L, 2L, -2L,
-1L, -8L, 0L, 6L, -1L, -3L, 2L, -2L, -1L, 2L, -1L, 0L, -2L, 2L,
-3L, 4L, -2L, 1L, 2L, 1L, 2L, -1L, 1L, -3L, -1L, 0L, 5L, 5L,
-2L, -1L, 1L, 0L, -1L, 1L, 1L, 3L, 3L, 2L, -1L, 3L, -2L, -1L,
4L, 1L, 6L, -1L, 1L, 7L, 2L, -1L, 1L, 0L, 2L, 2L, 1L, 1L, 4L,
-2L, -1L, 2L, 0L, 0L, -2L, 3L, 1L, 0L, 0L, 3L, 1L, 3L, 0L, 0L,
2L, -3L, 0L, -3L, 5L, -1L, 1L, 2L, -1L, 0L, 0L, 2L, -2L, 1L,
-3L, 0L, 2L, -7L, 1L, -3L, 3L, -1L, 3L, 3L, -1L, -1L, 6L, -1L,
3L, 0L, -1L, -3L, 0L, 2L, 2L, -5L, 1L, -1L, 0L, -1L, 2L, 2L,
-1L, 0L, 0L, 1L, -1L, 2L, 0L, -1L, 0L, -1L, -1L, -1L, 1L, 0L,
-1L, -3L, 5L, 0L, 4L, -4L, -2L, -1L, 10L, 3L, 0L, 1L, -1L, 2L,
0L, 3L, -2L, -6L, 0L, 1L, 1L, -1L, 1L, 1L, 1L, 1L, 2L, -1L, 0L,
0L, -1L, 1L, 1L, 2L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, -3L, 0L,
4L, 0L, 0L, 1L, 0L, 5L, -1L, 3L, -2L, 0L, -2L, -2L, -1L, -1L,
0L, 0L, 2L, 1L, 3L, -2L, -3L, 1L, -1L, -2L, 0L, 5L, -1L, 4L,
-1L, -2L, 0L, 0L, 1L, -1L, 1L, -2L, 2L, 0L, -1L, -1L, 0L, -2L,
-3L, -1L, -1L, 2L, -3L, -1L, 2L, 1L, 2L, -3L, -2L, 0L, 0L, 0L,
-2L, -3L, -1L, -1L, -1L, -2L, 0L, -1L, -1L, 0L, 0L, -1L, 0L,
3L, 0L, 1L, -1L, -1L, -2L, 1L, 0L, -1L, 2L, -4L, 2L, 1L, -1L,
2L, 3L, 0L, -1L, 0L, 0L, 0L, 0L, 3L, 3L, -1L, 0L, -2L, -4L, 3L,
3L, 1L, 0L, -3L, -4L, 0L, 0L, 2L, 4L, 4L, 0L, -3L, -1L, 0L, 0L,
2L, 0L, -2L, 0L, 3L, 0L, 1L, 2L, 0L, 0L, 3L, 0L, 1L, 0L, -4L,
0L, 2L, 3L, 1L, 1L, 0L, 2L, 1L, 4L, 3L, 3L, -2L, -2L, -1L, -1L,
1L, 1L, 1L, -1L, -1L, 0L, 3L, 3L, -1L, 0L, 2L, 4L, 1L, 1L, 1L,
-1L, 1L, 0L, -5L, 1L, 1L, 1L, 0L, -2L, -2L, -1L, 0L, 0L, -2L,
1L, 0L, 2L, 2L, -6L, 6L, -3L, 0L, -6L, 3L, 1L, 0L, 2L, 1L, -2L,
-5L, 1L, 0L, 3L, -1L, 2L, 0L, 2L, 1L, 1L, 2L, -4L, 2L, -1L, 2L,
0L, -2L, 1L, -1L, -2L, 0L, -2L, -1L, 0L, 5L, 1L, 0L, -2L, -3L, 0L)
Upvotes: 0
Views: 51