Reputation: 25

Displaying area under the curve between two z values in standard normal distribution

I am trying to draw a sketch something like this: where I can denote the area under curve left/right of the z score value in standard normal distribution I am trying to make a similar graph but this time want to display all areas in -2.25<Z<1.77 with two lines perhaps? I have searched so many places but cannot seem to find an answer and mosaic::xpnorm() seems to be the closest one but I don't believe it has an option for that.

library(mosaic)
xpnorm(1.77, mean=0, sd=1)

Upvotes: 0

Answers (2)

tmontserrat

Reputation: 101

I'm not sure if I've completely understood what you want. If you are looking for a way to shade the area under a normal curve between two z scores, then you could use a function I wrote, expanding a function from https://gist.github.com/jrnold/6799152.

You can use the function normal_prob_area_plot_p_value() to shade the area beyond your Z scores:

normal_prob_area_plot_p_val <- function(lb, ub, 
                                        mean = 0, sd = 1, 
                                        limits = c(mean - 3 * sd, mean + 3 * sd),
                                        type = "lower_upper_bounds") {
  if (type == "lower_upper_bounds") {
    # X axis
    x <- seq(limits[1], 
             limits[2], 
             length.out = 100)
    # Define the lower and upper bound
    xmin <- max(lb, limits[1])
    xmax <- min(ub, limits[2])
    # Define the lower tail area x axis
    areax1 <- seq(limits[1], 
                  xmin, 
                  length.out = 100)
    # Define the upper tail area x axis
    areax2 <- seq(limits[2], xmax, length.out = 100)
    # Calculate densities for the areas
    area1 <- data.frame(x = areax1, 
                        ymin = 0, 
                        ymax = dnorm(areax1, mean, sd))
    area2 <- data.frame(x = areax2, 
                        ymin = 0, 
                        ymax = dnorm(areax2, mean, sd))
    return(ggplot()
           # Normal pdf representation
           + geom_line(data.frame(x = x,
                                  y = dnorm(x, mean, sd)),
                       mapping = aes(x = x, y = y))
           # Lower tail area
           + geom_ribbon(data = area1, 
                         mapping = aes(x = x, 
                                       ymin = 
                                         ymin, 
                                       ymax = 
                                         ymax), 
                         fill = "dodgerblue", alpha=0.4)
           # Upper tail area
           + geom_ribbon(data = area2, 
                         mapping = aes(x = x, 
                                       ymin = ymin, 
                                       ymax = ymax), 
                         fill = "dodgerblue", alpha=0.4)
           # X axis
           + scale_x_continuous(limits = limits) 
           + labs(y="Density", x="z-score") +
             theme_bw())
    
  } 
  # The same than before, but now
  # only matters the lower tail
  else if (type == "lower_bound") {
    x <- seq(limits[1], 
             limits[2], 
             length.out = 100)
    xmin <- max(lb, limits[1])
    xmax <- min(ub, limits[2])
    areax1 <- seq(limits[1], 
                  xmin, 
                  length.out = 100)
    areax2 <- seq(limits[2], 
                  xmax, 
                  length.out = 100)
    area1 <- data.frame(x = areax1, 
                        ymin = 0, 
                        ymax = dnorm(areax1, mean, sd))
    area2 <- data.frame(x = areax2, 
                        ymin = 0, 
                        ymax = dnorm(areax2, mean, sd))
    return(ggplot()
           + geom_line(data.frame(x = x, 
                                  y = dnorm(x, mean, sd)),
                       mapping = aes(x = x, 
                                     y = y))
           + geom_ribbon(data = area1, 
                         mapping = aes(x = x, 
                                       ymin = ymin, 
                                       ymax = ymax), 
                         fill = "dodgerblue", alpha=0.4)
           + scale_x_continuous(limits = limits) 
           + labs(y="Density", x="z-score") +
             theme_bw())
  } 
  # Only matters the upper tail
  else if (type == "upper_bound") {
    x <- seq(limits[1], 
             limits[2], 
             length.out = 100)
    xmin <- max(lb, limits[1])
    xmax <- min(ub, limits[2])
    areax1 <- seq(limits[1], 
                  xmin, 
                  length.out = 100)
    areax2 <- seq(limits[2], 
                  xmax, 
                  length.out = 100)
    area1 <- data.frame(x = areax1, 
                        ymin = 0, 
                        ymax = dnorm(areax1, mean, sd))
    area2 <- data.frame(x = areax2, 
                        ymin = 0, 
                        ymax = dnorm(areax2, mean, sd))
    return(ggplot()
           + geom_line(data.frame(x = x, 
                                  y = dnorm(x, mean, sd)),
                       mapping = aes(x = x, 
                                     y = y))
           + geom_ribbon(data = area2, 
                         mapping = aes(x = x, 
                                       ymin = ymin, 
                                       ymax = ymax), 
                         fill = "dodgerblue", alpha=0.4)
           + scale_x_continuous(limits = limits)
           + labs(y="Density", x="z-score") +
             theme_bw())
  }
  # Shade the inner zone between the bounds
  else if (type=="inner") {
    # X axis
    x <- seq(limits[1], 
             limits[2], 
             length.out = 100)
    # Define the lower and upper bound
    xmin <- lb
    xmax <- ub
    # Define the area to be shaded
    areax <- seq(xmin, 
                  xmax, 
                  length.out = 100)
    # Calculate densities for the areas
    area <- data.frame(x = areax, 
                        ymin = 0, 
                        ymax = dnorm(areax, mean, sd))
    return(ggplot()
           # Normal pdf representation
           + geom_line(data.frame(x = x,
                                  y = dnorm(x, mean, sd)),
                       mapping = aes(x = x, y = y))
           
           # Area
           + geom_ribbon(data = area, 
                         mapping = aes(x = x, 
                                       ymin = ymin, 
                                       ymax = ymax), 
                         fill = "dodgerblue", alpha=0.4)
           # X axis
           + scale_x_continuous(limits = limits) 
           + labs(y="Density", x="z-score") +
             theme_bw())
    
  } 
}

In your case, the usage would be:

normal_prob_area_plot_p_val(-2.25, 1.77)

And that's what you would get:

However, given this function, it's easy to shift the shaded area. If you actually want to shade the area between the z scores, then you can use:

normal_prob_area_plot_p_val(-2.25, 1.77, type="inner")

You would get:

Upvotes: 0

jay.sf

Reputation: 73272

We can use polygon() to color areas under a curve. polygon() needs a two-dimensional coordinates matrix of the points of the polygon line where it draws straight lines between these points. Actually their number is infinity, but the function will smooth sufficiently above a certain number, i.e. we want a sequence of length, say, 200. For the coordinates matrix we want the x values and their corresponding y=dnorm(x) values.

# given z values
z.rg <- c(-2.25, 1.77)

# define cuts and canonical z-scores
cuts <- sort(sort(c(z.rg, -3.5, 3.5)))  # c(-3.5, 3.5) will be the xlim of the plot
x.sq <- seq(cuts[1], cuts[4], len=200)
alpha <- c(.001, .01, .05)
z <- c(qnorm(alpha/2), 0, abs(qnorm(alpha/2)))

# plot
plot(x.sq, dnorm(x.sq), type="l", xlab="z-score", ylab="density", 
     main="Standard Normal Distribution", xaxt="n")
z <- c(qnorm(alpha/2), 0, abs(qnorm(alpha/2)))
axis(1, z, round(z, 2))
abline(h=0)
# random middle part (optional)
polygon(c(cuts[2], cuts[2], x.sq[x.sq > cuts[2] & x.sq < cuts[3]], cuts[3], cuts[3]),
        c(0, dnorm(cuts[2]), dnorm(x.sq[x.sq > cuts[2] & x.sq < cuts[3]]), dnorm(cuts[3]), 0),
        col="lightgrey", border=1)
# left tail
polygon(c(cuts[3], cuts[3], x.sq[x.sq > cuts[3]], cuts[4], cuts[4]),
        c(0, dnorm(cuts[3]), dnorm(x.sq[x.sq > cuts[3]]), dnorm(cuts[4]), 0),
        col="#4da6ff", border=1)
# right tail
polygon(c(cuts[1], cuts[1], x.sq[x.sq < cuts[2]], cuts[2], cuts[2]),
        c(0, dnorm(cuts[1]), dnorm(x.sq[x.sq < cuts[2]]), dnorm(cuts[2]), 0),
        col="#4da6ff", border=4)
# labels
arrows(z.rg, rep(dnorm(z.rg), length(z.rg)), z.rg, rep(dnorm(0)*.666, length(z.rg)),
       length=0, lty=2, col=4)
sapply(z.rg, function(x) text(x, dnorm(0)*.666 + .02, bquote(italic("z=")~.(x)), col=4))

Upvotes: 1

Displaying area under the curve between two z values in standard normal distribution

Answers (2)

Related Questions