Crimc
Crimc

Reputation: 306

How to add confidence intervals around predicted prior probabilities of latent class membership in a LCA model fitted with {poLCA}?

I want to add confidence intervals to the predicted prior probabilities from a latent class analysis (LCA) model with covariates.

I have prepared an example built upon one provided by the authors of the package {poLCA} (link).

The relevant piece for calculating the confidence intervals should be the covariance matrix in the object with the fitted model (nes.3cov$coeff.V in this example).

The solution should add the lower and upper bounds from the confidence intervals to the object pdata, so they can be included as ribbons in the visualization.

library(poLCA)
library(tidyr)
library(dplyr)
library(ggplot2)

data(election)

## Fit LCA model with covariates using 3 classes
f.3cov <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
                MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~PARTY*AGE
nes.3cov <- poLCA(f.3cov,election,nclass=3)          # log-likelihood: -16135.39 

probs.start <- poLCA.reorder(nes.3cov$probs.start,
                             order(nes.3cov$P,decreasing=TRUE))
nes.3cov <- poLCA(f.3cov,election,nclass=3,probs.start=probs.start)

# Create profiles for predicted prior probabilities
## Strong dems and strong reps of different age
pred.profs <- rbind(cbind(1,1,c(18:80),(c(18:80)*1)),
                 cbind(1,7,c(18:80),(c(18:80)*7)))
exb.pred.profs <- exp(pred.profs %*% nes.3cov$coeff)

# Calculate predicted prior probabilities
pred.profs <- cbind(1, exb.pred.profs)/(1 + rowSums(exb.pred.profs))
pdata <- cbind(data.frame(pred.profs),
                    age = rep(18:80, 2),
                    party = rep(c("Strong Democrat", "Strong Republican"), 
                                each = 63))
# Prepare data for plot
pdata <- pdata |> 
  pivot_longer(cols = X1:X3, names_to = "affinity", values_to = "pp") |> 
  mutate(affinity = case_when(
    affinity == "X1" ~ "Other",
    affinity == "X2" ~ "Bush affinity",
    affinity == "X3" ~ "Gore affinity"))
  
# Plot predicted prior probabilities
pdata |> 
  ggplot(aes(x = age, y = pp, fill = affinity, linetype = affinity)) +
  geom_line() +
  scale_linetype_manual(values = c("longdash", "dotted", "solid")) +
  facet_wrap(~ party) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(x = "Age", y = "Probability of latent class membership") +
  theme_classic()

Created on 2023-05-17 with reprex v2.0.2

Upvotes: 0

Views: 186

Answers (1)

DaveArmstrong
DaveArmstrong

Reputation: 22034

You could do this with a pseudo-Bayesian approach, treating the coefficients as posterior means and the variance-covariance matrix as the posterior variance-covariance matrix of the coefficients. Then, you take random draws from the multivariate normal distribution with the coefficients and variance-covariance matrix as parameters (I did 2500). For each draw you make the matrix of probabilities. Then, you can find the lower and upper bounds from those 2500 simulations.

Estimate models

library(poLCA)
library(tidyr)
library(dplyr)
library(ggplot2)

data(election)

## Fit LCA model with covariates using 3 classes
f.3cov <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
                MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~PARTY*AGE
nes.3cov <- poLCA(f.3cov,election,nclass=3) 
probs.start <- poLCA.reorder(nes.3cov$probs.start,
                             order(nes.3cov$P,decreasing=TRUE))
nes.3cov <- poLCA(f.3cov,election,nclass=3,probs.start=probs.start)

Calculate probabilties

pred.profs <- rbind(cbind(1,1,c(18:80),(c(18:80)*1)),
                    cbind(1,7,c(18:80),(c(18:80)*7)))
q <- cbind(1, exp(pred.profs %*% nes.3cov$coeff))
p <- prop.table(q, 1)

Do simulation

B <- MASS::mvrnorm(2500, c(nes.3cov$coeff), nes.3cov$coeff.V)
B <- lapply(1:nrow(B), \(i)matrix(B[i, ], ncol=2))
probs <- lapply(B, function(b){
  q <- cbind(1, exp(pred.profs %*% b))
  prop.table(q, 1)
})
probs <- abind::abind(probs, along=3)
lwr <- apply(probs, c(1,2), quantile, .025)
upr <- apply(probs, c(1,2), quantile, .975)
colnames(p) <- paste0("pp_", 1:3)
colnames(lwr) <- paste0("lwr_", 1:3)
colnames(upr) <- paste0("upr_", 1:3)

Combine probabilities, lower and upper bounds and data

pdata <- cbind(data.frame(p),
               data.frame(lwr), 
               data.frame(upr), 
               age = rep(18:80, 2),
               party = rep(c("Strong Democrat", "Strong Republican"), 
                           each = 63))
# Prepare data for plot
pdata <- pdata |> 
  pivot_longer(cols = 1:9, names_sep="_", names_to = c(".value", "affinity")) |> 
  mutate(affinity = case_when(
    affinity == "1" ~ "Other",
    affinity == "2" ~ "Bush affinity",
    affinity == "3" ~ "Gore affinity"))

Make plot

pdata |> 
  ggplot(aes(x = age, y = pp, ymin =lwr, ymax=upr, linetype = affinity)) +
  geom_ribbon(alpha=.2, fill="gray75") + 
  geom_line() +
  scale_linetype_manual(values = c("longdash", "dotted", "solid")) +
  facet_wrap(~ party) +
  coord_cartesian(ylim = c(0, 1)) +
  labs(x = "Age", y = "Probability of latent class membership") +
  theme_classic()

Created on 2023-05-17 with reprex v2.0.2

Upvotes: 1

Related Questions