Hierarchical Dirichlet regression (jags)... overfitting

Question

Good Morning, please I need community help in order to understand some problems that occurred writing this model. I aim at modeling causes of death proportion using as predictors "log_GDP" (Gross domestic product in log scale), and "log_h" (hospital beds per 1,000 people on log scale)

y: 3 columns that are observed proportions of deaths over the years.
x1: "log_GDP" (Gross domestic product in log scale)
x2: "log_h" (hospital beds per 1,000 people in log scale)

As you can see from the estimation result in the last plot, I got a high noise level. Where I worked using just one covariate i.e. log_GDP, I obtained smoothed results

Here the model specification:

Here simulated data:

library(reshape2)
library(tidyverse)
library(ggplot2)
library(runjags)

CIRC <- c(0.3685287, 0.3675516, 0.3567829, 0.3517274, 0.3448940, 0.3391031, 0.3320184, 0.3268640,
          0.3227445, 0.3156360, 0.3138515,0.3084506, 0.3053657, 0.3061224, 0.3051044)

NEOP <- c(0.3602199, 0.3567355, 0.3599409, 0.3591258, 0.3544591, 0.3566269, 0.3510974, 0.3536156,
          0.3532980, 0.3460948, 0.3476183, 0.3475634, 0.3426035, 0.3352433, 0.3266048)

OTHER <-c(0.2712514, 0.2757129, 0.2832762, 0.2891468, 0.3006468, 0.3042701, 0.3168842, 0.3195204, 
          0.3239575, 0.3382691, 0.3385302, 0.3439860, 0.3520308, 0.3586342, 0.3682908)

log_h <- c(1.280934, 1.249902, 1.244155, 1.220830, 1.202972, 1.181727, 1.163151, 1.156881, 1.144223,
       1.141033, 1.124930, 1.115142, 1.088562, 1.075002, 1.061257)

log_GDP <- c(29.89597, 29.95853, 29.99016, 30.02312, 30.06973, 30.13358, 30.19878, 30.25675, 30.30184,
         30.31974, 30.30164, 30.33854, 30.37460, 30.41585, 30.45150)

D <- data.frame(CIRC=CIRC, NEOP=NEOP, OTHER=OTHER,
              log_h=log_h, log_GDP=log_GDP)

cause.y <- as.matrix((data.frame(D[,1],D[,2],D[,3])))
cause.y <-  cause.y/rowSums(cause.y)
mat.x<- D$log_GDP  
mat.x2 <- D$log_h
n <- 15

Jags Model


dirlichet.model = "
model {
#setup priors for each species
for(j in 1:N.spp){
m0[j] ~ dnorm(0, 1.0E-3) #intercept prior
m1[j] ~ dnorm(0, 1.0E-3) #      mat.x prior
m2[j] ~ dnorm(0, 1.0E-3)
}

#implement dirlichet
for(i in 1:N){
y[i,1:N.spp] ~ ddirch(a0[i,1:N.spp])


for(j in 1:N.spp){
log(a0[i,j]) <- m0[j] + m1[j] * mat.x[i]+ m2[j] * mat.x2[i] # m0 = intercept; m1= coeff log_GDP; m2= coeff log_h
}

}} #close model loop.
"

jags.data <- list(y = cause.y,mat.x= mat.x,mat.x2= mat.x2, N = nrow(cause.y), N.spp = ncol(cause.y))
jags.out <- run.jags(dirlichet.model,
                     data=jags.data,
                     adapt = 5000,
                     burnin = 5000,
                     sample = 10000,
                     n.chains=3,
                     monitor=c('m0','m1','m2'))
out <- summary(jags.out)
head(out)

Gather coefficient and I make estimation of proportions

coeff <- out[c(1,2,3,4,5,6,7,8,9),4]

coef1 <- out[c(1,4,7),4] #coeff (interc and slope) caus 1
coef2 <- out[c(2,5,8),4] #coeff (interc and slope) caus 2
coef3 <- out[c(3,6,9),4] #coeff (interc and slope) caus 3
pred <- as.matrix(cbind(exp(coef1[1]+coef1[2]*mat.x+coef1[3]*mat.x2),
                        exp(coef2[1]+coef2[2]*mat.x+coef2[3]*mat.x2),
                        exp(coef3[1]+coef3[2]*mat.x+coef3[3]*mat.x2)))
pred <- pred / rowSums(pred)

Predicted and Obs. values DB

Obs <- data.frame(Circ=cause.y[,1],
                  Neop=cause.y[,2],
                  Other=cause.y[,3],
                  log_GDP=mat.x,
                  log_h=mat.x2)

Obs$model <- "Obs"

Pred <- data.frame(Circ=pred[,1],
                   Neop=pred[,2],
                   Other=pred[,3],
                   log_GDP=mat.x,
                   log_h=mat.x2)

Pred$model <- "Pred"

tot60<-as.data.frame(rbind(Obs,Pred))
tot <- melt(tot60,id=c("log_GDP","log_h","model"))
tot$variable <- as.factor(tot$variable)

Plot

tot %>%filter(model=="Obs") %>%  ggplot(aes(log_GDP,value))+geom_point()+
  geom_line(data = tot %>%
              filter(model=="Pred"))+facet_wrap(.~variable,scales = "free")

Hierarchical Dirichlet regression (jags)... overfitting

Answers (1)

Related Questions