bias-corrected percentile bootstrap

Question

I am trying to run a bias-corrected percentile bootstrap for 3 different effect sizes and ran across some code from a predecessor. While this code functions, the run time takes FAR too long. It takes about 3 days to do each of the 3 different effect sizes.

It also shows each of the 1000 iterations which I do not need. Ideally, I would like to optimize run time maximally. I would also like all 3 of the outputs on the console or into a word/excel document rather than having to run them one at time.

I know the code is a bit clunky, but some help would be appreciated.

I am fairly new to R, and was looking for some advice on how to proceed appropriately. The code is presented below:

library(pscl)
library(boot)

# RNG MODULE FOR TWO_PART HURDLE MODEL

gen.hurdle = function(n, a, b1, b2, c1, c2, i0, i1, i2){

  x = round(rnorm(n),3)
  e = rnorm(n)
  m = round(i0 + a*x + e, 3)

  lambda = exp(i1 + b1*m + c1*x)                       # PUT REGRESSION TERMS FOR THE CONTINUUM PART HERE; KEEP exp()
  ystar = qpois(runif(n, dpois(0, lambda), 1), lambda) # Zero-TRUNCATED POISSON DIST.; THE CONTINUUM PART

  z = i2 + b2*m  + c2*x                                # PUT REGRESSION TERMS FOR THE BINARY    PART HERE
  z_p = exp(z) / (1+exp(z))                            # p(1) = 1-p(0)
  tstar = rbinom(n, 1, z_p)                            # BINOMIAL DIST.         ; THE BINARY    PART

  y= ystar*tstar                                       # TWO-PART COUNT OUTCOME

  return(cbind(x,m,y,z,z_p,tstar))
}


##################################################################################################
# MODEL ##########################################################################################
##################################################################################################

# i=1  ###################################

#small effect
seed=51; n=50  ;a=.18; b=.16; c=.25; i=1
seed=53; n=100 ;a=.18; b=.16; c=.25; i=1
seed=55; n=200 ;a=.18; b=.16; c=.25; i=1
seed=57; n=300 ;a=.18; b=.16; c=.25; i=1
seed=58; n=500 ;a=.18; b=.16; c=.25; i=1
seed=59; n=1000;a=.18; b=.16; c=.25; i=1

#medium effect
seed=61; n=50  ;a=.31; b=.35; c=.25; i=1
seed=63; n=100 ;a=.31; b=.35; c=.25; i=1
seed=65; n=200 ;a=.31; b=.35; c=.25; i=1
seed=67; n=300 ;a=.31; b=.35; c=.25; i=1
seed=68; n=500 ;a=.31; b=.35; c=.25; i=1
seed=69; n=1000;a=.31; b=.35; c=.25; i=1

#large effect
seed=81; n=50  ;a=.52; b=.49; c=.25; i=1
seed=73; n=100 ;a=.52; b=.49; c=.25; i=1
seed=75; n=200 ;a=.52; b=.49; c=.25; i=1
seed=77; n=300 ;a=.52; b=.49; c=.25; i=1
seed=78; n=500 ;a=.52; b=.49; c=.25; i=1
seed=79; n=1000;a=.52; b=.49; c=.25; i=1


#model
set.seed(seed)
iterations = 1000
r = 1000

results = matrix(,iterations,4)
for (iiii in 1:iterations){

  data  = data.frame(gen.hurdle(n, a, b, b, c, c, i, i, i))
  data0 = data.frame(gen.hurdle(n, a, 0, 0, c, c, i, i, i))

  p_0     =1-mean(data$z_p)
  p_0_hat =1-mean(data$tstar)
  p_0_b0     =1-mean(data0$z_p)
  p_0_hat_b0 =1-mean(data0$tstar)

  # power

  boot= matrix(,r,8)
  for(jjjj in 1:r){

    #power
    fit1      = lm(m ~ x, data=data)
    fit2      = hurdle(formula = y ~ m + x, data=data, dist = "poisson", zero.dist = "binomial") 

    a_hat     = summary(fit1)$coef[2,1]
    b1_hat    = summary(fit2)[[1]]$count[2,1]
    b2_hat    = summary(fit2)[[1]]$zero[2,1]
    ab1_hat   = prod(a_hat,b1_hat)
    ab2_hat   = prod(a_hat,b2_hat)

    boot.data = data[sample(nrow(data), replace = TRUE), ]
    boot.data$y[1] = if(prod(boot.data$y) > 0) 0 else boot.data$y[1]

    boot.fit1 = lm(m ~ x, data=boot.data)
    boot.fit2 = hurdle(formula = y ~ m + x, data=boot.data, dist = "poisson", zero.dist = "binomial") 

    boot.a    = summary(boot.fit1)$coef[2,1]
    boot.b1   = summary(boot.fit2)[[1]]$count[2,1]
    boot.b2   = summary(boot.fit2)[[1]]$zero[2,1]
    boot.ab1  = prod(boot.a,boot.b1)
    boot.ab2  = prod(boot.a,boot.b2)

    #type I error
    fit3       = lm(m ~ x, data=data0)
    fit4       = hurdle(formula = y ~ m + x, data=data0, dist = "poisson", zero.dist = "binomial")  

    a_hat_b0   = summary(fit3)$coef[2,1]
    b1_hat_b0  = summary(fit4)[[1]]$count[2,1]
    b2_hat_b0  = summary(fit4)[[1]]$zero[2,1]
    ab1_hat_b0 = prod(a_hat_b0,b1_hat_b0)
    ab2_hat_b0 = prod(a_hat_b0,b2_hat_b0)

    boot.data0 = data0[sample(nrow(data0), replace = TRUE), ]
    boot.data0$y[1] = if(prod(boot.data0$y) > 0) 0 else boot.data0$y[1]

    boot.fit3  = lm(m ~ x, data=boot.data0)
    boot.fit4  = hurdle(formula = y ~ m + x, data=boot.data0, dist = "poisson", zero.dist = "binomial")  

    boot.a_b0   = summary(boot.fit3)$coef[2,1]
    boot.b1_b0  = summary(boot.fit4)[[1]]$count[2,1]
    boot.b2_b0  = summary(boot.fit4)[[1]]$zero[2,1]
    boot.ab1_b0 = prod(boot.a_b0,boot.b1_b0)
    boot.ab2_b0 = prod(boot.a_b0,boot.b2_b0)

    boot[jjjj,] = c(ab1_hat,    ab2_hat,    boot.ab1,    boot.ab2,
                    ab1_hat_b0, ab2_hat_b0, boot.ab1_b0, boot.ab2_b0)

  }

  z0.1 = qnorm((sum(boot[,3] > boot[,1])+sum(boot[,3]==boot[,1])/2)/r)
  z0.2 = qnorm((sum(boot[,4] > boot[,2])+sum(boot[,4]==boot[,2])/2)/r)
  z0.1_b0 = qnorm((sum(boot[,7] > boot[,5])+sum(boot[,7]==boot[,5])/2)/r)
  z0.2_b0 = qnorm((sum(boot[,8] > boot[,6])+sum(boot[,8]==boot[,6])/2)/r)

  alpha=0.05 # 95% limits
  z=qnorm(c(alpha/2,1-alpha/2)) # Std. norm. limits

  p1    = pnorm(z-2*z0.1) # bias-correct & convert to proportions
  p2    = pnorm(z-2*z0.2) 
  p1_b0 = pnorm(z-2*z0.1_b0)
  p2_b0 = pnorm(z-2*z0.2_b0) 

  ci1    = quantile(boot[,3],p=p1) # Bias-corrected percentile lims
  ci2    = quantile(boot[,4],p=p2)
  ci1_b0 = quantile(boot[,7],p=p1_b0)
  ci2_b0 = quantile(boot[,8],p=p2_b0)

  sig.ab1 = if(prod(ci1) > 0) 1 else 0
  sig.ab2 = if(prod(ci2) > 0) 1 else 0
  sig.ab1_b0 = if(prod(ci1_b0) > 0) 1 else 0
  sig.ab2_b0 = if(prod(ci2_b0) > 0) 1 else 0



  #results
  results[iiii,] = c(sig.ab1, sig.ab2, sig.ab1_b0, sig.ab2_b0)

  message(paste0(iiii, " / iterations"))
  flush.console()
}

i
n
a
b
iterations
#bootstrap how many
r

#power of ab1
mean(results[,1])
#power of ab2
mean(results[,2])
#type I error of ab1
mean(results[,3])
#type I error of ab2
mean(results[,4])

It would seem to me that the problem with each of the effects being ran separately is coming from naming the results "results" for each loop. I do not know the best way to print all results(for each effect size) without disturbing the loop.

The length is obviously coming from the sheer amount of iterations and lack of RAM to process them fast enough. Is this something that can even been remedied? The time isn't nearly as much of a concern as not getting results for all effect sizes when the program is run.

bias-corrected percentile bootstrap

Answers (1)

Related Questions