Reputation: 147
Assume that iid random sample X_i\sim Bernoulli(\pi)
for i=1,\dots, 100
and sample size is 100. We want to do a hypothesis test that
H_0: \pi\ge 0.6,\, H_a: \pi<0.6
I want to get a plot of the relation between our true parameter pi
and power. I have got the function power
. But I can only input each true pi=0.50, 0.51, 0.52, 0.53, ..., 0.59
. How to plot a similar figure?
My code is as follows.
n=100
pi0=0.60
##power function
power_fun=function(N,pi,alpha)
{
pvalue=c()
power=c()
rej=c()
for (i in 1:N) {
set.seed(i)
y=rbinom(n,size=1,prob=pi)
pi_hat=mean(y)
z=(pi_hat-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
rej[i]=ifelse(z<qnorm(0.05,mean=0,sd=1),1,0)
pvalue[i]=pnorm(q=z,mean=0,sd=1)
c_value=qnorm(alpha,mean=0,sd=1)
aug_term=(pi-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
power[i]=pnorm(c_value-aug_term,mean=0,sd=1)
}
mean_pvalue=mean(pvalue)
sd_pvalue=sd(pvalue)
mean_power=ifelse(pi<pi0,mean(power),NA)
sd_power=sd(power)
rej_rate=mean(pvalue<alpha)
type1_error=ifelse(pi>=pi0,rej_rate,NA)
type2_error=ifelse(pi<pi0,1-mean_power,NA)
type2_error_emp=ifelse(pi<pi0,1-rej_rate,NA)
mc_se=sqrt(1/N*rej_rate*(1-rej_rate))
df_out=data.frame("pvalue"=mean_pvalue,"pvalue2"=sd_pvalue,"power"=mean_power,"ESE_power"=sd_power,
"rejection rate"=rej_rate,
"Type I error"= type1_error,"type_II_error"=type2_error,"Type_II_error"=type2_error_emp,
"MC_SE"=mc_se)
rownames(df_out)=paste0("N=",N,", pi=",pi, ", alpha=", alpha)
df_out=round(df_out,3)
return(df_out)
}
For each pi
, we can get power.
power_fun(N=1000,pi=0.5,alpha=0.05)
#power=0.643
power_fun(N=1000,pi=0.51,alpha=0.05)
# power=0.566
power_fun(N=1000,pi=0.52,alpha=0.05)
power_fun(N=1000,pi=0.53,alpha=0.05)
power_fun(N=1000,pi=0.54,alpha=0.05)
But this is too complicated. Is there an easy way to get the power of these pi values and plot their graph of pi
and power
?
Upvotes: 0
Views: 98
Reputation: 1709
This is more like a code-review.
I think that you thought that you could vectorise over N
, pi
, and alpha
.
I don't think you can that with your current implementation.
Following @Limey's advice:
#' Power function
#'
#'
#' @param N Total number of replications
#' @param pi
#' @param alpha Significance level
#' @param pi0
#' @param n Size of the replication
#'
#' @return
#'
power_fun <- function(N, pi, alpha, pi0, n) {
pvalue <- vector(mode = "numeric", length = N)
power <- vector(mode = "numeric", length = N)
rej <- vector(mode = "numeric", length = N)
for (i in seq_len(N)) {
# why?
# set.seed(i)
y = rbinom(n, size = 1, prob = pi)
pi_hat = mean(y)
z = (pi_hat - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
rej[i] = ifelse(z < qnorm(0.05, mean = 0, sd = 1), 1, 0)
pvalue[i] = pnorm(q = z, mean = 0, sd = 1)
c_value = qnorm(alpha, mean = 0, sd = 1)
aug_term = (pi - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
power[i] = pnorm(c_value - aug_term, mean = 0, sd = 1)
}
mean_pvalue = mean(pvalue, na.rm = TRUE)
sd_pvalue = sd(pvalue, na.rm = TRUE)
# mean_power = ifelse(pi < pi0, mean(power), NA)
mean_power = mean(power, na.rm = TRUE)
sd_power = sd(power, na.rm = TRUE)
rej_rate = mean(pvalue < alpha, na.rm = TRUE)
type1_error = ifelse(pi >= pi0, rej_rate, NA)
type2_error = ifelse(pi < pi0, 1 - mean_power, NA)
type2_error_emp = ifelse(pi < pi0, 1 - rej_rate, NA)
mc_se = sqrt(1 / N * rej_rate * (1 - rej_rate))
df_out <- data.frame(
N = N,
pi = pi,
alpha = alpha,
pvalue = mean_pvalue,
pvalue2 = sd_pvalue,
power = mean_power,
ESE_power = sd_power,
rejection_rate = rej_rate,
Type_I_error = type1_error,
type_II_error = type2_error,
Type_II_error = type2_error_emp,
MC_SE = mc_se
)
# moved this to above
# rownames(df_out) = paste0("N=", N, ", pi=", pi, ", alpha=", alpha)
# why?
# df_out = round(df_out, 3)
return(df_out)
}
Please read the comments and the changes.
library(tidyverse)
n <- 100
pi0 <- 0.6
# testing the function
power_fun(N=1000,pi=0.5,alpha=0.05, pi0 = pi0, n = n)
#' Plot for all pi going from 0.5 to 1.
seq.default(0, 0.65, length.out = 200) %>%
# seq.default(0.5, 1., length.out = 2) %>%
map_df(function(pi) power_fun(N=1000,pi=pi,alpha=0.05, pi0 = pi0, n = n)) %>%
as_tibble() ->
power_df
# power_df %>% View()
power_df %>% {
ggplot(., aes(pi, power)) +
geom_line() +
# geom_point() +
geom_vline(xintercept = pi0, linetype = "dashed") +
NULL
}
Upvotes: 1