Reputation: 47
I was wondering if there is a way to speed up this function (it takes about 20 seconds with these defaults), since normally sim would be much larger, e.g. 10,000 or 100,000.
twosamp.fisher <- function(p1 = 0.1,
                           p2 = 0.05,
                           alpha = 0.025,
                           power = 0.9,
                           sim = 1000,
                           seed = 123) {
  # normal-approximation starting point for the per-arm sample size
  z_c <- qnorm(1 - alpha) + qnorm(power)
  flag <- 0
  n_int <- round(z_c^2 * (p1 * (1 - p1) + p2 * (1 - p2)) / ((p1 - p2)^2))
  set.seed(seed)
  p.value <- numeric(sim)
  for (n in seq(n_int, 10000, 1)) {
    # simulate `sim` trials with n subjects per arm
    m <- matrix(NA, nrow = sim, ncol = 4)
    ka <- rbinom(sim, n, p1)
    kb <- rbinom(sim, n, p2)
    m[, 1] <- ka
    m[, 2] <- n - ka
    m[, 3] <- kb
    m[, 4] <- n - kb
    # one-sided Fisher test for each simulated trial (the slow part)
    p.value <- apply(
      m,
      1,
      function(x) {
        fisher.test(
          matrix(c(x[1], x[2], x[3], x[4]),
                 nrow = 2,
                 dimnames = list(
                   trt = c("Yes", "No"),
                   ctrl = c("Yes", "No")
                 )),
          alternative = "greater"
        )$p.value
      }
    )
    power.est <- mean(p.value < alpha)
    # stop once the estimated power has met the target 10 times in a row
    if (power.est >= power) {
      flag <- flag + 1
    } else if (power.est < power && flag >= 1) {
      flag <- 0
    }
    if (flag == 10) {
      break
    }
  }
  n <- n - flag + 1
  cat("sample needed in each arm: ", n)
}
system.time(twosamp.fisher())
Upvotes: 2
Views: 83
Reputation: 16981
Using Ritchie Sacramento's suggestion, you can replace the apply call with
phyper(m[,1] - 1, m[,1] + m[,3], m[,2] + m[,4], n, FALSE)
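To see why this works: with both margins of the 2x2 table fixed, ka follows a hypergeometric distribution, so the one-sided "greater" Fisher p-value is just the upper tail P(X >= ka). A quick check on a single table (the counts below are made up purely for illustration):
# arbitrary counts, chosen only to illustrate the identity
ka <- 12; kb <- 5; n <- 100
fisher.test(matrix(c(ka, n - ka, kb, n - kb), nrow = 2),
            alternative = "greater")$p.value
phyper(ka - 1, ka + kb, (n - ka) + (n - kb), n, lower.tail = FALSE)
# both calls return the same p-value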
Using phyper instead of fisher.test speeds the function up considerably:
system.time(twosamp.fisher())
#> sample needed in each arm: 622
#> user system elapsed
#> 0.05 0.01 0.06
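For reference, a minimal sketch of how the phyper line slots into the loop from the question, written directly in terms of ka and kb (so the matrix m is no longer needed); the stopping rule and everything outside the loop stay exactly as in the original:
for (n in seq(n_int, 10000, 1)) {
  ka <- rbinom(sim, n, p1)
  kb <- rbinom(sim, n, p2)
  # vectorised one-sided Fisher p-values via the hypergeometric upper tail
  p.value <- phyper(ka - 1, ka + kb, (n - ka) + (n - kb), n, lower.tail = FALSE)
  power.est <- mean(p.value < alpha)
  # ... stopping rule unchanged from the question ...
}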
Alternatively, if n is not very large (e.g., < 5e3), you could calculate the exact power for a given n by finding the values of ka and kb that would result in the Fisher p-value being less than alpha. That can then be used to find the value of n that results in the desired power:
twosamp.fisher2 <- function(p1 = 0.1, p2 = 0.05, alpha = 0.025, power = 0.9) {
  # exact power for a given per-arm n, minus the target power
  fPow <- function(n) {
    n <- round(n)
    # for each possible total number of successes s = ka + kb, count how many
    # values of ka lead to rejection at level alpha
    thresh <- 1:(n*2) - qhyper(alpha, 1:(n*2), (n*2 - 1):0, n, FALSE)
    # sum P(ka)*P(kb) over all rejecting (ka, kb) pairs to get the exact power
    sum(dbinom(sequence(thresh, from = 1:(n*2), by = -1L), n, p1) *
          dbinom(sequence(thresh, from = 0L), n, p2)) - power
  }
  z_c <- qnorm(1 - alpha) + qnorm(power)
  n0 <- n1 <- round(z_c^2 * (p1 * (1 - p1) + p2 * (1 - p2)) / ((p1 - p2)^2))
  # expand or shrink the normal-approximation n by 20% until the root is bracketed
  if (fPow(n0) < 0) {
    while (fPow(n1 <- n1*1.2) < 0) {}
  } else {
    while (fPow(n0 <- n0/1.2) > 0) {}
  }
  # smallest integer n whose exact power meets the target
  ceiling(uniroot(fPow, c(n0, n1), tol = 1)$root)
}
Performance is better than the original function's, but not as fast as the sampling-based estimate above:
system.time(n <- twosamp.fisher2())
#> user system elapsed
#> 1.36 0.02 1.37
n
#> [1] 606
Upvotes: 3