Reputation: 136
I was trying to fit a glm from the binomial family with identity link to survey weighted data to calculate the risk difference. I was able to run the svyglm model with starting values, however when I tried to extract asymmetric confidence intervals using the likelihood method I received the above error.
Sample data of the first 4 entries using dput:
> data <- structure(list(binary_outcome= structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
binary_exposure= structure(c(1L, 1L, NA, 1L, 1L, NA, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L, NA, 1L,
NA, NA, NA, NA, 1L, NA, 2L, NA), .Label = c("1", "2"), label = "binary exposure", class = "factor"),
wt = c(191217, 89001, 16578, 188901, 7787, 7787, 3876,
27569, 7208, 7208, 8594, 170173, 3430, 7208, 53034, 53034,
5037, 5037, 786465, 177210, 7849, 9038, 6353, 3837, 89733,
188973, 176366, 188736, 3456, 27478), block = c("a",
"a", "b", "c", "w", "e",
"j", "c", "g", "r", "j",
"r", "v", "f", "g", "d",
"a", "a", "v", "t", "l",
"q", "d", "n",
"m", "b", "g", "k", "m",
"g"), strata_study = c("C R", "K U",
"TN R", "WB R", "K R", "K R",
"TN U", "WB U", "TN U", "TN U",
"O U", "M U", "K U", "TN U",
"WB U", "WB U", "K R", "K R",
"D U", "WB R", "TN U", "TN U",
"TN U", "K U", "D U", "WB R", "C R",
"WB R", "K U", "WB U")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -30L), groups = structure(list(
binary_outcome = structure(1:2, .Label = c("0", "1"), class = "factor"),
.rows = structure(list(1:15, 16:30), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
I ran the following to fit the svyglm model to estimate the risk difference:
> svy_design <- svydesign(ids = ~block, strata = ~strata_study, weights = ~wt, data = data, nest = TRUE, fpc = NULL)
> summary(svyglm(I(binary_outcome==1)~binary_exposure, design=svy_design,family=quasi(link="identity",variance="mu(1-mu)"), start=c(0.1,0.6)))
This gives the following result:
Call:
svyglm(formula = I(binary_outcome == 1) ~ binary_exposure, design = svy_design,
family = quasi(link = "identity", variance = "mu(1-mu)"),
start = c(0.1, 0.1))
Survey design:
svydesign(ids = ~block, strata = ~strata_study, weights = ~wt,
data = data, nest = TRUE, fpc = NULL)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3575 0.2043 1.750 0.1107
binary_exposure2 0.6425 0.2043 3.145 0.0104 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for quasi family taken to be 0.7467369)
Number of Fisher Scoring iterations: 2
However, I would like to generate the confidence intervals using likelihood method so I ran the following:
> confint(svyglm(I(binary_outcome==1)~binary_exposure, design=svy_design,family=quasi(link="identity",variance="mu(1-mu)"), start=c(0.1,0.6)), method="likelihood")
And recevied the following error message:
Error: no valid set of coefficients has been found: please supply starting values
Any help on this would be appreciated. Thank you!
Upvotes: 0
Views: 254
Reputation: 415
Don't know if it answers your question, but using the confint
without the method="likelihood"
parameter gives an output.
Sorry I'm not very knowledgeable about the stats behind this to confirm if the result is the same..
data <- structure(list(binary_outcome= structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("0", "1"), class = "factor"),
binary_exposure= structure(c(1L, 1L, NA, 1L, 1L, NA, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L, NA, 1L,
NA, NA, NA, NA, 1L, NA, 2L, NA), .Label = c("1", "2"), label = "binary exposure", class = "factor"),
wt = c(191217, 89001, 16578, 188901, 7787, 7787, 3876,
27569, 7208, 7208, 8594, 170173, 3430, 7208, 53034, 53034,
5037, 5037, 786465, 177210, 7849, 9038, 6353, 3837, 89733,
188973, 176366, 188736, 3456, 27478), block = c("a",
"a", "b", "c", "w", "e",
"j", "c", "g", "r", "j",
"r", "v", "f", "g", "d",
"a", "a", "v", "t", "l",
"q", "d", "n",
"m", "b", "g", "k", "m",
"g"), strata_study = c("C R", "K U",
"TN R", "WB R", "K R", "K R",
"TN U", "WB U", "TN U", "TN U",
"O U", "M U", "K U", "TN U",
"WB U", "WB U", "K R", "K R",
"D U", "WB R", "TN U", "TN U",
"TN U", "K U", "D U", "WB R", "C R",
"WB R", "K U", "WB U")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -30L), groups = structure(list(
binary_outcome = structure(1:2, .Label = c("0", "1"), class = "factor"),
.rows = structure(list(1:15, 16:30), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
# clean up data : (to prevent error with svyglm)
dat2 <- dplyr::filter(data, !stringr::str_detect(strata_study, 'M U|O U|TN R')) # remove strata with only one incidence
# surveydesign
svy_design <- survey::svydesign(ids = ~block, strata = ~strata_study, weights = ~wt, data = dat2, nest = TRUE, fpc = NULL)
# run glm fit
svyfit <- survey::svyglm(I(binary_outcome==1)~binary_exposure, design=svy_design,family=quasi(link="identity",variance="mu(1-mu)"), start=c(0.1,0.6))
#> Warning: step size truncated: out of bounds
#> Warning: step size truncated: out of bounds
#> Warning: step size truncated: out of bounds
#> Warning: step size truncated: out of bounds
#> Warning: step size truncated: out of bounds
#> Warning: step size truncated: out of bounds
#> Warning: step size truncated: out of bounds
#> Warning: step size truncated: out of bounds
#> Warning: step size truncated: out of bounds
#> Warning: glm.fit: algorithm stopped at boundary value
# summarize
summary(svyfit)
#>
#> Call:
#> svyglm(formula = I(binary_outcome == 1) ~ binary_exposure, design = svy_design,
#> family = quasi(link = "identity", variance = "mu(1-mu)"),
#> start = c(0.1, 0.6))
#>
#> Survey design:
#> survey::svydesign(ids = ~block, strata = ~strata_study, weights = ~wt,
#> data = dat2, nest = TRUE, fpc = NULL)
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.4206 0.2410 1.745 0.112
#> binary_exposure2 0.5794 0.2410 2.404 0.037 *
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> (Dispersion parameter for quasi family taken to be 0.6920658)
#>
#> Number of Fisher Scoring iterations: 10
# confidence interval
confint(svyfit)
#> 2.5 % 97.5 %
#> (Intercept) -0.11633493 0.9575489
#> binary_exposure2 0.04245107 1.1163349
Created on 2023-08-14 with reprex v2.0.2
Upvotes: 0