123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287 |
- library(tidyverse)
- library(broom)
- library(patchwork)
- library(grid)
- ggplot2::theme_set(
- theme_classic() +
- theme(
- legend.position=c(0.99, 0.01),
- legend.justification = c(1, 0),
- text=element_text(size=12),
- plot.title = element_text(hjust=0.5),
- legend.direction = "vertical"
- )
- )
- # colours <- c("red", "blue")
- colours <- c("#EA5515", "#15AAEA")
- ci_level <- 0.99
- ci_level_norm_sd <- qnorm(ci_level + (1-ci_level)/2) # how many multiples of sd to get ci_level of normal distribution
- geom_ribbon_ci_level <- 0.99
- # function to take vector of 0s and 1s and get lower and upper bounds of confidence interval in tidy format
- tidy_binom_ci <- function(x, level=0.95, min_obs=1) {
- x <- as.numeric(x)
- if (length(x)>min_obs) {
- prop.test(sum(x), length(x), conf.level=level, correct = TRUE) %>%
- broom::tidy() %>%
- select(conf.low, conf.high)
- } else {
- tibble(conf.low = NA, conf.high = NA)
- }
- }
- # power <- list.files("simulations", full.names = TRUE) %>%
- # map_df(
- # read_csv,
- # col_types = cols(
- # sim_nr = col_factor(),
- # re_corr = col_double(),
- # nS = col_double(),
- # estimate = col_double(),
- # chisq = col_double(),
- # p = col_double(),
- # is_sig = col_logical(),
- # slopes_removed = col_logical()
- # )
- # ) %>%
- # mutate(re_corr = factor(round(re_corr, 1), levels=seq(0, 0.8, 0.2)))
- # write_csv(power, "power.csv")
- # import simulations results
- power <- read_csv("power.csv", col_types = cols(
- sim_nr = col_factor(),
- re_corr = col_double(),
- nS = col_double(),
- estimate = col_double(),
- chisq = col_double(),
- p = col_double(),
- is_sig = col_logical(),
- corrs_removed = col_logical()
- )) %>%
- mutate(re_corr = factor(round(re_corr, 1), levels=seq(0, 0.8, 0.2))) %>%
- group_by(nS, re_corr) %>%
- mutate(iter_id = row_number()) %>%
- ungroup()
- # get one and two-tailed significance results in long format
- power_summ <- power %>%
- mutate(sig_type = "One-Tailed", is_sig = as.numeric(p<0.1 & estimate>0)) %>%
- bind_rows(
- .,
- mutate(., sig_type = "Two-Tailed", is_sig = as.numeric(p<0.05))
- )
- power_summ_recorr_0 <- filter(power_summ, re_corr==0)
- # for the plot looking at random effect correlations, sample a subset of the
- power_summ_all_recorrs <- power %>%
- group_by(nS, re_corr) %>%
- slice_sample(prop=1) %>%
- mutate(sig_type = "One-Tailed", is_sig = as.numeric(p<0.1 & estimate>0)) %>%
- bind_rows(
- .,
- mutate(., sig_type = "Two-Tailed", is_sig = as.numeric(p<0.05))
- )
- # plot power curve
- plt_power_summ <- power_summ_recorr_0 %>%
- group_by(nS, sig_type) %>%
- summarise(
- power = mean(is_sig),
- ci_low = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.low"),
- ci_high = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.high"),
- .groups = "drop"
- ) %>%
- ggplot(aes(nS, power, colour=sig_type, fill=sig_type)) +
- geom_hline(yintercept=0.8, linetype="dashed") +
- geom_ribbon(
- data=power_summ_recorr_0,
- aes(y=is_sig, group=sig_type),
- stat="smooth", method="glm", formula = y ~ log(x), method.args=list(family="binomial"),
- alpha=0.2, level=geom_ribbon_ci_level
- ) +
- # geom_line(
- # data=power_summ_recorr_0,
- # aes(y=is_sig),
- # stat="smooth", method="glm", formula = y ~ log(x), method.args=list(family="binomial"),
- # size = 0.25, alpha=1
- # ) +
- geom_point(size=1, position=position_dodge(width=1)) +
- geom_errorbar(aes(ymin=ci_low, ymax=ci_high), width=0, position=position_dodge(width=1), key_glyph="vpath") +
- scale_x_continuous(breaks = seq(0, 100, by = 10)) +
- scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand=c(0,0)) +
- labs(
- x = "Number of Participants",
- y = "Power"
- ) +
- scale_color_manual(name = "Significance Test", values = colours) +
- scale_fill_manual(name = "Significance Test", values = colours) +
- theme(plot.margin=unit(c(0.75,0.75,0.75,0.75), "lines"))
- plt_power_summ
- ggsave("power.png", plt_power_summ, device="png", type="cairo", width=3.5, height=3, dpi=600)
- ggsave("power.pdf", plt_power_summ, device="pdf", width=3.5, height=3)
- # plot power curve
- draw_key_curve <- function(data, params, size, order=2.5) {
- x <- seq(0.1, 0.9, 0.001)
- y <- 1 - rev(
- 0.1 + (x**order - min(x**order)) /
- (1/0.8 * (max(x**order) - min(x**order)))
- )
- grobTree(
- linesGrob(
- x=x,
- y=y
- ),
- gp = gpar(
- col = data$colour %||% "grey20",
- fill = alpha(data$fill %||% "white", data$alpha),
- lwd = (data$linewidth %||% 0.5) * .pt,
- lty = data$linetype %||% 1
- )
- )
- }
- plt_power_summ_ot <- power_summ_recorr_0 |>
- filter(sig_type=="One-Tailed") |>
- group_by(nS) %>%
- summarise(
- power = mean(is_sig),
- ci_low = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.low"),
- ci_high = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.high"),
- .groups = "drop"
- ) %>%
- ggplot(aes(nS, power)) +
- geom_ribbon(
- data=filter(power_summ_recorr_0, sig_type=="One-Tailed"),
- aes(y=is_sig, group=sig_type, fill=sprintf("Loglinear %s%% CI", geom_ribbon_ci_level*100)),
- stat="smooth", method="glm", formula = y ~ log(x), method.args=list(family="binomial"),
- alpha=1, level=geom_ribbon_ci_level,
- key_glyph = draw_key_curve, colour = "#F09B0F"
- ) +
- geom_hline(yintercept=0.8, linetype="dashed") +
- geom_point(aes(colour="Point Estimates"), size=0.75) +
- geom_errorbar(aes(ymin=ci_low, ymax=ci_high, colour="Point Estimates"), width=0, position=position_dodge(width=1), key_glyph="vpath", linewidth=0.4) +
- scale_x_continuous(breaks = seq(0, 100, by = 20)) +
- scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand=c(0,0)) +
- scale_fill_manual(values = "#F09B0F", guide=guide_legend(override.aes=list(linewidth=1))) +
- scale_colour_manual(values = "#000000") +
- labs(
- x = "N (Participants)",
- y = "Power",
- fill = NULL,
- colour = NULL
- ) +
- theme_classic() +
- theme(
- legend.position = c(1, 0),
- legend.justification = c(1, 0),
- legend.background = element_blank(),
- legend.spacing = unit(0, "pt"),
- legend.key.height = unit(10, "pt"),
- legend.key.width = unit(15, "pt"),
- legend.spacing.y = unit(-4, "pt"),
- legend.spacing.x = unit(2, "pt"),
- legend.key = element_blank()
- )
- ggsave("power_onetailed.png", plt_power_summ_ot, width=2, height=2, units="in", device="png", type="cairo")
- ggsave("power_onetailed.pdf", plt_power_summ_ot, width=2, height=2, units="in")
- plt_power_summ_corr_vals <- power_summ_all_recorrs %>%
- group_by(nS, re_corr, sig_type) %>%
- summarise(
- power = mean(is_sig),
- ci_low = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.low"),
- ci_high = pull(tidy_binom_ci(is_sig, level=ci_level), "conf.high"),
- .groups = "drop"
- ) %>%
- ggplot(aes(nS, power, colour=re_corr, group=re_corr)) +
- geom_hline(yintercept=0.8, linetype="dashed") +
- geom_line(
- data=power_summ_all_recorrs,
- aes(y=is_sig),
- stat="smooth", method="glm", formula = y ~ log(x), method.args=list(family="binomial"),
- size = 0.5
- ) +
- scale_x_continuous(breaks = seq(0, 100, by = 20)) +
- scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand=c(0,0)) +
- scale_colour_viridis_d() +
- guides(colour = guide_legend(override.aes = list(size = 1.5))) +
- labs(
- x = "Number of Participants",
- y = "Power",
- colour = "Random\nEffects\nCorrelation"
- ) +
- facet_wrap(vars(sig_type)) +
- theme(
- legend.position = "right",
- plot.margin=unit(c(0.75,0.75,0.75,0.75), "lines"),
- strip.background = element_rect(fill = "white")
- )
- plt_power_summ_corr_vals
- ggsave("power_corr_vals.png", plt_power_summ_corr_vals, device="png", type="cairo", width=6.5, height=2.75, dpi=600)
- ggsave("power_corr_vals.pdf", plt_power_summ_corr_vals, device="pdf", width=6.5, height=2.75)
- # fit the same models as plotted for to get predictions and results
- tails <- unique(power_summ$sig_type)
- re_corrs <- unique(power_summ$re_corr)
- power_m <- map(tails, function(tail) {
- map(re_corrs, function(re_corr_i) {
- power_summ %>%
- filter(sig_type==tail & re_corr==re_corr_i) %>%
- mutate(power=as.numeric(is_sig)) %>%
- glm(
- power ~ log(nS),
- family=binomial("logit"),
- data = .
- )
- }) %>%
- set_names(re_corrs)
- }) %>%
- set_names(tails)
- # get number of subjects needed for desired power
- poss_nS <- seq(4, 100, 4) # divisible by 4, so balanced between stimulus groups and response groups
- # confint(power_m[["One-Tailed"]][["0"]], level=ci_level)
- all_logit <- predict(power_m[["One-Tailed"]][["0"]], newdata=tibble(nS = poss_nS), se.fit=TRUE)
- all_logit$se.fit <- all_logit$se.fit * ci_level_norm_sd # make the standard errors into confidence intervals of ci_level
- all_p <- exp(all_logit$fit) / (1+exp(all_logit$fit))
- all_intervals_lower <- exp(all_logit$fit - all_logit$se.fit) / (1+exp(all_logit$fit - all_logit$se.fit))
- all_intervals_upper <- exp(all_logit$fit + all_logit$se.fit) / (1+exp(all_logit$fit + all_logit$se.fit))
- req_power <- 0.8
- dist_targ <- all_intervals_lower-req_power
- req_nS <- poss_nS[dist_targ == min(dist_targ[dist_targ>0])]
- pred_power <- all_p[poss_nS==req_nS]
- pred_ci <- c(all_intervals_lower[poss_nS==req_nS], all_intervals_upper[poss_nS==req_nS])
- # print a summary of the results
- power %>%
- filter(re_corr==0) %>%
- group_by(nS, re_corr) %>%
- summarise(
- n = n(),
- raw_onetailed = mean(p<0.1 & estimate>0),
- raw_twotailed = mean(p<0.05),
- m_estimate = mean(estimate),
- sd_estimate = sd(estimate),
- .groups = "drop"
- ) %>%
- rowwise() %>%
- mutate(
- re_corr = as.character(re_corr),
- logit_onetail = predict(power_m[["One-Tailed"]][[re_corr]], newdata=tibble(nS=nS)),
- power_onetail = exp(logit_onetail) / (1 + exp(logit_onetail)),
- logit_twotail = predict(power_m[["Two-Tailed"]][[re_corr]], newdata=tibble(nS=nS)),
- power_twotail = exp(logit_twotail) / (1 + exp(logit_twotail))
- )
- message(sprintf("We decided that we need %s participants to reach >=%s%% power. Specifically, the model predicted that at this number of subjects, we would have %s%% power (%s%% CI = [%s%%, %s%%]) to detect the predicted effect.\n", req_nS, req_power*100, round(pred_power*100, 3), ci_level*100, round(pred_ci[1]*100, 3), round(pred_ci[2]*100, 3)))
|