library(tidyverse)
library(broom)
library(patchwork)
library(grid)

ggplot2::theme_set(
  theme_classic() +
    theme(
      legend.position = c(0.99, 0.01),
      legend.justification = c(1, 0),
      text = element_text(size = 12),
      plot.title = element_text(hjust = 0.5),
      legend.direction = "vertical"
    )
)

# colours <- c("red", "blue")
colours <- c("#EA5515", "#15AAEA")

ci_level <- 0.99
ci_level_norm_sd <- qnorm(ci_level + (1 - ci_level) / 2)  # number of SDs that covers ci_level of a normal distribution
geom_ribbon_ci_level <- 0.99

# function to take a vector of 0s and 1s and return the lower and upper bounds of its confidence interval in tidy format
tidy_binom_ci <- function(x, level = 0.95, min_obs = 1) {
  x <- as.numeric(x)
  if (length(x) > min_obs) {
    prop.test(sum(x), length(x), conf.level = level, correct = TRUE) %>%
      broom::tidy() %>%
      select(conf.low, conf.high)
  } else {
    tibble(conf.low = NA, conf.high = NA)
  }
}

# power <- list.files("simulations", full.names = TRUE) %>%
#   map_df(
#     read_csv,
#     col_types = cols(
#       sim_nr = col_factor(),
#       re_corr = col_double(),
#       nS = col_double(),
#       estimate = col_double(),
#       chisq = col_double(),
#       p = col_double(),
#       is_sig = col_logical(),
#       slopes_removed = col_logical()
#     )
#   ) %>%
#   mutate(re_corr = factor(round(re_corr, 1), levels = seq(0, 0.8, 0.2)))
# write_csv(power, "power.csv")

# import simulation results
power <- read_csv("power.csv", col_types = cols(
  sim_nr = col_factor(),
  re_corr = col_double(),
  nS = col_double(),
  estimate = col_double(),
  chisq = col_double(),
  p = col_double(),
  is_sig = col_logical(),
  corrs_removed = col_logical()
)) %>%
  mutate(re_corr = factor(round(re_corr, 1), levels = seq(0, 0.8, 0.2))) %>%
  group_by(nS, re_corr) %>%
  mutate(iter_id = row_number()) %>%
  ungroup()

# get one- and two-tailed significance results in long format
power_summ <- power %>%
  mutate(sig_type = "One-Tailed", is_sig = as.numeric(p < 0.1 & estimate > 0)) %>%
  bind_rows(
    .,
    mutate(., sig_type = "Two-Tailed", is_sig = as.numeric(p < 0.05))
  )

power_summ_recorr_0 <- filter(power_summ, re_corr == 0)

# for the plot looking at random effects correlations, keep the iterations from all
# correlation values (slice_sample() with prop = 1 retains every row, in shuffled order)
power_summ_all_recorrs <- power %>%
  group_by(nS, re_corr) %>%
  slice_sample(prop = 1) %>%
  mutate(sig_type = "One-Tailed", is_sig = as.numeric(p < 0.1 & estimate > 0)) %>%
  bind_rows(
    .,
    mutate(., sig_type = "Two-Tailed", is_sig = as.numeric(p < 0.05))
  )
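# Illustrative sketch (not part of the analysis pipeline): halving a two-tailed
# p-value when the estimate is in the predicted direction means the one-tailed
# criterion above (p < 0.1 & estimate > 0) is effectively a directional test at
# alpha = 0.05. The binomial CI helper can be sanity-checked on a toy vector of
# 0/1 significance flags; the values below are made up for illustration.
tidy_binom_ci(c(1, 1, 0, 1, 1, 0, 1, 1), level = ci_level)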
# plot power curve
plt_power_summ <- power_summ_recorr_0 %>%
  group_by(nS, sig_type) %>%
  summarise(
    power = mean(is_sig),
    ci_low = pull(tidy_binom_ci(is_sig, level = ci_level), "conf.low"),
    ci_high = pull(tidy_binom_ci(is_sig, level = ci_level), "conf.high"),
    .groups = "drop"
  ) %>%
  ggplot(aes(nS, power, colour = sig_type, fill = sig_type)) +
  geom_hline(yintercept = 0.8, linetype = "dashed") +
  geom_ribbon(
    data = power_summ_recorr_0,
    aes(y = is_sig, group = sig_type),
    stat = "smooth", method = "glm", formula = y ~ log(x), method.args = list(family = "binomial"),
    alpha = 0.2, level = geom_ribbon_ci_level
  ) +
  # geom_line(
  #   data = power_summ_recorr_0,
  #   aes(y = is_sig),
  #   stat = "smooth", method = "glm", formula = y ~ log(x), method.args = list(family = "binomial"),
  #   size = 0.25, alpha = 1
  # ) +
  geom_point(size = 1, position = position_dodge(width = 1)) +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high), width = 0, position = position_dodge(width = 1), key_glyph = "vpath") +
  scale_x_continuous(breaks = seq(0, 100, by = 10)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand = c(0, 0)) +
  labs(
    x = "Number of Participants",
    y = "Power"
  ) +
  scale_color_manual(name = "Significance Test", values = colours) +
  scale_fill_manual(name = "Significance Test", values = colours) +
  theme(plot.margin = unit(c(0.75, 0.75, 0.75, 0.75), "lines"))

plt_power_summ

ggsave("power.png", plt_power_summ, device = "png", type = "cairo", width = 3.5, height = 3, dpi = 600)
ggsave("power.pdf", plt_power_summ, device = "pdf", width = 3.5, height = 3)

# plot one-tailed power curve, with a custom legend key glyph for the loglinear CI ribbon
draw_key_curve <- function(data, params, size, order = 2.5) {
  x <- seq(0.1, 0.9, 0.001)
  y <- 1 - rev(
    0.1 + (x^order - min(x^order)) / (1 / 0.8 * (max(x^order) - min(x^order)))
  )
  grobTree(
    linesGrob(x = x, y = y),
    gp = gpar(
      col = data$colour %||% "grey20",
      fill = alpha(data$fill %||% "white", data$alpha),
      lwd = (data$linewidth %||% 0.5) * .pt,
      lty = data$linetype %||% 1
    )
  )
}

plt_power_summ_ot <- power_summ_recorr_0 |>
  filter(sig_type == "One-Tailed") |>
  group_by(nS) %>%
  summarise(
    power = mean(is_sig),
    ci_low = pull(tidy_binom_ci(is_sig, level = ci_level), "conf.low"),
    ci_high = pull(tidy_binom_ci(is_sig, level = ci_level), "conf.high"),
    .groups = "drop"
  ) %>%
  ggplot(aes(nS, power)) +
  geom_ribbon(
    data = filter(power_summ_recorr_0, sig_type == "One-Tailed"),
    aes(y = is_sig, group = sig_type, fill = sprintf("Loglinear %s%% CI", geom_ribbon_ci_level * 100)),
    stat = "smooth", method = "glm", formula = y ~ log(x), method.args = list(family = "binomial"),
    alpha = 1, level = geom_ribbon_ci_level,
    key_glyph = draw_key_curve,
    colour = "#F09B0F"
  ) +
  geom_hline(yintercept = 0.8, linetype = "dashed") +
  geom_point(aes(colour = "Point Estimates"), size = 0.75) +
  geom_errorbar(aes(ymin = ci_low, ymax = ci_high, colour = "Point Estimates"), width = 0, position = position_dodge(width = 1), key_glyph = "vpath", linewidth = 0.4) +
  scale_x_continuous(breaks = seq(0, 100, by = 20)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand = c(0, 0)) +
  scale_fill_manual(values = "#F09B0F", guide = guide_legend(override.aes = list(linewidth = 1))) +
  scale_colour_manual(values = "#000000") +
  labs(
    x = "N (Participants)",
    y = "Power",
    fill = NULL,
    colour = NULL
  ) +
  theme_classic() +
  theme(
    legend.position = c(1, 0),
    legend.justification = c(1, 0),
    legend.background = element_blank(),
    legend.spacing = unit(0, "pt"),
    legend.key.height = unit(10, "pt"),
    legend.key.width = unit(15, "pt"),
    legend.spacing.y = unit(-4, "pt"),
    legend.spacing.x = unit(2, "pt"),
    legend.key = element_blank()
  )

ggsave("power_onetailed.png", plt_power_summ_ot, width = 2, height = 2, units = "in", device = "png", type = "cairo")
ggsave("power_onetailed.pdf", plt_power_summ_ot, width = 2, height = 2, units = "in")

# plot power curves separately for each random effects correlation value
plt_power_summ_corr_vals <- power_summ_all_recorrs %>%
  group_by(nS, re_corr, sig_type) %>%
  summarise(
    power = mean(is_sig),
    ci_low = pull(tidy_binom_ci(is_sig, level = ci_level), "conf.low"),
    ci_high = pull(tidy_binom_ci(is_sig, level = ci_level), "conf.high"),
    .groups = "drop"
  ) %>%
  ggplot(aes(nS, power, colour = re_corr, group = re_corr)) +
  geom_hline(yintercept = 0.8, linetype = "dashed") +
  geom_line(
    data = power_summ_all_recorrs,
    aes(y = is_sig),
    stat = "smooth", method = "glm", formula = y ~ log(x), method.args = list(family = "binomial"),
    size = 0.5
  ) +
  scale_x_continuous(breaks = seq(0, 100, by = 20)) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1), expand = c(0, 0)) +
  scale_colour_viridis_d() +
  guides(colour = guide_legend(override.aes = list(size = 1.5))) +
  labs(
    x = "Number of Participants",
    y = "Power",
    colour = "Random\nEffects\nCorrelation"
  ) +
  facet_wrap(vars(sig_type)) +
  theme(
    legend.position = "right",
    plot.margin = unit(c(0.75, 0.75, 0.75, 0.75), "lines"),
    strip.background = element_rect(fill = "white")
  )

plt_power_summ_corr_vals

ggsave("power_corr_vals.png", plt_power_summ_corr_vals, device = "png", type = "cairo",
       width = 6.5, height = 2.75, dpi = 600)
ggsave("power_corr_vals.pdf", plt_power_summ_corr_vals, device = "pdf", width = 6.5, height = 2.75)
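# Sketch (illustrative only; not used downstream): the ribbons and lines drawn
# with stat = "smooth" above are binomial GLMs of is_sig on log(nS). The same
# curve can be reproduced manually, which is what the power_m models fitted
# below do; example_fit is a throwaway name used only for this check.
example_fit <- glm(
  is_sig ~ log(nS),
  family = binomial("logit"),
  data = filter(power_summ_recorr_0, sig_type == "One-Tailed")
)
plogis(predict(example_fit, newdata = tibble(nS = seq(10, 100, 10))))  # predicted power at each N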
# fit the same models as those plotted, to get predictions and results
tails <- unique(power_summ$sig_type)
re_corrs <- unique(power_summ$re_corr)

power_m <- map(tails, function(tail) {
  map(re_corrs, function(re_corr_i) {
    power_summ %>%
      filter(sig_type == tail & re_corr == re_corr_i) %>%
      mutate(power = as.numeric(is_sig)) %>%
      glm(
        power ~ log(nS),
        family = binomial("logit"),
        data = .
      )
  }) %>%
    set_names(re_corrs)
}) %>%
  set_names(tails)

# get the number of subjects needed for the desired power
poss_nS <- seq(4, 100, 4)  # divisible by 4, so balanced between stimulus groups and response groups

# confint(power_m[["One-Tailed"]][["0"]], level=ci_level)
all_logit <- predict(power_m[["One-Tailed"]][["0"]], newdata = tibble(nS = poss_nS), se.fit = TRUE)
all_logit$se.fit <- all_logit$se.fit * ci_level_norm_sd  # scale standard errors to the half-width of a ci_level confidence interval
all_p <- exp(all_logit$fit) / (1 + exp(all_logit$fit))
all_intervals_lower <- exp(all_logit$fit - all_logit$se.fit) / (1 + exp(all_logit$fit - all_logit$se.fit))
all_intervals_upper <- exp(all_logit$fit + all_logit$se.fit) / (1 + exp(all_logit$fit + all_logit$se.fit))

req_power <- 0.8
dist_targ <- all_intervals_lower - req_power
# smallest positive margin above the target, i.e. the smallest N whose lower CI bound exceeds req_power
req_nS <- poss_nS[dist_targ == min(dist_targ[dist_targ > 0])]
pred_power <- all_p[poss_nS == req_nS]
pred_ci <- c(all_intervals_lower[poss_nS == req_nS], all_intervals_upper[poss_nS == req_nS])

# print a summary of the results
power %>%
  filter(re_corr == 0) %>%
  group_by(nS, re_corr) %>%
  summarise(
    n = n(),
    raw_onetailed = mean(p < 0.1 & estimate > 0),
    raw_twotailed = mean(p < 0.05),
    m_estimate = mean(estimate),
    sd_estimate = sd(estimate),
    .groups = "drop"
  ) %>%
  rowwise() %>%
  mutate(
    re_corr = as.character(re_corr),
    logit_onetail = predict(power_m[["One-Tailed"]][[re_corr]], newdata = tibble(nS = nS)),
    power_onetail = exp(logit_onetail) / (1 + exp(logit_onetail)),
    logit_twotail = predict(power_m[["Two-Tailed"]][[re_corr]], newdata = tibble(nS = nS)),
    power_twotail = exp(logit_twotail) / (1 + exp(logit_twotail))
  )

message(sprintf(
  "We decided that we need %s participants to reach >=%s%% power. Specifically, the model predicted that at this number of subjects, we would have %s%% power (%s%% CI = [%s%%, %s%%]) to detect the predicted effect.\n",
  req_nS, req_power * 100, round(pred_power * 100, 3), ci_level * 100, round(pred_ci[1] * 100, 3), round(pred_ci[2] * 100, 3)
))
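# Note (illustrative): the manual logistic transform used above,
# exp(fit) / (1 + exp(fit)), is the inverse-logit; base R's plogis() computes
# the same quantity, so the conversion can be checked with:
all.equal(
  plogis(all_logit$fit),
  exp(all_logit$fit) / (1 + exp(all_logit$fit))
)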