# SETUP ----

n_cores <- 40

library(lme4)
library(readr)
library(dplyr)
library(tidyr)
library(tibble)
library(purrr)
library(ggplot2)
library(faux)
library(Matrix)
library(parallel)
library(RhpcBLASctl)

ggplot2::theme_set(theme_bw() + theme(legend.position="top"))

blas_set_num_threads(1)

# set.seed(1)

norm01 <- function(x, ...) (x-min(x, ...))/(max(x, ...)-min(x, ...))

# labels for the fixed effects, used for ease of reference in various places
fe_vars <- c("0", "cong", "pred", "int")

# function to convert a tibble or dataframe of correlations to a matrix
# (see the tribbles below for an example of the expected long format)
cor_tbl2mat <- function(cor_tbl, vars_labs=fe_vars) {
  # initialise as matrix
  mat <- matrix(
    data = rep(NA, length(vars_labs)**2),
    ncol = length(vars_labs),
    nrow = length(vars_labs)
  )
  # pull relevant values from the dataframe
  for (i_x in 1:length(vars_labs)) {
    for (j_x in 1:length(vars_labs)) {
      mat[i_x, j_x] <- if (i_x==j_x) {
        1
      } else {
        cor_tbl %>%
          filter(
            i %in% vars_labs[c(i_x, j_x)] &
              j %in% vars_labs[c(i_x, j_x)]
          ) %>%
          pull(cor) %>%
          unique()
      }
    }
  }
  # return result
  mat
}

stim_csv <- read_csv("stim_tidy_long.csv") %>%
  mutate(
    prop_agree = perc_name_agree/100,
    pred_norm = norm01(prop_agree, na.rm=TRUE),
    pred_minned = prop_agree - min(prop_agree, na.rm=TRUE)
  )

# SIMULATION SETUP ----

# simulate the predictor coding, with expected effects
# (this is used to extract the predicted coefficients)
min_pred <- min(stim_csv$pred_norm, na.rm=TRUE)
max_pred <- max(stim_csv$pred_norm, na.rm=TRUE)

cells <- tribble(
  ~cong, ~pred,    ~uV,
  -0.5,  min_pred, -5,    # incongruent, lowest predictability
   0.5,  min_pred, -5,    # congruent, lowest predictability
  -0.5,  max_pred, -5,    # incongruent, highest predictability
   0.5,  max_pred, -4.25  # congruent, highest predictability
)

cells_m <- lm(uV ~ cong * pred, data=cells)
cells_coefs <- coef(cells_m) %>% round(10)  # avoid floating point errors

# list which will contain values used in the simulation
sim <- list()

# set fixed effect parameters (values in uV)
sim$B0 <- cells_coefs[["(Intercept)"]]  # intercept (typical peak N170 amplitude)
sim$Bcong <- cells_coefs[["cong"]]  # effect of congruency
sim$Bpred <- cells_coefs[["pred"]]  # effect of predictability
sim$Bint <- cells_coefs[["cong:pred"]]  # interaction (effect of interest)

# set random intercepts
# - subject intercepts are informed by prior N170 research
# - item intercepts are informed by prior N170 research and a behavioural
#   pilot, in which images (the highest level of the nested structure)
#   captured most variance, while words and word-image combinations captured
#   much smaller proportions of variance
# - channel and subject-channel random effects are based on a reanalysis of
#   the EEG data at 230 ms from Gagl (2020), using a maximal random effects
#   structure
sim$S0_sd <- 2.5   # by-subject random intercept SD
sim$I0_sd <- 0.25  # by-image random intercept SD
sim$W0_sd <- 0.25  # by-word random intercept SD

# set random slopes, and correlations between random terms; which random
# slopes are simulated depends on what the experiment's design allows us
# to model

# subject random slopes
sim$Scong_sd <- 0.75
sim$Spred_sd <- 1
sim$Sint_sd <- 1

# image random slopes
sim$Icong_sd <- 0.5

# # image random effects correlation
# sim$I0_cong_cor <- 0.74

# SD of residuals
sim$resid_sd <- 3

# sim$nS <- 50  # number of subjects (commented out as defined as a function argument)
sim$nI <- 200  # number of images
sim$nW <- sim$nI * 2  # number of words
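# Quick illustrative check (not part of the pipeline): because the cell-means
# model is saturated, the extracted coefficients should reproduce the four
# expected amplitudes exactly, e.g. the congruent, highest-predictability
# cell should come back as -4.25 uV.
sim$B0 + sim$Bcong * 0.5 + sim$Bpred * max_pred + sim$Bint * 0.5 * max_pred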
# SIMULATE MODEL ----

# function for simulating nS subjects' data
sim_experiment <- function(nS=50, re_corr=0) {
  sim$nS <- nS

  # image random effects correlation (randomised)
  sim$I0_cong_cor <- re_corr

  # subject random correlations (randomised)
  S0_cors <- tribble(
    ~i,     ~j,     ~cor,
    "0",    "cong", re_corr,
    "0",    "pred", re_corr,
    "0",    "int",  re_corr,
    "cong", "pred", re_corr,
    "cong", "int",  re_corr,
    "pred", "int",  re_corr
  )

  # convert to a correlation matrix
  sim$Scor_mat <- cor_tbl2mat(S0_cors)

  # find the nearest positive definite var-covar matrix to that generated
  # from the random correlations
  sim$Scor_mat <- Matrix::nearPD(sim$Scor_mat, corr=TRUE, keepDiag=TRUE, ensureSymmetry=TRUE, maxit=10000) %>%
    with(mat)

  # simulate item random effects
  images <- faux::sim_design(
    within = list(effect = c(
      I0 = "By-image random intercepts",
      Icong = "By-image congruency slopes"
    )),
    n = sim$nI,
    sd = c(sim$I0_sd, sim$Icong_sd),
    r = sim$I0_cong_cor,
    id = "image_id",
    plot = FALSE
  )

  # simulate subject random effects
  subjects <- faux::sim_design(
    within = list(effect = c(
      S0 = "By-subject random intercepts",
      Scong = "By-subject congruency slopes",
      Spred = "By-subject predictability slopes",
      Sint = "By-subject interaction slopes"
    )),
    n = sim$nS,
    sd = c(sim$S0_sd, sim$Scong_sd, sim$Spred_sd, sim$Sint_sd),
    r = sim$Scor_mat,
    id = "subj_id",
    plot = FALSE
  ) %>%
    mutate(stim_set = sample(rep(c(1, 2), length.out=nrow(.))))

  # get image-word combinations
  images_words <- stim_csv %>%
    mutate(
      item_id = row_number(),
      pred = pred_norm,
      condition = ifelse(condition=="A1", "congruent", "incongruent"),
      cong = scale(ifelse(condition=="incongruent", 0, 1), scale=FALSE, center=TRUE)
    ) %>%
    select(item_id, string, filename, condition, pred, cong) %>%
    rename(word_id=string, image_id=filename) %>%
    mutate(
      image_word_id = paste(image_id, word_id, sep="."),
      stim_set = sample(rep(c(1, 2), length.out=nrow(.)))
    ) %>%
    group_by(image_id) %>%
    mutate(pred = max(pred, na.rm=TRUE)) %>%
    ungroup() %>%
    mutate(
      I0 = rep(images$I0, each=2),
      Icong = rep(images$Icong, each=2),
      W0 = rnorm(sim$nW, 0, sim$W0_sd)
    )

  # collate to simulate trials ----
  trials <- crossing(
    subj_id = subjects$subj_id,
    image_word_id = images_words$image_word_id
  ) %>%
    inner_join(subjects, by="subj_id") %>%
    # joining by stim_set as well as image_word_id ensures each simulated
    # participant only sees the trials from their own stimulus set
    inner_join(images_words, by=c("image_word_id", "stim_set"))

  # simulate the outcome ----
  sim_data <- trials %>%
    mutate(
      resid = rnorm(nrow(.), mean=0, sd=sim$resid_sd),
      uV = sim$B0 + I0 + W0 + S0 +
        (sim$Bcong + Icong + Scong) * cong +
        (sim$Bpred + Spred) * pred +
        (sim$Bint + Sint) * cong * pred +
        resid
    ) %>%
    select(image_id, word_id, image_word_id, subj_id, condition, cong, pred, uV)

  # simulate data loss ----
  # rather than modelling trial accuracy explicitly, drop a random 10% of
  # trials: ~5% lost to errors and extreme RTs (based on the behavioural
  # pilot) and ~5% to technical issues
  sim_data_clean <- sim_data %>%
    slice_sample(prop = 0.9) %>%
    # rebalance the deviation coding after data loss
    mutate(cong = scale(ifelse(condition=="incongruent", 0, 1), center=TRUE, scale=FALSE))

  sim_data_clean
}

# visualise some example simulated datasets at different sample sizes
sim_experiment(nS = 12) %>%
  ggplot(aes(pred, uV, colour=condition)) +
  geom_point(alpha = 0.1) +
  geom_smooth(method="lm", formula=y~x) +
  facet_wrap(vars(subj_id))

sim_experiment(nS = 50) %>%
  ggplot(aes(pred, uV, colour=condition)) +
  geom_smooth(method="lm", formula=y~x)

sim_experiment(nS = 150) %>%
  ggplot(aes(pred, uV, colour=condition)) +
  geom_smooth(method="lm", formula=y~x)
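# Before committing to the full power run, it can help to fit the maximal
# model once to a single simulated dataset and check that the generating
# parameters are roughly recovered (illustrative check only; the nS and
# re_corr values here are arbitrary).
d_check <- sim_experiment(nS = 50, re_corr = 0.25)
m_check <- lmer(
  uV ~ cong * pred +
    (cong * pred | subj_id) +
    (cong | image_id) +
    (1 | word_id),
  REML = FALSE,
  control = lmerControl(optimizer = "bobyqa"),
  data = d_check
)
# fixed effects should sit near sim$B0, sim$Bcong, sim$Bpred, and sim$Bint
summary(m_check)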
# POWER ANALYSIS ----

# function to simulate a single experiment, fit the models, and record the
# result of the hypothesis test
sim_results <- function(sim_nr=1, nS=50, re_corr=0, alpha_lvl=0.05, save=FALSE) {
  # simulate data
  d <- sim_experiment(nS = nS, re_corr = re_corr)

  # get the results and record how long it takes
  t <- system.time({
    # fit model
    m <- lmer(
      uV ~ cong * pred +
        (cong * pred | subj_id) +
        (cong | image_id) +
        (1 | word_id),
      REML=FALSE,
      control = lmerControl(optimizer="bobyqa"),
      data=d
    )
    # fit model without the effect of interest
    m_no_int <- update(m, .~. -cong:pred)

    # if the fit failed to converge, refit without random correlations;
    # a chi-square of 0 (and a p-value of exactly 1) is usually a good
    # indicator of non-convergence here
    if (with(anova(m, m_no_int), Chisq)[[2]]==0) {
      # fit model
      m <- lmer(
        uV ~ cong * pred +
          (cong * pred || subj_id) +
          (cong || image_id) +
          (1 | word_id),
        REML=FALSE,
        control = lmerControl(optimizer="bobyqa", optCtrl=list(maxfun=1e8)),
        data=d
      )
      # fit model without the effect of interest
      m_no_int <- update(m, .~. -cong:pred)
      # record that this has occurred
      corrs_removed <- TRUE
    } else {
      corrs_removed <- FALSE
    }

    # get the effect of interest
    int_estimate <- fixef(m)[["cong:pred"]]

    # model comparison to test the hypothesis
    chisq_int <- anova(m, m_no_int)
    chisq_value <- with(chisq_int, Chisq)[[2]]
    p_value <- with(chisq_int, `Pr(>Chisq)`)[[2]]
  })

  # record results
  res <- tibble(
    sim_nr = sim_nr,
    nS = nS,
    re_corr = re_corr,
    estimate = int_estimate,
    chisq = chisq_value,
    p = p_value,
    is_sig = p_value < alpha_lvl,
    corrs_removed = corrs_removed,
    time_elapsed = t[["elapsed"]]
  )

  # optionally write the single-run results to disk (filename pattern assumed)
  if (save) write_csv(res, sprintf("sim_results_%s.csv", sim_nr))

  res
}

# run the simulations in parallel over a grid of design cells; the grid
# values and the number of runs per cell below are assumed placeholders,
# so adjust them to the desired design
cl <- makeCluster(n_cores)

# load packages and limit BLAS threads on each worker
clusterEvalQ(cl, {
  library(lme4)
  library(readr)
  library(dplyr)
  library(tidyr)
  library(tibble)
  library(purrr)
  library(faux)
  library(Matrix)
  library(RhpcBLASctl)
  blas_set_num_threads(1)
})

# export the objects the simulation functions depend on
clusterExport(cl, c(
  "sim", "stim_csv", "fe_vars", "cor_tbl2mat",
  "sim_experiment", "sim_results"
))

n_sims <- 500  # simulations per design cell (assumed value)

power <- crossing(nS = c(25, 50, 100), re_corr = c(0, 0.5)) %>%  # assumed grid
  pmap(function(nS, re_corr) {
    cat(sprintf("nS = %s, re_corr = %s\n", nS, re_corr))
    start <- Sys.time()
    res <- parLapply(cl, 1:n_sims, function(i) {
      sim_results(sim_nr = i, nS = nS, re_corr = re_corr)
    }) %>%
      reduce(bind_rows)
    end <- Sys.time()
    cat(" - finished in: ")
    print(end-start)
    # estimated power for a directional effect (p < .1 with a positive estimate)
    cat(sprintf(
      " - estimated power of %s\n",
      round(mean(res$p<0.1 & res$estimate>0), 3)
    ))
    res
  }) %>%
  reduce(bind_rows)

stopCluster(cl)

write_csv(power, "power.csv")
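# A minimal sketch of summarising the saved results, assuming the power.csv
# written above (with the p, estimate, nS, and re_corr columns produced by
# sim_results); it mirrors the directional power criterion printed during
# the run and marks the conventional 80% power threshold.
power_summary <- read_csv("power.csv") %>%
  group_by(nS, re_corr) %>%
  summarise(power = mean(p < 0.1 & estimate > 0), .groups="drop")

ggplot(power_summary, aes(nS, power, colour = factor(re_corr))) +
  geom_point() +
  geom_line() +
  geom_hline(yintercept = 0.8, linetype = "dashed") +
  labs(colour = "re_corr")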