# This script takes the sample-level data from analyse_02, analyse_11, and
# analyse_12, fits linear mixed-effects models, and saves the fixed effects.

library(lme4)
library(dplyr)
library(purrr)
library(readr)
library(parallel)

n_cores <- parallel::detectCores(all.tests=FALSE, logical=TRUE) - 2
# n_cores <- 7

# normalise a vector to lie between 0 and 1
norm01 <- function(x, ...) (x - min(x, ...)) / (max(x, ...) - min(x, ...))

# get the stimuli's percentage of name agreement values
stim <- read_csv("boss.csv", col_types = cols(perc_name_agree_denom_fq_inputs = col_number())) %>%
  select(filename, perc_name_agree_denom_fq_inputs) %>%
  rename(perc_name_agree = perc_name_agree_denom_fq_inputs)

# import the max electrode data from the preprocessing, and set up the
# variables for the model; ch_i optionally restricts the import to a
# subset of channels
get_sample_data <- function(path="sample_data", ch_i=NA) {
  list.files(path, pattern=".+\\.csv$", full.names=TRUE) %>%
    map_dfr(function(f) {
      message(sprintf(" - importing %s", f))
      x <- read_csv(
        f,
        col_types=cols(
          subj_id = col_character(),
          stim_grp = col_integer(),
          resp_grp = col_integer(),
          item_nr = col_integer(),
          ch_name = col_character(),
          time = col_double(),
          uV = col_double(),
          condition = col_character(),
          image = col_character(),
          string = col_character(),
          .default = col_double()
        ),
        progress = FALSE
      ) %>%
        select(-stim_grp, -resp_grp, -item_nr)
      if (all(!is.na(ch_i))) x <- filter(x, ch_name %in% ch_i)
      x
    }) %>%
    left_join(stim, by=c("image" = "filename")) |>
    mutate(
      prop_agree = perc_name_agree/100,
      pred_norm = norm01(prop_agree, na.rm=TRUE),
      # as factors for size efficiency
      ch_name = factor(ch_name),
      image = factor(image),
      string = factor(string),
      time = factor(time),
      subj_id = factor(subj_id)
    ) |>
    select(time, condition, pred_norm, subj_id, ch_name, image, string, uV)
}
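# Example usage sketch (not part of the pipeline; the channel names here are
# hypothetical): ch_i restricts the import to a subset of electrodes, e.g.
#   d <- get_sample_data(path="sample_data", ch_i=c("Cz", "Pz"))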
# fit models for each time point and electrode, extract the fixed effects,
# and repeat for each time-locked event
paths <- c("sample_data", "sample_data_picture", "sample_data_response")

for (p in paths) {
  message(sprintf("Modelling time points and channels individually for %s", p))

  # get a list of data frames, one per time point and channel, with a
  # deviation-coded congruency predictor
  d_list <- get_sample_data(path=p) |>
    mutate(cong_dev = as.numeric(scale(ifelse(condition=="A2", 0, 1), center=TRUE, scale=FALSE))) |>
    group_split(time, ch_name)

  gc_out <- gc()

  message(sprintf(" - fitting models on %g cores", n_cores))

  # fit models in parallel
  cl <- makeCluster(n_cores)
  cl_packages <- clusterEvalQ(cl, {
    library(dplyr)
    library(lme4)
  })

  fe_res <- parLapply(cl, d_list, function(d_i) {
    # a fuller random-effects structure, kept commented out for reference;
    # the simpler intercepts-only model below is the one fitted
    # m <- lme4::lmer(
    #   uV ~ cong_dev * pred_norm +
    #     (cong_dev * pred_norm | subj_id) +
    #     (cong_dev | image) +
    #     (1 | string),
    #   REML=FALSE,
    #   control = lmerControl(optimizer="bobyqa"),
    #   data=d_i
    # )

    m <- lme4::lmer(
      uV ~ cong_dev * pred_norm +
        (1 | subj_id) +
        (1 | image) +
        (1 | string),
      REML=FALSE,
      control = lmerControl(optimizer="bobyqa"),
      data=d_i
    )

    m |>
      summary() |>
      with(coefficients) |>
      as_tibble(rownames = "fe") |>
      mutate(
        time = unique(d_i$time),
        ch_name = unique(d_i$ch_name)
      )
  }) |>
    reduce(bind_rows)

  stopCluster(cl)

  # tidy the fixed-effect labels and add 95% Wald confidence bounds
  fe_res_tidy <- fe_res |>
    mutate(
      fe_lab = factor(
        recode(
          fe,
          `(Intercept)` = "Intercept",
          cong_dev = "Congruency",
          pred_norm = "Predictability",
          `cong_dev:pred_norm` = "Congruency * Predictability"
        ),
        levels = c("Intercept", "Congruency", "Predictability", "Congruency * Predictability")
      ),
      fe_lab_newline = factor(
        fe_lab,
        labels = c("Intercept", "Congruency", "Predictability", "Congruency\n* Predictability")
      ),
      bound_lower = Estimate - 1.96 * `Std. Error`,
      bound_upper = Estimate + 1.96 * `Std. Error`
    )

  p_short <- sub("_data", "", p, fixed=TRUE)
  write_csv(fe_res_tidy, sprintf("analyse_13_%s_res.csv", p_short))

  rm("d_list")
  gc_out <- gc()
}
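# Example follow-up sketch (hypothetical, not part of the pipeline): read one
# of the saved files back in and inspect the interaction term over time for a
# single (assumed) channel.
#   res <- read_csv("analyse_13_sample_res.csv")
#   res |>
#     filter(fe == "cong_dev:pred_norm", ch_name == "Cz") |>
#     select(time, Estimate, bound_lower, bound_upper)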