library(readr) library(dplyr) library(purrr) library(brms) library(tidybayes) library(parallel) # short function for calculating the population SD pop_sd <- function(x) sqrt((length(x)-1)/length(x)) * sd(x) n_cores <- parallel::detectCores(all.tests=FALSE, logical=TRUE) # n_cores <- 13 library(future) plan(multicore, workers=n_cores) set.seed(1) # import neural rdm rdm <- file.path("rdm_data", "time_resolved") |> list.files(pattern=".*\\.csv", full.names=TRUE) |> map_df(read_csv, col_types=c("subj_id"="c")) |> group_by(subj_id, time) |> mutate(rank_eeg_dissim = rank(eeg_dissim)) |> ungroup() |> arrange(time) rdm_poi <- file.path("rdm_data", "period_of_interest") |> list.files(pattern=".*\\.csv", full.names=TRUE) |> map_df(read_csv, col_types=c("subj_id"="c")) |> group_by(subj_id) |> mutate(rank_eeg_dissim = rank(eeg_dissim)) |> ungroup() rdm_p1 <- file.path("rdm_data", "p1_period") |> list.files(pattern=".*\\.csv", full.names=TRUE) |> map_df(read_csv, col_types=c("subj_id"="c")) |> group_by(subj_id) |> mutate(rank_eeg_dissim = rank(eeg_dissim)) |> ungroup() times <- sort(unique(rdm$time)) model_paths <- file.path("stim_sim", "ANNs") |> list.files(include.dirs=TRUE, full.names=TRUE) # first, for each module, get the period-of-interest correlations for (period in c("p1", "poi")) { cor_res_period <- map_df(model_paths, function(path) { mod_lab <- basename(path) layer_paths <- list.files(path, "^.*\\.csv$", full.names=TRUE) map_df(layer_paths, function(layer_path_i) { layer_lab <- tools::file_path_sans_ext(basename(layer_path_i)) message(sprintf("%s %s - %s", period, mod_lab, layer_lab)) # read the file cors <- read_csv(layer_path_i, col_types = cols(char1=col_factor(), char2=col_factor())) |> rename(mod_cor = cor) |> mutate( mod_dissim = 1-mod_cor, rank_mod_dissim = rank(mod_dissim) ) |> select(char1, char2, rank_mod_dissim) # only used variables if (period == "poi") { rdm_cors <- left_join(rdm_poi, cors, by=c("char1", "char2")) } else if (period == "p1") { rdm_cors <- left_join(rdm_p1, cors, by=c("char1", "char2")) } # correlation prior lkj_prior <- 1.5 m_rho_prior_full <- c( set_prior(sprintf("lkj(%s)", lkj_prior), class="rescor"), set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_cors$rank_eeg_dissim))), class="sigma", resp="rankeegdissim", ub=max(rdm_cors$rank_eeg_dissim)), set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_cors$rank_mod_dissim))), class="sigma", resp="rankmoddissim", ub=max(rdm_cors$rank_mod_dissim)) ) # fit the models in parallel m_rho <- brm( bf( mvbind(rank_eeg_dissim, rank_mod_dissim) ~ 0 ) + set_rescor(rescor=TRUE), family = brmsfamily("gaussian"), iter = 10000, warmup = 5000, chains = 8, cores = n_cores, seed = 1, # centre each dimension since we don't model intercept, then split by time points into list data = mutate(rdm_cors, across(starts_with("rank"), function(x) x - mean(x))), prior = m_rho_prior_full, control = list(adapt_delta = 0.99), silent = TRUE ) m_rho |> as_draws_df("rescor__rankeegdissim__rankmoddissim") |> rename(rho = 1) |> select(-starts_with(".")) |> mutate(model = mod_lab, layer = layer_lab, period = period) }) }) saveRDS(cor_res_period, file.path("estimates", sprintf("ANNs_%s_draws.rds", period))) rm(cor_res_period) gc() } rm(rdm_poi, rdm_p1) gc() # now, for each model, for each module, get the time-resolved correlations cor_res <- map_df(model_paths, function(path) { mod_lab <- basename(path) layer_paths <- list.files(path, "^.*\\.csv$", full.names=TRUE) map_df(layer_paths, function(layer_path_i) { layer_lab <- tools::file_path_sans_ext(basename(layer_path_i)) message(sprintf("%s - %s", mod_lab, layer_lab)) # read the file cors <- read_csv(layer_path_i, col_types = cols(char1=col_factor(), char2=col_factor())) |> rename(mod_cor = cor) |> mutate( mod_dissim = 1-mod_cor, rank_mod_dissim = rank(mod_dissim) ) |> select(char1, char2, rank_mod_dissim) # only used variables rdm_cors <- left_join(rdm, cors, by=c("char1", "char2")) # correlation prior lkj_prior <- 1 m_rho_prior_full <- c( set_prior(sprintf("lkj(%s)", lkj_prior), class="rescor"), set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_cors$rank_eeg_dissim))), class="sigma", resp="rankeegdissim", ub=max(rdm_cors$rank_eeg_dissim)), set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_cors$rank_mod_dissim))), class="sigma", resp="rankmoddissim", ub=max(rdm_cors$rank_mod_dissim)) ) # fit the models in parallel m_rho <- brm_multiple( bf( mvbind(rank_eeg_dissim, rank_mod_dissim) ~ 0 ) + set_rescor(rescor=TRUE), family = brmsfamily("gaussian"), iter = 10000, warmup = 5000, # chains = 8, # reduced for memory feasibility chains = 4, cores = 1, seed = 1, # centre each dimension since we don't model intercept, then split by time points into list data = rdm_cors |> group_by(time) |> mutate(across(starts_with("rank"), function(x) x - mean(x))) |> ungroup() |> group_split(time), combine = FALSE, prior = m_rho_prior_full, control = list(adapt_delta = 0.99), silent = TRUE, refresh = 0 ) ests <- map_df(1:length(times), function(t) { m_t <- m_rho[[t]] time_t <- times[[t]] m_t |> as_draws_df("rescor__rankeegdissim__rankmoddissim") |> rename(rho = 1) |> # done inside the loop for RAM median_hdi(rho, .width=0.89) |> mutate(time = time_t) }) rm(m_rho) gc() ests |> mutate( model = factor(mod_lab, levels=basename(model_paths)), layer = layer_lab ) |> dplyr::select(model, layer, time, rho, .lower, .upper) |> mutate( time = factor(time, levels=times), model = factor(model, levels=basename(model_paths)) ) }) }) saveRDS(cor_res, file.path("estimates", "ANNs_time_resolved.rds"))