library(readr) library(dplyr) library(purrr) library(tidyr) library(brms) library(ggplot2) library(ggdist) # library(fda) library(patchwork) library(parallel) n_cores <- parallel::detectCores(all.tests=FALSE, logical=TRUE) # n_cores <- 14 # options(loo.cores = n_cores) set.seed(1) stim_sim <- left_join( read_csv(file.path("stim_sim", "preregistered", "jacc.csv")), read_csv(file.path("stim_sim", "preregistered", "ot.csv")), by = c("char1", "char2") ) |> mutate( rank_jacc = rank(jacc), rank_ot = rank(ot) ) |> rowwise() |> mutate(pair_id = paste(sort(c(char1, char2)), collapse="_")) |> ungroup() cor(stim_sim$rank_ot, stim_sim$rank_jacc) rdm <- file.path("rdm_data", "time_resolved") |> list.files(pattern=".*\\.csv", full.names=TRUE) |> map_df(read_csv, col_types=c("subj_id"="c")) |> left_join(stim_sim, by=c("char1", "char2")) |> group_by(subj_id, time) |> mutate(rank_eeg_dissim = rank(eeg_dissim)) |> ungroup() |> arrange(time) times <- sort(unique(rdm$time)) rdm_poi <- file.path("rdm_data", "period_of_interest") |> list.files(pattern=".*\\.csv", full.names=TRUE) |> map_df(read_csv, col_types=c("subj_id"="c")) |> left_join(stim_sim, by=c("char1", "char2")) |> group_by(subj_id) |> mutate(rank_eeg_dissim = rank(eeg_dissim)) |> ungroup() # fit full model ------------------------------------------- # this is a multivariate model that estimates correlations between all items # partial corr model # quick approximation # ppcor::pcor(dplyr::select(rdm_poi, starts_with("rank"))) # check that cor2pcor works # cor_mat <- cor(dplyr::select(rdm_poi, rank_jacc, rank_ot, rank_eeg_dissim)) # corpcor::cor2pcor(cor_mat) # short function for calculating the population SD pop_sd <- function(x) sqrt((length(x)-1)/length(x)) * sd(x) # check correlation priors lkj_prior <- 1.5 # lkj_prior <- 10000 # r <- rethinking::rlkjcorr(n=1e5, K=3, eta=lkj_prior) # plot(density(r[, 1, 2])) # plot(density(r[, 1, 3])) # plot(density(r[, 1, 2] - r[, 2, 3])) m_rho_prior_full <- c( # biased towards 0 for non-diagonal correlations, because lower correlations are more likely in RSA of EEG data set_prior(sprintf("lkj(%s)", lkj_prior), class="rescor"), set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_poi$rank_eeg_dissim))), class="sigma", resp="rankeegdissim", ub=max(rdm_poi$rank_eeg_dissim)), set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_poi$rank_jacc))), class="sigma", resp="rankjacc", ub=max(rdm_poi$rank_jacc)), set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_poi$rank_ot))), class="sigma", resp="rankot", ub=max(rdm_poi$rank_ot)) ) m_rho_full_poi <- brm( bf( mvbind(rank_eeg_dissim, rank_ot, rank_jacc) ~ 0 ) + set_rescor(rescor=TRUE), family = brmsfamily("gaussian"), iter = 24000, warmup = 6000, chains = 8, cores = n_cores, seed = 1, # centre each dimension since we don't model intercept data = mutate(rdm_poi, across(starts_with("rank"), function(x) x - mean(x))), prior = m_rho_prior_full, save_pars = save_pars(all=TRUE), sample_prior = FALSE, control = list(adapt_delta = 0.99) ) # full_samps <- as_draws_df(m_rho_full_poi, variable="^rescor_.*", regex=TRUE) # get a vector of response variable names (will match order of correlation matrix) var_names <- colnames(get_y(m_rho_full_poi)) # get correlation matrices for each draw cor_samps <- as_draws_df(m_rho_full_poi, "^Lrescor", regex=TRUE) |> # convert columns into covariance, correlation, and partial correlation matrix for each draw pivot_longer(cols=starts_with("Lrescor"), names_to="idx", values_to="r") |> mutate(idx = sub("Lrescor", "", idx, fixed=TRUE)) |> mutate(idx = gsub("(\\[)|(\\])", "", idx)) |> separate(idx, c("idx1", "idx2"), sep=",") |> mutate(idx1=as.integer(idx1), idx2=as.integer(idx2)) |> group_by(.chain, .iteration, .draw) |> nest() |> # get correlation and partial correlation matrix for each draw mutate( cor_mat = map( data, function(x) { cov_mat <- matrix(0, nrow=max(x$idx1), ncol=max(x$idx2)) cov_mat[cbind(x$idx1, x$idx2)] <- x$r cov_mat[upper.tri(cov_mat, diag=FALSE)] <- t(cov_mat)[upper.tri(cov_mat, diag=FALSE)] colnames(cov_mat) <- var_names rownames(cov_mat) <- var_names cov2cor(cov_mat) } ), pcor_mat = map( cor_mat, function(x) { pcor_mat <- corpcor::cor2pcor(x) colnames(pcor_mat) <- var_names rownames(pcor_mat) <- var_names pcor_mat } ), cor_rankeegdissim_rankjacc = map_dbl(cor_mat, function(x) x["rankeegdissim", "rankjacc"]), cor_rankeegdissim_rankot = map_dbl(cor_mat, function(x) x["rankeegdissim", "rankot"]), pcor_rankeegdissim_rankjacc = map_dbl(pcor_mat, function(x) x["rankeegdissim", "rankjacc"]), pcor_rankeegdissim_rankot = map_dbl(pcor_mat, function(x) x["rankeegdissim", "rankot"]) ) # calculate Evidence Ratios for each hypothesis bf_1 <- sum(cor_samps$cor_rankeegdissim_rankot > 0) / sum(cor_samps$cor_rankeegdissim_rankot < 0) bf_2a <- sum(cor_samps$cor_rankeegdissim_rankot > cor_samps$cor_rankeegdissim_rankjacc) / sum(cor_samps$cor_rankeegdissim_rankot <= cor_samps$cor_rankeegdissim_rankjacc) bf_2b <- sum(cor_samps$pcor_rankeegdissim_rankot > cor_samps$pcor_rankeegdissim_rankjacc) / sum(cor_samps$pcor_rankeegdissim_rankot <= cor_samps$pcor_rankeegdissim_rankjacc) # save the partial Rho estimates cor_samps_long <- cor_samps |> pivot_longer(c(starts_with("pcor_rankeegdissim"), starts_with("cor_rankeegdissim")), names_to="cor_lab", values_to="Rho") |> mutate( model = ifelse(grepl("jacc", cor_lab, fixed=TRUE), "Jaccard Distance", "Wasserstein Distance"), is_partial = grepl("^pcor", cor_lab) ) |> dplyr::select(-data, -cor_mat, -pcor_mat) cor_samps_long |> group_by(model, is_partial) |> median_hdi(Rho, .width=.89) saveRDS(cor_samps_long, file.path("estimates", "prereg_cor_samps_long.rds")) # reduce RAM rm(cor_samps, cor_samps_long, m_rho_full_poi, rdm_poi, stim_sim) gc() # fit time-resolved models ----------------------------------------------- # fit models for each time point # Note: fitting models to all subjects' data, where ranked within subject, gives roughly equivalent peak posteriors to averaging over per-subject Rho estimates # fit models in parallel for speed library(future) plan(multicore, workers=n_cores) message("Fitting time-resolved models in parallel") m_rho <- brm_multiple( bf( mvbind(rank_eeg_dissim, rank_ot, rank_jacc) ~ 0 ) + set_rescor(rescor=TRUE), family = brmsfamily("gaussian"), iter = 10000, warmup = 5000, chains = 8, cores = 1, seed = 1, # centre each dimension since we don't model intercept, then split by time points into list data = rdm |> group_by(time) |> mutate(across(starts_with("rank"), function(x) x - mean(x))) |> ungroup() |> group_split(time), combine = FALSE, prior = m_rho_prior_full, control = list(adapt_delta = 0.99), save_pars = save_pars(all=TRUE) ) # get posteriors for each time point tc <- map_df(1:length(m_rho), function(i) { t_i <- times[[i]] m_i <- m_rho[[i]] var_names_i <- colnames(get_y(m_i)) as_draws_df(m_i, "^Lrescor", regex=TRUE) |> # convert columns into covariance, correlation, and partial correlation matrix for each draw pivot_longer(cols=starts_with("Lrescor"), names_to="idx", values_to="r") |> mutate(idx = sub("Lrescor", "", idx, fixed=TRUE)) |> mutate(idx = gsub("(\\[)|(\\])", "", idx)) |> separate(idx, c("idx1", "idx2"), sep=",") |> mutate(idx1=as.integer(idx1), idx2=as.integer(idx2)) |> group_by(.chain, .iteration, .draw) |> nest() |> # get correlation and partial correlation matrix for each draw mutate( cor_mat = map( data, function(x) { cov_mat <- matrix(0, nrow=max(x$idx1), ncol=max(x$idx2)) cov_mat[cbind(x$idx1, x$idx2)] <- x$r cov_mat[upper.tri(cov_mat, diag=FALSE)] <- t(cov_mat)[upper.tri(cov_mat, diag=FALSE)] colnames(cov_mat) <- var_names_i rownames(cov_mat) <- var_names_i cov2cor(cov_mat) } ), pcor_mat = map( cor_mat, function(x) { pcor_mat <- corpcor::cor2pcor(x) colnames(pcor_mat) <- var_names_i rownames(pcor_mat) <- var_names_i pcor_mat } ), cor_rankeegdissim_rankjacc = map_dbl(cor_mat, function(x) x["rankeegdissim", "rankjacc"]), cor_rankeegdissim_rankot = map_dbl(cor_mat, function(x) x["rankeegdissim", "rankot"]), pcor_rankeegdissim_rankjacc = map_dbl(pcor_mat, function(x) x["rankeegdissim", "rankjacc"]), pcor_rankeegdissim_rankot = map_dbl(pcor_mat, function(x) x["rankeegdissim", "rankot"]), time = t_i ) |> dplyr::select(-data, -cor_mat, -pcor_mat) }) saveRDS(tc, file.path("estimates", "planned_timecourse.rds"))