# This script runs a sensitivity analysis, where we alter the value of the LKJ eta parameter and extract the posteriors

library(readr)
library(dplyr)
library(purrr)
library(tidyr)
library(brms)
library(tidybayes)
library(parallel)

n_cores <- parallel::detectCores()  # total number of cores
n_cores_per_model <- 4  # works best if n_cores is divisible by this
n_parallel_models <- floor(n_cores / n_cores_per_model)  # number of models to be fit in parallel at one time

set.seed(1)

stim_sim <- left_join(
  read_csv(file.path("stim_sim", "preregistered", "jacc.csv")),
  read_csv(file.path("stim_sim", "preregistered", "ot.csv")),
  by = c("char1", "char2")
) |>
  mutate(
    rank_jacc = rank(jacc),
    rank_ot = rank(ot)
  ) |>
  rowwise() |>
  mutate(pair_id = paste(sort(c(char1, char2)), collapse="_")) |>
  ungroup()

rdm_poi <- file.path("rdm_data", "period_of_interest") |>
  list.files(pattern=".*\\.csv", full.names=TRUE) |>
  map_df(read_csv, col_types=c("subj_id"="c")) |>
  left_join(stim_sim, by=c("char1", "char2")) |>
  group_by(subj_id) |>
  mutate(rank_eeg_dissim = rank(eeg_dissim)) |>
  ungroup()

# short function for calculating the population SD
pop_sd <- function(x) sqrt((length(x)-1)/length(x)) * sd(x)

# function to fit the model
fit_mod <- function(lkj_prior, sample_prior="no") {
  m_rho_prior_full <- c(
    set_prior(sprintf("lkj(%s)", lkj_prior), class="rescor"),
    set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_poi$rank_eeg_dissim))), class="sigma", resp="rankeegdissim", ub=max(rdm_poi$rank_eeg_dissim)),
    set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_poi$rank_jacc))), class="sigma", resp="rankjacc", ub=max(rdm_poi$rank_jacc)),
    set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_poi$rank_ot))), class="sigma", resp="rankot", ub=max(rdm_poi$rank_ot))
  )

  m_rho_full_poi <- brm(
    bf(
      mvbind(rank_eeg_dissim, rank_ot, rank_jacc) ~ 0
    ) + set_rescor(rescor=TRUE),
    family = brmsfamily("gaussian"),
    iter = 24000,
    warmup = 6000,
    chains = 8,
    cores = n_cores_per_model,
    seed = 1,
    # centre each dimension since we don't model an intercept
    data = mutate(rdm_poi, across(starts_with("rank"), function(x) x - mean(x))),
    prior = m_rho_prior_full,
    save_pars = save_pars(all=TRUE),
    sample_prior = sample_prior,
    control = list(adapt_delta = 0.99)
  )

  m_rho_full_poi
}

# function to fit the model and return the posterior median and 89% HDI for the Wasserstein and Jaccard (partial) correlations
sample_and_summarise_posteriors <- function(lkj_prior) {
  m_rho_full_poi <- fit_mod(lkj_prior, sample_prior="no")

  # get a vector of response variable names (will match order of correlation matrix)
  var_names <- colnames(get_y(m_rho_full_poi))

  # summarise the posteriors
  summ <- as_draws_df(m_rho_full_poi, "^Lrescor", regex=TRUE) |>
    # convert columns into covariance, correlation, and partial correlation matrix for each draw
    pivot_longer(cols=starts_with("Lrescor"), names_to="idx", values_to="r") |>
    mutate(idx = sub("Lrescor", "", idx, fixed=TRUE)) |>
    mutate(idx = gsub("(\\[)|(\\])", "", idx)) |>
    separate(idx, c("idx1", "idx2"), sep=",") |>
    mutate(idx1=as.integer(idx1), idx2=as.integer(idx2)) |>
    group_by(.chain, .iteration, .draw) |>
    nest() |>
    # get correlation and partial correlation matrix for each draw
    mutate(
      cor_mat = map(
        data,
        function(x) {
          cov_mat <- matrix(0, nrow=max(x$idx1), ncol=max(x$idx2))
          cov_mat[cbind(x$idx1, x$idx2)] <- x$r
          cov_mat[upper.tri(cov_mat, diag=FALSE)] <- t(cov_mat)[upper.tri(cov_mat, diag=FALSE)]
          colnames(cov_mat) <- var_names
          rownames(cov_mat) <- var_names
          cov2cor(cov_mat)
        }
      ),
      pcor_mat = map(
        cor_mat,
        function(x) {
          pcor_mat <- corpcor::cor2pcor(x)
          colnames(pcor_mat) <- var_names
          rownames(pcor_mat) <- var_names
          pcor_mat
        }
      ),
      cor_rankeegdissim_rankjacc = map_dbl(cor_mat, function(x) x["rankeegdissim", "rankjacc"]),
      cor_rankeegdissim_rankot = map_dbl(cor_mat, function(x) x["rankeegdissim", "rankot"]),
      pcor_rankeegdissim_rankjacc = map_dbl(pcor_mat, function(x) x["rankeegdissim", "rankjacc"]),
      pcor_rankeegdissim_rankot = map_dbl(pcor_mat, function(x) x["rankeegdissim", "rankot"])
    ) |>
    pivot_longer(c(starts_with("pcor_rankeegdissim"), starts_with("cor_rankeegdissim")), names_to="cor_lab", values_to="Rho") |>
    mutate(
      model = ifelse(grepl("jacc", cor_lab, fixed=TRUE), "Jaccard Distance", "Wasserstein Distance"),
      is_partial = grepl("^pcor", cor_lab)
    ) |>
    dplyr::select(-data, -cor_mat, -pcor_mat) |>
    group_by(model, is_partial) |>
    median_hdi(Rho, .width=.89) |>
    ungroup() |>
    mutate(eta = lkj_prior) |>
    select(eta, everything())

  summ
}

# set up a vector of eta values to use as the lkj prior (log-spaced from 0.001 to 1000)
n_models <- 50
lkj_vals <- 10^seq(-3, 3, length.out=n_models)

# make a cluster with as many workers as necessary
cl <- makeCluster(n_parallel_models)

cl_pkg <- clusterEvalQ(cl, {
  library(dplyr)
  library(purrr)
  library(tidyr)
  library(brms)
  library(tidybayes)
})

cl_obj <- clusterExport(cl, list(
  "pop_sd",
  "rdm_poi",
  "fit_mod",
  "n_cores_per_model"
))

posterior_res <- parLapply(cl, lkj_vals, sample_and_summarise_posteriors) |>
  reduce(bind_rows)

stopCluster(cl)

saveRDS(posterior_res, file.path("estimates", "sensitivity_lkj_prior.rds"))
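
# Optional usage sketch (illustration only, not part of the analysis pipeline above):
# one way to inspect how the posterior summaries vary with eta. Assumes ggplot2 is
# installed; the column names (eta, model, is_partial, Rho, .lower, .upper) come from
# the summary built in sample_and_summarise_posteriors(), and "estimate" is a label
# created here purely for plotting.
if (interactive()) {
  library(ggplot2)
  readRDS(file.path("estimates", "sensitivity_lkj_prior.rds")) |>
    mutate(estimate = ifelse(is_partial, "Partial correlation", "Correlation")) |>
    ggplot(aes(x = eta, y = Rho, ymin = .lower, ymax = .upper, colour = model)) +
    geom_pointrange() +
    scale_x_log10() +
    facet_wrap(~ estimate) +
    labs(x = "LKJ prior eta", y = "Rho (median, 89% HDI)")
}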