07_rsa_anns.R 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. library(readr)
  2. library(dplyr)
  3. library(purrr)
  4. library(brms)
  5. library(tidybayes)
  6. library(parallel)
  7. # short function for calculating the population SD
  8. pop_sd <- function(x) sqrt((length(x)-1)/length(x)) * sd(x)
  9. n_cores <- parallel::detectCores(all.tests=FALSE, logical=TRUE)
  10. # n_cores <- 13
  11. library(future)
  12. plan(multicore, workers=n_cores)
  13. set.seed(1)
  14. # import neural rdm
  15. rdm <- file.path("rdm_data", "time_resolved") |>
  16. list.files(pattern=".*\\.csv", full.names=TRUE) |>
  17. map_df(read_csv, col_types=c("subj_id"="c")) |>
  18. group_by(subj_id, time) |>
  19. mutate(rank_eeg_dissim = rank(eeg_dissim)) |>
  20. ungroup() |>
  21. arrange(time)
  22. rdm_poi <- file.path("rdm_data", "period_of_interest") |>
  23. list.files(pattern=".*\\.csv", full.names=TRUE) |>
  24. map_df(read_csv, col_types=c("subj_id"="c")) |>
  25. group_by(subj_id) |>
  26. mutate(rank_eeg_dissim = rank(eeg_dissim)) |>
  27. ungroup()
  28. rdm_p1 <- file.path("rdm_data", "p1_period") |>
  29. list.files(pattern=".*\\.csv", full.names=TRUE) |>
  30. map_df(read_csv, col_types=c("subj_id"="c")) |>
  31. group_by(subj_id) |>
  32. mutate(rank_eeg_dissim = rank(eeg_dissim)) |>
  33. ungroup()
  34. times <- sort(unique(rdm$time))
  35. model_paths <- file.path("stim_sim", "ANNs") |>
  36. list.files(include.dirs=TRUE, full.names=TRUE)
  37. # first, for each module, get the period-of-interest correlations
  38. for (period in c("p1", "poi")) {
  39. cor_res_period <- map_df(model_paths, function(path) {
  40. mod_lab <- basename(path)
  41. layer_paths <- list.files(path, "^.*\\.csv$", full.names=TRUE)
  42. map_df(layer_paths, function(layer_path_i) {
  43. layer_lab <- tools::file_path_sans_ext(basename(layer_path_i))
  44. message(sprintf("%s %s - %s", period, mod_lab, layer_lab))
  45. # read the file
  46. cors <- read_csv(layer_path_i, col_types = cols(char1=col_factor(), char2=col_factor())) |>
  47. rename(mod_cor = cor) |>
  48. mutate(
  49. mod_dissim = 1-mod_cor,
  50. rank_mod_dissim = rank(mod_dissim)
  51. ) |>
  52. select(char1, char2, rank_mod_dissim) # only used variables
  53. if (period == "poi") {
  54. rdm_cors <- left_join(rdm_poi, cors, by=c("char1", "char2"))
  55. } else if (period == "p1") {
  56. rdm_cors <- left_join(rdm_p1, cors, by=c("char1", "char2"))
  57. }
  58. # correlation prior
  59. lkj_prior <- 1.5
  60. m_rho_prior_full <- c(
  61. set_prior(sprintf("lkj(%s)", lkj_prior), class="rescor"),
  62. set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_cors$rank_eeg_dissim))), class="sigma", resp="rankeegdissim", ub=max(rdm_cors$rank_eeg_dissim)),
  63. set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_cors$rank_mod_dissim))), class="sigma", resp="rankmoddissim", ub=max(rdm_cors$rank_mod_dissim))
  64. )
  65. # fit the models in parallel
  66. m_rho <- brm(
  67. bf(
  68. mvbind(rank_eeg_dissim, rank_mod_dissim) ~ 0
  69. ) +
  70. set_rescor(rescor=TRUE),
  71. family = brmsfamily("gaussian"),
  72. iter = 10000,
  73. warmup = 5000,
  74. chains = 8,
  75. cores = n_cores,
  76. seed = 1,
  77. # centre each dimension since we don't model intercept, then split by time points into list
  78. data = mutate(rdm_cors, across(starts_with("rank"), function(x) x - mean(x))),
  79. prior = m_rho_prior_full,
  80. control = list(adapt_delta = 0.99),
  81. silent = TRUE
  82. )
  83. m_rho |>
  84. as_draws_df("rescor__rankeegdissim__rankmoddissim") |>
  85. rename(rho = 1) |>
  86. select(-starts_with(".")) |>
  87. mutate(model = mod_lab, layer = layer_lab, period = period)
  88. })
  89. })
  90. saveRDS(cor_res_period, file.path("estimates", sprintf("ANNs_%s_draws.rds", period)))
  91. rm(cor_res_period)
  92. gc()
  93. }
  94. rm(rdm_poi, rdm_p1)
  95. gc()
  96. # now, for each model, for each module, get the time-resolved correlations
  97. cor_res <- map_df(model_paths, function(path) {
  98. mod_lab <- basename(path)
  99. layer_paths <- list.files(path, "^.*\\.csv$", full.names=TRUE)
  100. map_df(layer_paths, function(layer_path_i) {
  101. layer_lab <- tools::file_path_sans_ext(basename(layer_path_i))
  102. message(sprintf("%s - %s", mod_lab, layer_lab))
  103. # read the file
  104. cors <- read_csv(layer_path_i, col_types = cols(char1=col_factor(), char2=col_factor())) |>
  105. rename(mod_cor = cor) |>
  106. mutate(
  107. mod_dissim = 1-mod_cor,
  108. rank_mod_dissim = rank(mod_dissim)
  109. ) |>
  110. select(char1, char2, rank_mod_dissim) # only used variables
  111. rdm_cors <- left_join(rdm, cors, by=c("char1", "char2"))
  112. # correlation prior
  113. lkj_prior <- 1
  114. m_rho_prior_full <- c(
  115. set_prior(sprintf("lkj(%s)", lkj_prior), class="rescor"),
  116. set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_cors$rank_eeg_dissim))), class="sigma", resp="rankeegdissim", ub=max(rdm_cors$rank_eeg_dissim)),
  117. set_prior(sprintf("constant(%s)", pop_sd(1:max(rdm_cors$rank_mod_dissim))), class="sigma", resp="rankmoddissim", ub=max(rdm_cors$rank_mod_dissim))
  118. )
  119. # fit the models in parallel
  120. m_rho <- brm_multiple(
  121. bf(
  122. mvbind(rank_eeg_dissim, rank_mod_dissim) ~ 0
  123. ) +
  124. set_rescor(rescor=TRUE),
  125. family = brmsfamily("gaussian"),
  126. iter = 10000,
  127. warmup = 5000,
  128. # chains = 8, # reduced for memory feasibility
  129. chains = 4,
  130. cores = 1,
  131. seed = 1,
  132. # centre each dimension since we don't model intercept, then split by time points into list
  133. data = rdm_cors |>
  134. group_by(time) |>
  135. mutate(across(starts_with("rank"), function(x) x - mean(x))) |>
  136. ungroup() |>
  137. group_split(time),
  138. combine = FALSE,
  139. prior = m_rho_prior_full,
  140. control = list(adapt_delta = 0.99),
  141. silent = TRUE,
  142. refresh = 0
  143. )
  144. ests <- map_df(1:length(times), function(t) {
  145. m_t <- m_rho[[t]]
  146. time_t <- times[[t]]
  147. m_t |>
  148. as_draws_df("rescor__rankeegdissim__rankmoddissim") |>
  149. rename(rho = 1) |>
  150. # done inside the loop for RAM
  151. median_hdi(rho, .width=0.89) |>
  152. mutate(time = time_t)
  153. })
  154. rm(m_rho)
  155. gc()
  156. ests |>
  157. mutate(
  158. model = factor(mod_lab, levels=basename(model_paths)),
  159. layer = layer_lab
  160. ) |>
  161. dplyr::select(model, layer, time, rho, .lower, .upper) |>
  162. mutate(
  163. time = factor(time, levels=times),
  164. model = factor(model, levels=basename(model_paths))
  165. )
  166. })
  167. })
  168. saveRDS(cor_res, file.path("estimates", "ANNs_time_resolved.rds"))