analysis-notebook.Rmd 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675
  1. ---
  2. title: "Data Analysis of Predictive coding in ASD"
  3. author: "Z. Shi, L. Theisinger, F. Allenmark, R. Pistorius, H. Müller, C. Falter-Wagner"
  4. output: html_notebook
  5. ---
  6. # Data Structure
  7. 1. `/experiments`: Experimental codes and instructions
  8. This sub-folder contains Matlab codes and instructions for the duration reproduction task. The sequences of the duration reproductions are stored in the sub-folder `/experiments/seqs`. Those sequences were used for matched participants.
  9. 2. `/data`: raw data files
  10. - `rawdata.csv`: Raw reproduction trials
  11. - `outliers.csv`: those produced almost flat reproduction
  12. 3. `/figures`: store figures used in the paper.
  13. # Data Analysis
  14. ## 1. load raw data
  15. ```{r packages, message=FALSE, warning=FALSE, include=FALSE}
  16. # load packages
  17. library(tidyverse)
  18. library(ez)
  19. library(cowplot)
  20. library(BayesFactor) # in case need Bayes factor analysis
  21. library(bayestestR)
  22. library(rstatix) # using tidyverse friendly statistics
  23. library(GGally)
  24. # ---- read data and preparation -----
  25. rawdata = read_csv('./data/rawdata.csv')
  26. rawdata$group = toupper(rawdata$group)
  27. # flag
  28. saveFig = FALSE
  29. ```
  30. Show the raw trial structure:
  31. ```{r raw trial structure}
  32. glimpse(rawdata)
  33. ```
  34. In the raw trials, there are several important columns which are relevant for further analyses.
  35. - `Duration`: The test durations generated by computer. 'pdur' is the actual presented durations by the computer. There were some fluctuations, but within 5 ms (within 1 refresh frame).
  36. - `sub`, `group`, `sequence` are anonymized subject number, group and the duration sequence used in the experiment.
  37. - `Reproduction`, `rep_err`: the reproduced durations and the reproduction errors compared to the given duration.
  38. - `itd`, `preDuration`: the inter-trial difference in a given trial, and the duration used in the previous trial. These were used in the sequential effect analysis.
  39. ### Participants
  40. ```{r participants}
  41. # Total participants
  42. print(length (unique(rawdata$sub)))
  43. # total sequences (pairs)
  44. print(length(unique(rawdata$sequence)))
  45. ```
  46. ## The sampled durations and sequences
  47. Let's first illustrate the sequence and the distribution of the sample durations.
  48. ```{r sequence}
  49. # ---- illustrate one sequence -----
  50. fig11 = ggplot(rawdata, aes(Duration)) + geom_histogram(binwidth = 0.1, fill = I("white"), col = I('black')) +
  51. theme_classic() + xlab('Duration (secs)')
  52. # a typical sequence
  53. sub1 = rawdata %>% filter(sub == 'ara27')
  54. fig12 = ggplot(sub1, aes(trlNo, Duration, color = Volatility)) + geom_line() +
  55. xlab("Trial Sequence") + theme_classic()+
  56. theme(legend.position = 'top')
  57. # histogram of the typical sequence
  58. fig13 = ggplot(sub1, aes(y=Duration, color = Volatility)) + geom_density() + theme_classic() + theme(legend.position = 'top')
  59. fig13
  60. fig1 = plot_grid(fig12,fig13, rel_widths =c(3,1))
  61. fig1
  62. if (saveFig){
  63. ggsave("figures/fig_sequence.png", fig1, width=4, height=3.5)
  64. ggsave("figures/fig_sequence.pdf", fig1, width=4, height=3.5)
  65. }
  66. ```
  67. ## Explorative data analysis and outlier detection
  68. We first estimate two key signatures - the central tendency index (ci) and the sequential dependence index (si).
  69. ```{r linear_models}
  70. # using error for regression, -slope is the central tendency index.
  71. ct_model <- function(df){
  72. lm(rep_err ~ Duration, data = df)
  73. }
  74. # sequential effect using the duration from the previous trial
  75. seq_model <- function(df){
  76. lm(rep_err ~ preDuration, data = df)
  77. }
  78. # exclude extreme trials (beyond [1/3D, 3D])
  79. vdata = rawdata %>%
  80. filter(Reproduction > Duration/3, Reproduction < 3*Duration)
  81. # calculate slope for the central tendency as well as the sequential dependence.
  82. slopes_ct <- vdata %>% group_by(sub, sequence, Volatility, group, Order) %>%
  83. nest() %>% # nested data
  84. mutate(model = map(data, ct_model)) %>% # linear regression
  85. mutate(slope = map(model, broom::tidy)) %>% # get estimates out
  86. unnest(slope, .drop = TRUE) %>% # remove raw data
  87. select(-std.error,-statistic, -p.value) %>% # remove unnecessary columns
  88. spread(term, estimate) %>% # spread estimates
  89. rename(intercept_ct = `(Intercept)`, slope = Duration) %>%
  90. select(-data, -model)
  91. #sequential effect
  92. slopes_seq <- vdata %>% filter(!is.na(preDuration)) %>%
  93. group_by(sub, sequence, Volatility, group, Order) %>%
  94. nest() %>% # nested data
  95. mutate(model = map(data, seq_model)) %>% # linear regression
  96. mutate(slope = map(model, broom::tidy)) %>% # get estimates out
  97. unnest(slope, .drop = TRUE) %>% # remove raw data
  98. select(-std.error,-statistic, -p.value) %>% # remove unnecessary columns
  99. spread(term, estimate) %>% # spread estimates
  100. rename(intercept_seq = `(Intercept)`, seq_slope = preDuration) %>%
  101. select(-data, -model)
  102. # merge two tables together
  103. slopes_all = left_join(slopes_ct, slopes_seq,
  104. by = c("sub", "sequence", "Volatility","group","Order")) %>%
  105. mutate(ci = -slope, si = seq_slope) #central tendency index
  106. # estimated general biases (over-/under-estimates)
  107. # individual mean interval (middle point): most of them were around 1 by design
  108. mInterval = vdata %>% group_by(sub) %>% summarise(mDur = mean(Duration))
  109. # join the mean interval, and estimate the general bias
  110. slopes = slopes_all %>% left_join(., mInterval, by = c('sub')) %>%
  111. mutate(gBias = (intercept_ct + slope * mDur)*1000)
  112. # change factor order for plotting
  113. slopes$Volatility = factor(slopes$Volatility)
  114. slopes$Order = factor(slopes$Order)
  115. slopes$group = factor(slopes$group)
  116. head(slopes)
  117. ```
  118. Let's plot the histograms of the central tendency index and the sequential dependence index, which show some outliers. We visualize the outliers using 3-sigma rule. The 3-sigma rule only rule out 0.3% of the population if we assume the population is normal.
  119. ```{r}
  120. ci_3sig = mean(slopes$ci) + c(-1,1)*3*sd(slopes$ci)
  121. si_3sig = mean(slopes$si) + c(-1,1)*3*sd(slopes$si)
  122. hist1 = ggplot(slopes, aes(x = ci, y = ..density..)) +
  123. geom_histogram(colour = 1, fill = 'white', bins = 20) +
  124. geom_vline(xintercept = ci_3sig, linetype = 'dashed', color = 'red') +
  125. theme_classic() + xlab('Central Tendency Index')
  126. hist2 = ggplot(slopes, aes(x = si, y = ..density..)) +
  127. geom_histogram(colour = 1, fill = 'white', bins = 20) +
  128. geom_vline(xintercept = ci_3sig, linetype = 'dashed', color = 'red') +
  129. theme_classic() + xlab('Sequential Dependence Index')
  130. hist_fig = plot_grid(hist1,hist2, nrow = 2)
  131. hist_fig
  132. if (saveFig){
  133. ggsave("figures/hist_fig.png", hist_fig, width=4, height=3.5)
  134. ggsave("figures/hist_fig.pdf", fig1, width=4, height=3.5)
  135. }
  136. ```
  137. Note, the above outlier detection method is agnostic to the groups and conditions!
  138. Let's find out those outliers and exclude by sequences for further analyses (given that ASD and TD groups were paired).
  139. ```{r}
  140. slopes %>% ungroup() %>% filter(ci > ci_3sig[2] | ci < ci_3sig[1] | si > si_3sig[2] | si < si_3sig[1]) %>% select(sequence) -> outlier_seq
  141. outlier_seq$sequence
  142. ```
  143. Now let's visualize the outliers together with their matched pairs.
  144. ```{r outliers}
  145. # ---- plot outliers ----
  146. fig_outlier = rawdata %>% ungroup() %>% filter(sequence %in% outlier_seq$sequence) %>%
  147. filter(Reproduction > Duration/3, Reproduction < 3*Duration) %>%
  148. group_by(sub, group,sequence, Volatility, Duration) %>%
  149. summarise(mRep = mean(Reproduction), sdr=sd(Reproduction), n = n(),
  150. se = sd(Reproduction)/sqrt(n-1)) %>% filter(n>5) %>% #approx. for linear regression without averaging first
  151. ggplot(aes(Duration, mRep, color = group,
  152. group = interaction(group, Volatility),
  153. shape = Volatility, linetype = Volatility)) +
  154. geom_point() + geom_smooth(method = 'lm', se = FALSE) +
  155. facet_wrap(~sequence, ncol = 4) +
  156. theme_classic() + theme(legend.pos = 'bottom') +
  157. geom_abline(slope = 1, linetype = 3) +
  158. xlab('Duration (Secs)') + ylab(' Reproduction (Secs)') +
  159. theme(strip.background = element_blank(), strip.text.x = element_blank())
  160. fig_outlier
  161. if (saveFig){
  162. ggsave("figures/fig_outlier.png", fig_outlier, width=5, height=5)
  163. ggsave("figures/fig_outlier.pdf", fig_outlier, width=5, height=5)
  164. }
  165. ```
  166. Mean slopes and related statistics:
  167. ```{r}
  168. slopes %>% filter(sequence %in% outlier_seq$sequence) %>% arrange(group, Volatility)
  169. vslopes = slopes %>% filter(!(sequence %in% outlier_seq$sequence))
  170. # outliers
  171. oslopes = slopes %>% filter(sequence %in% outlier_seq$sequence)
  172. # ANOVA
  173. ezANOVA(data = oslopes, dv = ci,
  174. wid = sub,
  175. within = Volatility,
  176. between = group)
  177. ```
  178. ### Outliers and sequential dependence
  179. Visualize the strong sequential dependence.
  180. ```{r}
  181. fig_sdep31 = rawdata %>% filter(sequence %in% c(31) ) %>% # the extreme sequential dependence pair
  182. mutate_at("preDuration", round, 1) %>%
  183. group_by(sequence, group, preDuration) %>%
  184. summarise(rep_err = mean(rep_err)) %>%
  185. ggplot(aes(preDuration, rep_err, color = group) ) +
  186. geom_point() + geom_smooth(method = 'lm') + theme_classic() +
  187. geom_abline(slope = 0, linetype = 2) +
  188. # facet_wrap(~sequence) +
  189. xlab('Duration trial n-1') + ylab('Repr. Error (Secs)') +
  190. theme(strip.background = element_blank(), strip.text.x = element_blank(), legend.position = 'bottom')
  191. fig_sdep31
  192. fig_o = plot_grid(fig_outlier, fig_sdep31, nrow = 1, labels = c('a','b'), rel_widths = c(2.4,1))
  193. if (saveFig){
  194. ggsave("figures/fig_o.png", fig_o, width=7, height=3)
  195. ggsave("figures/fig_o.pdf", fig_o, width=7, height=3)
  196. }
  197. ```
  198. ## Typical reproduction performance
  199. And here are the two typical participants produced errors from sequence 12:
  200. ```{r typical participants}
  201. # ---- reproduction figure - an example -----
  202. # mean analysis
  203. mrep = rawdata %>% ungroup() %>% filter(!(sequence %in% outlier_seq$sequence)) %>%
  204. filter(Reproduction > Duration/3, Reproduction < 3*Duration) %>%
  205. group_by(sub, group,Order, sequence, Volatility, Duration) %>%
  206. summarise(mRep = mean(Reproduction), sdr=sd(Reproduction), n = n(),
  207. se = sd(Reproduction)/sqrt(n-1)) %>% filter(n>3)
  208. # individual examples
  209. # asd
  210. fig_asd = mrep%>%
  211. filter( sequence == '11', group == 'ASD') %>%
  212. ggplot(aes(Duration, mRep, color = Volatility, group = Volatility, shape = Volatility)) +
  213. geom_point(size = 2) +
  214. #geom_line(aes(linetype = Volatility)) +
  215. geom_smooth(method = 'lm', aes(fill = Volatility), se = FALSE) +
  216. geom_errorbar(aes(ymin = mRep - se, ymax = mRep +se), width = 0.05) +
  217. theme_classic() + theme(strip.background = element_blank()) +
  218. geom_abline(slope = 1, linetype = 2) +
  219. theme(legend.position = 'none', plot.margin = unit(c(0,0,0,0),'cm')) +
  220. ylab('') + xlab('')
  221. #td
  222. fig_td = mrep%>%
  223. filter( sequence == '11', group == 'TD') %>%
  224. ggplot(aes(Duration, mRep, color = Volatility, group = Volatility, shape = Volatility)) +
  225. geom_point(size = 2) +
  226. geom_smooth(method = 'lm', aes(fill = Volatility), se = FALSE) +
  227. geom_errorbar(aes(ymin = mRep - se, ymax = mRep +se), width = 0.05) +
  228. theme_classic() + theme(strip.background = element_blank()) +
  229. geom_abline(slope = 1, linetype = 2) +
  230. theme(legend.position = 'none', plot.margin = unit(c(0,0,0,0),'cm')) +
  231. ylab('') + xlab('')
  232. # group mean reproductions
  233. fig_repd_gr = mrep%>%
  234. ggplot(aes(Duration, mRep, color = Volatility, group = Volatility, shape = Volatility)) +
  235. geom_point(alpha = 0.2) +
  236. geom_smooth(method = 'lm', se = FALSE, aes(fill = Volatility)) +
  237. theme_classic() + theme(strip.background = element_blank()) +
  238. geom_abline(slope = 1, linetype = 2) +
  239. facet_wrap(~group) + theme(legend.position = c(0.1,0.85)) +
  240. xlab('Duration (Secs)') + ylab('Reproduction (Secs)')
  241. # plot individual example as inset.
  242. fig_mrep = ggdraw() + draw_plot(fig_repd_gr) +
  243. draw_plot(fig_asd, x = 0.3, y = 0.13, width = .2, height = .3) +
  244. draw_plot(fig_td, x = 0.75, y = 0.13, width = .2, height = .3)
  245. fig_mrep
  246. if(saveFig){
  247. ggsave("figures/fig_reproduction.png", fig_mrep, width=7, height=3.5)
  248. ggsave("figures/fig_reproduction.pdf", fig_mrep, width=7, height=3.5)
  249. }
  250. ```
  251. As we can see from the patterns above. The ASD participant produced relative flat errors, while the TD participant showed a strong central tendency effect (shorts being overestimated and longs being underestimated).
  252. ### Visulize CTE and sequential dependence
  253. Let's visualize the biases (central tendency and serial dependence) for two groups (ASD vs. TD)
  254. ```{r plot_biases}
  255. pd = position_dodge(width = 0.05)
  256. # plot CTI and SI together
  257. fig_biases = vslopes%>%
  258. group_by(group, Volatility, Order) %>%
  259. summarise(msi = mean(si), n = n(), se_si = sd(si)/sqrt(n),
  260. mci = mean(ci), se_ci = sd(ci)/sqrt(n)) %>%
  261. ggplot(aes(msi, mci, color = group, shape = Volatility,
  262. group=interaction(Order, group))) +
  263. geom_hline(yintercept = 0, color = 'gray', linetype = 'dashed') +
  264. geom_vline(xintercept = 0, color = 'gray', linetype = 'dashed') +
  265. geom_point(size = 2) +
  266. geom_line(aes(linetype = Order)) +
  267. geom_errorbar(aes(ymin = mci - se_ci, ymax = mci + se_ci), width = 0.01 ) +
  268. geom_errorbarh(aes(xmin = msi - se_si, xmax = msi + se_si), height = 0.01) +
  269. xlab('Serial Dependence') + ylab('Central Tendency') +
  270. scale_y_continuous(labels = scales::percent) +
  271. scale_x_continuous(labels = scales::percent) +
  272. guides(color = guide_legend(title = 'Group'),
  273. linetype = guide_legend(title = 'Volatility Order'),
  274. ) +
  275. theme_classic() +
  276. theme(legend.position = 'bottom')
  277. fig_biases
  278. if (saveFig){
  279. ggsave("figures/fig_biases.png", fig_biases, width=5, height=4)
  280. ggsave("figures/fig_biases.pdf", fig_biases, width=5, height=4)
  281. }
  282. ```
  283. By visual inspection, individuals with ASD exhibited less central tendency relative to their matched TD controls (the red lines below the cyan lines in the above figure), while the local serial dependence was relatively comparable between two groups except in the low volatility condition when that condition started first. Interestingly, the central tendency and serial dependence were similar for both groups when the high-volatility session started first (the solid lines), while they differed when the low-volatility session started first (the dashed lines).
  284. Given that the main difference was shown in CTE. We further plot the mean CTEs.
  285. ```{r}
  286. #pd = position_dodge(width = 0.5)
  287. # separate plots for appendix
  288. fig_cti = vslopes %>%
  289. group_by(group, Volatility, Order) %>%
  290. summarise(mci = mean(ci), n = n(), se = sd(ci)/sqrt(n)) %>%
  291. ggplot(aes(Volatility, mci, shape = Order, color = Order, group = Order)) +
  292. geom_line() + geom_point(size= 2) +
  293. #geom_bar(stat = 'identity', position = pd, width = 0.5) +
  294. facet_wrap(~group)+
  295. geom_errorbar(aes(ymin = mci - se, ymax = mci + se), width = 0.2) +
  296. theme_classic() + theme(legend.position = 'bottom', strip.background = element_blank()) +
  297. xlab('Volatility') + ylab('CTI') +
  298. scale_y_continuous(labels = scales::percent) +
  299. guides(color = guide_legend(title = 'Order'), fill = guide_legend(title = 'Order'))
  300. fig_cti
  301. ```
  302. Let's do the sequential dependence as well.
  303. ```{r}
  304. pd = position_dodge(width = 0.5)
  305. # separate plots for appendix
  306. fig_sdi = vslopes %>%
  307. group_by(group, Volatility, Order) %>%
  308. summarise(msi = mean(si), n = n(), se = sd(si)/sqrt(n)) %>%
  309. ggplot(aes(Volatility, msi, shape = Order, color = Order, group = Order)) +
  310. geom_line() + geom_point(size = 2) +
  311. #geom_bar(stat = 'identity', position = pd, width = 0.5) +
  312. facet_wrap(~group)+
  313. geom_errorbar(aes(ymin = msi - se, ymax = msi + se), width = 0.2) +
  314. theme_classic() + theme(legend.position = 'bottom', strip.background = element_blank()) +
  315. xlab('Volatility') + ylab('SDI') +
  316. scale_y_continuous(labels = scales::percent) +
  317. guides(color = guide_legend(title = 'Order'), fill = guide_legend(title = 'Order'))
  318. fig_sdi
  319. fig3 = plot_grid(fig_cti, fig_sdi, nrow = 1, labels = c("a","b"))
  320. fig3
  321. if (saveFig){
  322. ggsave('figures/fig3.pdf',fig3, width = 7, height = 3.5)
  323. ggsave('figures/fig3.png',fig3, width = 7, height = 3.5)
  324. }
  325. ```
  326. ### Statistics
  327. 1. Central tendency effect
  328. Average CTIs
  329. ```{r}
  330. vslopes %>% group_by(group) %>% summarise(mcti = mean(ci))
  331. vslopes %>% group_by(Volatility) %>% summarise(mcti = mean(ci))
  332. vslopes %>% group_by(Order) %>% summarise(mcti = mean(ci))
  333. # calculate mean elevated CTIs between HV First vs. LV First
  334. vslopes %>% group_by(group, Volatility, Order) %>% summarise(mcti = mean(ci)) %>%
  335. pivot_wider(names_from = Order, values_from = mcti) %>%
  336. mutate(dCTI = `HV First` - `LV First`) %>%
  337. group_by(group) %>% summarise(md = mean(dCTI))
  338. ```
  339. ```{r ANOVAs_cti}
  340. vslopes = as.data.frame(vslopes) # required by rstatix
  341. # ---- central tendency index----
  342. # repeated measures ANOVA on central tendency index
  343. anova1 = anova_test(data = vslopes, dv = ci,
  344. wid = sub,
  345. within = Volatility,
  346. between = c(group, Order))
  347. anova1
  348. ## separate for Volatility order
  349. anova1a = anova_test(data = vslopes %>% filter(group == 'ASD'),
  350. dv = ci,
  351. wid = sub,
  352. within = Volatility,
  353. between = Order)
  354. anova1a
  355. anova1b = anova_test(data = vslopes %>% filter(group == 'TD'),
  356. dv = ci,
  357. wid = sub,
  358. within = Volatility,
  359. between = Order)
  360. anova1b
  361. ```
  362. 2. Bayes factor analysis for pair-wise comparison
  363. The above analysis showed that the main difference came from the central tendency. Here we further get the central tendency bias and Bayes factor analyses:
  364. ```{r cti_bayes}
  365. # --- Bayes t-tests
  366. bftest = function(df){
  367. df = as.data.frame(df)
  368. # get the means
  369. rdf = df%>% summarise(mci = mean(ci),
  370. se_ci = sd(ci)/sqrt(n()),
  371. msi = mean(si),
  372. se_si = sd(si)/sqrt(n()))
  373. rdf$ci_bf = ttestBF(df$ci, mu = 0) %>% extractBF() %>% .$bf
  374. rdf$si_bf = ttestBF(df$si, mu = 0) %>% extractBF() %>% .$bf
  375. return(rdf)
  376. }
  377. vslopes %>% group_by(group, Volatility, Order) %>% nest() %>%
  378. mutate(bf = map(data, bftest)) %>% unnest(bf, .drop = TRUE)
  379. # group comparison for the low-vol-first, high-vol session
  380. vslopes %>% filter(Volatility == 'High Vola.', Order == 'LV First') %>% as.data.frame() ->v11
  381. t_test(data = v11, formula = ci ~ group)
  382. ttestBF(data = v11, formula = ci ~ group)
  383. vslopes %>% filter(Volatility == 'Low Vola.', Order == 'LV First') %>% as.data.frame() ->v12
  384. t_test(data = v12, formula = ci ~ group)
  385. ttestBF(data = v12, formula = ci ~ group)
  386. ```
  387. 3. ANOVA analyses for the serial dependence indices:
  388. ```{r ANOVAs_sdi}
  389. # ---- Serial dependence index----
  390. anova2 = ezANOVA(data = vslopes, dv = si,
  391. wid = sub,
  392. within = .(Volatility),
  393. between = .(group, Order))
  394. anova2$ANOVA
  395. # Bayes factors
  396. bf = anovaBF(si ~ Volatility + group + Order, data = vslopes, whichRandom = "sub")
  397. bayesfactor_inclusion(bf) #bayes inclusion values
  398. ## separate for session order
  399. anova2a = ezANOVA(data = vslopes %>% filter(Order == 'LV First'),
  400. dv = si,
  401. wid = sub,
  402. within = .(Volatility),
  403. between = .(group))
  404. anova2a$ANOVA
  405. anova2b = ezANOVA(data = vslopes %>% filter(Order == 'HV First'),
  406. dv = si,
  407. wid = sub,
  408. within = .(Volatility),
  409. between = .(group))
  410. anova2b$ANOVA
  411. ```
  412. 4. General biases
  413. Additionally we examined the general biases:
  414. ```{r gBias}
  415. # ---- descriptive of general bias ----
  416. vslopes %>% group_by(group) %>% summarise(mg = mean(gBias), se = sd(gBias)/sqrt(n()))
  417. # ANOVA shows no differences
  418. anova2 = ezANOVA(data = vslopes, dv = gBias,
  419. wid = sub,
  420. within = .(Volatility),
  421. between = .(group, Order))
  422. anova2$ANOVA
  423. bf = anovaBF(gBias ~ group + Volatility + Order,
  424. data = as.data.frame(vslopes), whichRandom = "sub")
  425. bayesfactor_inclusion(bf) #bayes inclusion values
  426. ```
  427. We then visualize the general biases
  428. ```{r plotgBias}
  429. # --- general bias
  430. fig_bias = vslopes %>% group_by(group, Volatility, Order) %>%
  431. summarise(mbias = mean(gBias), n = n(), se = sd(gBias)/sqrt(n)) %>%
  432. ggplot(aes(Volatility, mbias, color = Order,linetype = group, group = interaction(Order, group), shape = group)) +
  433. geom_point(size = 3, position = pd) +
  434. geom_line(position = pd) +
  435. geom_errorbar(aes(ymin = mbias - se, ymax = mbias + se), width = 0.2, position = pd) +
  436. theme_classic() + theme(legend.position = 'bottom') + xlab('Volatility') +
  437. ylab('Mean overestimation (ms)')
  438. fig_bias
  439. ```
  440. There was no difference in two groups in general biases, although both groups were positive overestimated.
  441. Next we examined the reproduced variability:
  442. ```{r rep_var}
  443. # ---- Reproduction variability -----
  444. msds_r <- mrep %>%
  445. filter(n>5) %>%
  446. group_by(group, Order, Volatility, sub) %>%
  447. summarise(msd = mean(sdr), n=n(), msd_se = sd(sdr)/sqrt(n))
  448. sd_ANOVA <- ezANOVA(msds_r, dv=msd, wid=sub, between=.(group,Order), within=Volatility)
  449. sd_ANOVA$ANOVA
  450. mmsds_r <- msds_r %>% summarize(mmsd=mean(msd*1000), n = n(), se = sd(msd*1000)/sqrt(n))
  451. pd = position_dodge(width = 0.5)
  452. fig_sd <- ggplot(mmsds_r, aes(Volatility, mmsd, shape = Order, color = Order, group = Order)) +
  453. geom_line() + geom_point() +
  454. #geom_bar(stat = 'identity', position = pd, width = 0.5) +
  455. geom_errorbar(aes(ymin = mmsd - se, ymax = mmsd + se), width = 0.3) +
  456. facet_wrap(~group) +
  457. ylab('Mean Standard Deviation (ms)') + xlab('Volatility') + theme_classic()
  458. # coord_cartesian(ylim = c(140, 200))
  459. fig_sd
  460. if(saveFig){
  461. ggsave('figures/fig_sd.png', fig_sd, width=5, height=4)
  462. ggsave('figures/fig_sd.pdf', fig_sd, width=5, height=4)
  463. }
  464. ```
  465. ## Correlation analysis
  466. Next we import individual participant ratings and do correlation analyses.
  467. ```{r parinfo}
  468. pinfo = read.csv('data/parinfo.csv')
  469. # join with ctis, sdis
  470. res = left_join(vslopes, pinfo, by = c('group','sequence'))
  471. res$group = as.factor(res$group)
  472. # scatter plot to see any relations
  473. fig_corr = ggduo(res, c('AQ','EQ','BDI'), c('ci','si'), mapping = aes(color = group, shape = Order),
  474. types = list(continuous = 'smooth_lm', se = FALSE),
  475. showStrips = FALSE, legend = 3,
  476. columnLabelsY = c('CTI','SDI')) +
  477. theme_classic() + theme(legend.position = 'bottom', strip.background = element_blank())
  478. if(saveFig){
  479. ggsave('figures/fig_corr.png', fig_corr, width=5, height=4)
  480. ggsave('figures/fig_corr.pdf', fig_corr, width=5, height=4)
  481. }
  482. fig_corr
  483. ```
  484. Get linear regression out
  485. ```{r}
  486. ci_AQ <- function(df){
  487. lm(ci ~ AQ, data = df)
  488. }
  489. ci_EQ <- function(df){
  490. lm(ci ~ EQ, data = df)
  491. }
  492. ci_BDI <- function(df){
  493. lm(ci ~ BDI, data = df)
  494. }
  495. si_AQ <- function(df){
  496. lm(si ~ AQ, data = df)
  497. }
  498. si_EQ <- function(df){
  499. lm(si ~ EQ, data = df)
  500. }
  501. si_BDI <- function(df){
  502. lm(si ~ BDI, data = df)
  503. }
  504. # regression
  505. reg_res <- res %>% group_by(Volatility, group, Order) %>%
  506. nest() %>% # nested data
  507. mutate(ci_AQ = map(data, ci_AQ), ci_EQ = map(data, ci_EQ),
  508. ci_BDI = map(data, ci_BDI),
  509. si_AQ = map(data, si_AQ), si_EQ = map(data, si_EQ),
  510. si_BDI = map(data, si_BDI)) %>%
  511. mutate(mci_AQ = map(ci_AQ, broom::tidy),
  512. mci_EQ = map(ci_EQ, broom::tidy),
  513. mci_BDI = map(ci_BDI, broom::tidy),
  514. msi_AQ = map(si_AQ, broom::tidy),
  515. msi_EQ = map(si_EQ, broom::tidy),
  516. msi_BDI = map(si_BDI, broom::tidy) ) %>%
  517. unnest(cols = c(mci_AQ, mci_EQ, mci_BDI, msi_AQ, msi_EQ, msi_BDI), names_repair = 'unique' , .drop = TRUE) %>%
  518. select(-ci_AQ, -ci_BDI, -ci_EQ, -si_AQ, -si_EQ, -si_BDI, -data,
  519. -starts_with('st')) %>%
  520. filter(term == 'AQ') # remove intercept
  521. ```
  522. Given there is no significant of slopes (after correction), but some difference in intercepts, we do general linear regression, without separating groups.
  523. ```{r}
  524. reg_res2 <- res %>% ungroup() %>% group_by(Order, Volatility) %>%
  525. nest() %>% # nested data
  526. mutate(ci_AQ = map(data, ci_AQ), ci_EQ = map(data, ci_EQ),
  527. ci_BDI = map(data, ci_BDI),
  528. si_AQ = map(data, si_AQ), si_EQ = map(data, si_EQ),
  529. si_BDI = map(data, si_BDI)) %>%
  530. mutate(mci_AQ = map(ci_AQ, broom::tidy),
  531. mci_EQ = map(ci_EQ, broom::tidy),
  532. mci_BDI = map(ci_BDI, broom::tidy),
  533. msi_AQ = map(si_AQ, broom::tidy),
  534. msi_EQ = map(si_EQ, broom::tidy),
  535. msi_BDI = map(si_BDI, broom::tidy) ) %>%
  536. unnest(cols = c(mci_AQ, mci_EQ, mci_BDI, msi_AQ, msi_EQ, msi_BDI), names_repair = 'unique' , .drop = TRUE) %>%
  537. select(-ci_AQ, -ci_BDI, -ci_EQ, -si_AQ, -si_EQ, -si_BDI, -data,
  538. -starts_with('st')) %>%
  539. filter(term == 'AQ') # remove intercept
  540. ```
  541. Again, there is no significant of slopes.