final_results_analysis.Rmd 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. ---
  2. title: "R Notebook"
  3. output:
  4. html_document:
  5. df_print: paged
  6. editor_options:
  7. chunk_output_type: inline
  8. ---
  9. ```{r, include=FALSE}
  10. library(ggplot2)
  11. library(dplyr)
  12. library(tidyverse)
  13. library(lme4)
  14. library(sjPlot)
  15. library(ggfortify)
  16. library(svglite)
  17. library(ggeffects)
  18. library(saemix)
  19. library(ggpubr)
  20. ```
  21. ```{r}
  22. results <- read.csv("results/results.csv")
  23. ```
  24. # Plot results with noised data : example of the speakers noise
  25. ```{r}
  26. families <- c("Providence_Alex",
  27. "Providence_William",
  28. "Providence_Ethan",
  29. "Providence_Violet",
  30. "Providence_Lily",
  31. "Providence_Naima")
  32. plt_noise <- ggplot(filter(results,
  33. phonemes_order_noise == 0 &
  34. phonemes_noise == 0 &
  35. # speakers_noise == 0 &
  36. family %in% families &
  37. age > 0 & age <= 60)) +
  38. aes(x=age, y=entropy, color=speaker) +
  39. geom_point(aes(x = age, y = entropy),
  40. size = 0.5) +
  41. stat_cor(method="spearman",
  42. aes(color = speaker, label = paste(..r.label.., sep = "~,~")),
  43. size=6,
  44. label.y.npc = "top",
  45. label.x.npc="middle",
  46. show.legend = FALSE,
  47. cor.coef.name = "rho") +
  48. facet_wrap(speakers_noise ~ .) +
  49. guides(color=guide_legend(override.aes=list(fill=NA))) +
  50. theme_bw(base_size = 9) +
  51. theme(legend.position="bottom",
  52. legend.title = element_blank(),
  53. text = element_text(size = 30)) +
  54. scale_color_manual(values = cbp1) +
  55. ylab("Entropy") +
  56. xlab("Age (months)")
  57. ```
  58. ```{r}
  59. plt_noise
  60. ```
  61. # Prepare data
  62. ```{r}
  63. set.seed(8261)
  64. split_train_test <- function(data, percentage){
  65. smp_size <- floor(percentage * nrow(data))
  66. train_ind <- sample(seq_len(nrow(data)), size = smp_size)
  67. return(list("train" = data[train_ind, ], "test" = data[-train_ind, ]))
  68. }
  69. remove_outliers <- function(x, na.rm = TRUE, ...) {
  70. qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
  71. H <- 1.5 * IQR(x, na.rm = na.rm)
  72. y <- x
  73. y[x < (qnt[1] - H)] <- NA
  74. y[x > (qnt[2] + H)] <- NA
  75. y
  76. }
  77. data_without_outliers <- function(data, group_name) {
  78. return(
  79. data <- data %>%
  80. group_by({{group_name}}, rounded_ages) %>%
  81. mutate(entropy = remove_outliers(entropy)) %>%
  82. na.omit() %>%
  83. as.data.frame()
  84. )
  85. }
  86. prepare_data <- function(data, group_name) {
  87. data_child <- data %>%
  88. filter(speaker == "Target_Child", age > 0 & age < 60)
  89. data_child$rounded_ages <- round(data_child$age, 0)
  90. data_child <- data_without_outliers(data_child, {{group_name}})
  91. data_adult <- data %>%
  92. filter(speaker == "Adult", age > 0 & age < 60)
  93. data_adult$rounded_ages <- round(data_adult$age, 0)
  94. data_adult <- data_without_outliers(data_adult, {{group_name}})
  95. return(list("Adult" = split_train_test(data_adult, .80),
  96. "Target_Child" = split_train_test(data_child, .80)))
  97. }
  98. recode_families <- function(data) {
  99. return(data %>%
  100. mutate(family = recode(family,
  101. Providence_Alex = "Alex",
  102. Providence_William = "William",
  103. Providence_Ethan = "Ethan",
  104. Providence_Violet = "Violet",
  105. Providence_Lily = "Lily",
  106. Providence_Naima = "Naima")))
  107. }
  108. filter_age_noises <- function(data) {
  109. return(data %>%
  110. filter(phonemes_order_noise == 0 & # set all noises to 0.
  111. phonemes_noise == 0 &
  112. speakers_noise == 0 &
  113. age > 0 & age <= 60))
  114. }
  115. ```
  116. # The model
  117. ```{r}
  118. logistic.model <- function(psi, id, xidep) {
  119. age <- xidep[, 1]
  120. slope <- psi[id, 1]
  121. lower_asymptote <- psi[id, 2]
  122. inflection <- psi[id, 3]
  123. y_hat <- lower_asymptote / (1 + slope * inflection ** age)
  124. return(y_hat)
  125. }
  126. logistic.model.saemix <- saemixModel(model = logistic.model,
  127. description = "Logistic decay",
  128. psi0 = matrix(c(-.01, 2, 10),
  129. ncol = 3, byrow = TRUE,
  130. dimnames = list(NULL,
  131. c("slope", "lower_asymptote", "inflection"))))
  132. ```
  133. ## Make saemix data
  134. ```{r}
  135. get_saemix_data <- function(data, group_name) {
  136. return(
  137. saemixData(name.data = data,
  138. name.group = group_name,
  139. name.predictors = "age",
  140. name.response = "entropy",
  141. units = list(x = "en mois"))
  142. )
  143. }
  144. ```
  145. ## fitting function
  146. ```{r}
  147. fit.model <- function(model, data, group_name) {
  148. saemix.options<-list(seed = 94352514, save = FALSE, save.graphs = FALSE)
  149. return(saemix(model, get_saemix_data(data, group_name), saemix.options))
  150. }
  151. ```
  152. # plots
  153. ```{r}
  154. plot_fitted_model <- function(data_for_child,
  155. data_for_adult,
  156. fitted_model_child,
  157. fitted_model_adult) {
  158. cbp1 <- c("#000000", "#D55E00", "#CC79A7")
  159. ### children
  160. reordered_data_for_child <- data_for_child[with(data_for_child, order(language, age)),]
  161. reordered_data_for_child$predicted <- fitted_model_child@results@ipred
  162. reordered_data_for_child$ci <- quantile(fitted_model_child@results@ires, 1 - .05)
  163. reordered_data_for_child$ppredicted <- fitted_model_child@results@ppred
  164. ### adults
  165. reordered_data_for_adult <- data_for_adult[with(data_for_adult, order(language, age)),]
  166. reordered_data_for_adult$predicted <- fitted_model_adult@results@ipred
  167. reordered_data_for_adult$ci <- quantile(fitted_model_adult@results@ires, 1 - .05)
  168. reordered_data_for_adult$ppredicted <- fitted_model_adult@results@ppred
  169. data <- rbind(reordered_data_for_child, reordered_data_for_adult)
  170. return(
  171. ggplot(data) +
  172. aes(color=speaker, y = entropy, x = age) +
  173. geom_point(aes(x = age, y = entropy),
  174. size = .7) +
  175. geom_line(aes(x = age, y = predicted, linetype='Individu'), size=1.3) +
  176. # geom_ribbon(aes(x = age,
  177. # y = entropy,
  178. # ymin = predicted - ci,
  179. # ymax = predicted + ci), # shadowing cnf intervals
  180. # alpha=.06,
  181. # size=0.0) +
  182. geom_line(aes(x = age, y = ppredicted, linetype='Population'), size=1.3) +
  183. stat_cor(method="spearman",
  184. aes(color = speaker, label =
  185. paste("Spearman", paste(..r.label.., sep = "~`,`~"), sep = "~` `~")),
  186. size=4.5,
  187. label.y.npc = "top",
  188. label.x.npc="left",
  189. show.legend = FALSE,
  190. cor.coef.name = "rho") +
  191. facet_wrap(language ~ . ) +
  192. theme_bw(base_size = 9) +
  193. theme(legend.position="bottom",
  194. legend.title = element_blank(),
  195. text = element_text(size = 20),
  196. legend.key = element_blank(),
  197. legend.text = element_text(size = 20),
  198. legend.key.size = unit(0.35, "in")) +
  199. scale_color_manual(values = cbp1, labels=c("Adult", "Target Child")) +
  200. scale_linetype_manual(values=c('Individu'='solid','Population'="dashed")) +
  201. ylab("Cross-Entropy") +
  202. xlab("Age (months)")
  203. )
  204. }
  205. ```
  206. # Main
  207. ```{r}
  208. data_ready <- prepare_data(filter_age_noises(results))
  209. ```
  210. ```{r}
  211. estimated_child <- fit.model(logistic.model.saemix, data_ready$Target_Child$train, "language")
  212. estimated_adult <- fit.model(logistic.model.saemix, data_ready$Adult$train, "language")
  213. ```
  214. ```{r}
  215. plot_results <- plot_fitted_model(data_ready$Target_Child$train,
  216. data_ready$Adult$train,
  217. estimated_child,
  218. estimated_adult)
  219. ```
  220. ```{r}
  221. plot_results
  222. ```
  223. ```{r}
  224. # ggsave(filename = "plots/plots_study1/results2.pdf", plot=plot_res1, device="pdf", dpi=720, height = 6, width = 8)
  225. ggsave(filename = "plots/plot_results.png", plot=plot_results, device="png", dpi=320, height = 8, width = 13)
  226. ```
  227. ```{r}
  228. estimated_child@results
  229. ```
  230. ```{r}
  231. estimated_adult@results
  232. ```