123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267 |
- ---
- title: "R Notebook"
- output:
- html_document:
- df_print: paged
- editor_options:
- chunk_output_type: inline
- ---
- ```{r, include=FALSE}
- library(ggplot2)
- library(dplyr)
- library(tidyverse)
- library(lme4)
- library(sjPlot)
- library(ggfortify)
- library(svglite)
- library(ggeffects)
- library(saemix)
- library(ggpubr)
- ```
- ```{r}
- results <- read.csv("results/results.csv")
- ```
- # Plot results with noised data : example of the speakers noise
- ```{r}
- families <- c("Providence_Alex",
- "Providence_William",
- "Providence_Ethan",
- "Providence_Violet",
- "Providence_Lily",
- "Providence_Naima")
- plt_noise <- ggplot(filter(results,
- phonemes_order_noise == 0 &
- phonemes_noise == 0 &
- # speakers_noise == 0 &
- family %in% families &
- age > 0 & age <= 60)) +
- aes(x=age, y=entropy, color=speaker) +
- geom_point(aes(x = age, y = entropy),
- size = 0.5) +
- stat_cor(method="spearman",
- aes(color = speaker, label = paste(..r.label.., sep = "~,~")),
- size=6,
- label.y.npc = "top",
- label.x.npc="middle",
- show.legend = FALSE,
- cor.coef.name = "rho") +
- facet_wrap(speakers_noise ~ .) +
- guides(color=guide_legend(override.aes=list(fill=NA))) +
- theme_bw(base_size = 9) +
- theme(legend.position="bottom",
- legend.title = element_blank(),
- text = element_text(size = 30)) +
- scale_color_manual(values = cbp1) +
- ylab("Entropy") +
- xlab("Age (months)")
- ```
- ```{r}
- plt_noise
- ```
- # Prepare data
- ```{r}
- set.seed(8261)
- split_train_test <- function(data, percentage){
- smp_size <- floor(percentage * nrow(data))
- train_ind <- sample(seq_len(nrow(data)), size = smp_size)
- return(list("train" = data[train_ind, ], "test" = data[-train_ind, ]))
- }
- remove_outliers <- function(x, na.rm = TRUE, ...) {
- qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
- H <- 1.5 * IQR(x, na.rm = na.rm)
- y <- x
- y[x < (qnt[1] - H)] <- NA
- y[x > (qnt[2] + H)] <- NA
- y
- }
- data_without_outliers <- function(data, group_name) {
- return(
- data <- data %>%
- group_by({{group_name}}, rounded_ages) %>%
- mutate(entropy = remove_outliers(entropy)) %>%
- na.omit() %>%
- as.data.frame()
- )
- }
- prepare_data <- function(data, group_name) {
- data_child <- data %>%
- filter(speaker == "Target_Child", age > 0 & age < 60)
- data_child$rounded_ages <- round(data_child$age, 0)
- data_child <- data_without_outliers(data_child, {{group_name}})
-
- data_adult <- data %>%
- filter(speaker == "Adult", age > 0 & age < 60)
- data_adult$rounded_ages <- round(data_adult$age, 0)
- data_adult <- data_without_outliers(data_adult, {{group_name}})
-
-
- return(list("Adult" = split_train_test(data_adult, .80),
- "Target_Child" = split_train_test(data_child, .80)))
- }
- recode_families <- function(data) {
- return(data %>%
- mutate(family = recode(family,
- Providence_Alex = "Alex",
- Providence_William = "William",
- Providence_Ethan = "Ethan",
- Providence_Violet = "Violet",
- Providence_Lily = "Lily",
- Providence_Naima = "Naima")))
- }
- filter_age_noises <- function(data) {
- return(data %>%
- filter(phonemes_order_noise == 0 & # set all noises to 0.
- phonemes_noise == 0 &
- speakers_noise == 0 &
- age > 0 & age <= 60))
- }
- ```
- # The model
- ```{r}
- logistic.model <- function(psi, id, xidep) {
- age <- xidep[, 1]
- slope <- psi[id, 1]
- lower_asymptote <- psi[id, 2]
- inflection <- psi[id, 3]
- y_hat <- lower_asymptote / (1 + slope * inflection ** age)
- return(y_hat)
- }
- logistic.model.saemix <- saemixModel(model = logistic.model,
- description = "Logistic decay",
- psi0 = matrix(c(-.01, 2, 10),
- ncol = 3, byrow = TRUE,
- dimnames = list(NULL,
- c("slope", "lower_asymptote", "inflection"))))
- ```
- ## Make saemix data
- ```{r}
- get_saemix_data <- function(data, group_name) {
- return(
- saemixData(name.data = data,
- name.group = group_name,
- name.predictors = "age",
- name.response = "entropy",
- units = list(x = "en mois"))
- )
- }
- ```
- ## fitting function
- ```{r}
- fit.model <- function(model, data, group_name) {
- saemix.options<-list(seed = 94352514, save = FALSE, save.graphs = FALSE)
- return(saemix(model, get_saemix_data(data, group_name), saemix.options))
- }
- ```
- # plots
- ```{r}
- plot_fitted_model <- function(data_for_child,
- data_for_adult,
- fitted_model_child,
- fitted_model_adult) {
- cbp1 <- c("#000000", "#D55E00", "#CC79A7")
- ### children
- reordered_data_for_child <- data_for_child[with(data_for_child, order(language, age)),]
- reordered_data_for_child$predicted <- fitted_model_child@results@ipred
- reordered_data_for_child$ci <- quantile(fitted_model_child@results@ires, 1 - .05)
- reordered_data_for_child$ppredicted <- fitted_model_child@results@ppred
- ### adults
- reordered_data_for_adult <- data_for_adult[with(data_for_adult, order(language, age)),]
- reordered_data_for_adult$predicted <- fitted_model_adult@results@ipred
- reordered_data_for_adult$ci <- quantile(fitted_model_adult@results@ires, 1 - .05)
- reordered_data_for_adult$ppredicted <- fitted_model_adult@results@ppred
- data <- rbind(reordered_data_for_child, reordered_data_for_adult)
- return(
- ggplot(data) +
- aes(color=speaker, y = entropy, x = age) +
- geom_point(aes(x = age, y = entropy),
- size = .7) +
- geom_line(aes(x = age, y = predicted, linetype='Individu'), size=1.3) +
- # geom_ribbon(aes(x = age,
- # y = entropy,
- # ymin = predicted - ci,
- # ymax = predicted + ci), # shadowing cnf intervals
- # alpha=.06,
- # size=0.0) +
- geom_line(aes(x = age, y = ppredicted, linetype='Population'), size=1.3) +
- stat_cor(method="spearman",
- aes(color = speaker, label =
- paste("Spearman", paste(..r.label.., sep = "~`,`~"), sep = "~` `~")),
- size=4.5,
- label.y.npc = "top",
- label.x.npc="left",
- show.legend = FALSE,
- cor.coef.name = "rho") +
- facet_wrap(language ~ . ) +
- theme_bw(base_size = 9) +
- theme(legend.position="bottom",
- legend.title = element_blank(),
- text = element_text(size = 20),
- legend.key = element_blank(),
- legend.text = element_text(size = 20),
- legend.key.size = unit(0.35, "in")) +
- scale_color_manual(values = cbp1, labels=c("Adult", "Target Child")) +
- scale_linetype_manual(values=c('Individu'='solid','Population'="dashed")) +
- ylab("Cross-Entropy") +
- xlab("Age (months)")
- )
- }
- ```
- # Main
- ```{r}
- data_ready <- prepare_data(filter_age_noises(results))
- ```
- ```{r}
- estimated_child <- fit.model(logistic.model.saemix, data_ready$Target_Child$train, "language")
- estimated_adult <- fit.model(logistic.model.saemix, data_ready$Adult$train, "language")
- ```
- ```{r}
- plot_results <- plot_fitted_model(data_ready$Target_Child$train,
- data_ready$Adult$train,
- estimated_child,
- estimated_adult)
- ```
- ```{r}
- plot_results
- ```
- ```{r}
- # ggsave(filename = "plots/plots_study1/results2.pdf", plot=plot_res1, device="pdf", dpi=720, height = 6, width = 8)
- ggsave(filename = "plots/plot_results.png", plot=plot_results, device="png", dpi=320, height = 8, width = 13)
- ```
- ```{r}
- estimated_child@results
- ```
- ```{r}
- estimated_adult@results
- ```
|