--- title: "R Notebook" output: html_document: df_print: paged editor_options: chunk_output_type: inline --- ```{r, include=FALSE} library(ggplot2) library(dplyr) library(tidyverse) library(lme4) library(sjPlot) library(ggfortify) library(svglite) library(ggeffects) library(saemix) library(ggpubr) ``` ```{r} results <- read.csv("results/results.csv") ``` # Plot results with noised data : example of the speakers noise ```{r} families <- c("Providence_Alex", "Providence_William", "Providence_Ethan", "Providence_Violet", "Providence_Lily", "Providence_Naima") plt_noise <- ggplot(filter(results, phonemes_order_noise == 0 & phonemes_noise == 0 & # speakers_noise == 0 & family %in% families & age > 0 & age <= 60)) + aes(x=age, y=entropy, color=speaker) + geom_point(aes(x = age, y = entropy), size = 0.5) + stat_cor(method="spearman", aes(color = speaker, label = paste(..r.label.., sep = "~,~")), size=6, label.y.npc = "top", label.x.npc="middle", show.legend = FALSE, cor.coef.name = "rho") + facet_wrap(speakers_noise ~ .) + guides(color=guide_legend(override.aes=list(fill=NA))) + theme_bw(base_size = 9) + theme(legend.position="bottom", legend.title = element_blank(), text = element_text(size = 30)) + scale_color_manual(values = cbp1) + ylab("Entropy") + xlab("Age (months)") ``` ```{r} plt_noise ``` # Prepare data ```{r} set.seed(8261) split_train_test <- function(data, percentage){ smp_size <- floor(percentage * nrow(data)) train_ind <- sample(seq_len(nrow(data)), size = smp_size) return(list("train" = data[train_ind, ], "test" = data[-train_ind, ])) } remove_outliers <- function(x, na.rm = TRUE, ...) { qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...) H <- 1.5 * IQR(x, na.rm = na.rm) y <- x y[x < (qnt[1] - H)] <- NA y[x > (qnt[2] + H)] <- NA y } data_without_outliers <- function(data, group_name) { return( data <- data %>% group_by({{group_name}}, rounded_ages) %>% mutate(entropy = remove_outliers(entropy)) %>% na.omit() %>% as.data.frame() ) } prepare_data <- function(data, group_name) { data_child <- data %>% filter(speaker == "Target_Child", age > 0 & age < 60) data_child$rounded_ages <- round(data_child$age, 0) data_child <- data_without_outliers(data_child, {{group_name}}) data_adult <- data %>% filter(speaker == "Adult", age > 0 & age < 60) data_adult$rounded_ages <- round(data_adult$age, 0) data_adult <- data_without_outliers(data_adult, {{group_name}}) return(list("Adult" = split_train_test(data_adult, .80), "Target_Child" = split_train_test(data_child, .80))) } recode_families <- function(data) { return(data %>% mutate(family = recode(family, Providence_Alex = "Alex", Providence_William = "William", Providence_Ethan = "Ethan", Providence_Violet = "Violet", Providence_Lily = "Lily", Providence_Naima = "Naima"))) } filter_age_noises <- function(data) { return(data %>% filter(phonemes_order_noise == 0 & # set all noises to 0. phonemes_noise == 0 & speakers_noise == 0 & age > 0 & age <= 60)) } ``` # The model ```{r} logistic.model <- function(psi, id, xidep) { age <- xidep[, 1] slope <- psi[id, 1] lower_asymptote <- psi[id, 2] inflection <- psi[id, 3] y_hat <- lower_asymptote / (1 + slope * inflection ** age) return(y_hat) } logistic.model.saemix <- saemixModel(model = logistic.model, description = "Logistic decay", psi0 = matrix(c(-.01, 2, 10), ncol = 3, byrow = TRUE, dimnames = list(NULL, c("slope", "lower_asymptote", "inflection")))) ``` ## Make saemix data ```{r} get_saemix_data <- function(data, group_name) { return( saemixData(name.data = data, name.group = group_name, name.predictors = "age", name.response = "entropy", units = list(x = "en mois")) ) } ``` ## fitting function ```{r} fit.model <- function(model, data, group_name) { saemix.options<-list(seed = 94352514, save = FALSE, save.graphs = FALSE) return(saemix(model, get_saemix_data(data, group_name), saemix.options)) } ``` # plots ```{r} plot_fitted_model <- function(data_for_child, data_for_adult, fitted_model_child, fitted_model_adult) { cbp1 <- c("#000000", "#D55E00", "#CC79A7") ### children reordered_data_for_child <- data_for_child[with(data_for_child, order(language, age)),] reordered_data_for_child$predicted <- fitted_model_child@results@ipred reordered_data_for_child$ci <- quantile(fitted_model_child@results@ires, 1 - .05) reordered_data_for_child$ppredicted <- fitted_model_child@results@ppred ### adults reordered_data_for_adult <- data_for_adult[with(data_for_adult, order(language, age)),] reordered_data_for_adult$predicted <- fitted_model_adult@results@ipred reordered_data_for_adult$ci <- quantile(fitted_model_adult@results@ires, 1 - .05) reordered_data_for_adult$ppredicted <- fitted_model_adult@results@ppred data <- rbind(reordered_data_for_child, reordered_data_for_adult) return( ggplot(data) + aes(color=speaker, y = entropy, x = age) + geom_point(aes(x = age, y = entropy), size = .7) + geom_line(aes(x = age, y = predicted, linetype='Individu'), size=1.3) + # geom_ribbon(aes(x = age, # y = entropy, # ymin = predicted - ci, # ymax = predicted + ci), # shadowing cnf intervals # alpha=.06, # size=0.0) + geom_line(aes(x = age, y = ppredicted, linetype='Population'), size=1.3) + stat_cor(method="spearman", aes(color = speaker, label = paste("Spearman", paste(..r.label.., sep = "~`,`~"), sep = "~` `~")), size=4.5, label.y.npc = "top", label.x.npc="left", show.legend = FALSE, cor.coef.name = "rho") + facet_wrap(language ~ . ) + theme_bw(base_size = 9) + theme(legend.position="bottom", legend.title = element_blank(), text = element_text(size = 20), legend.key = element_blank(), legend.text = element_text(size = 20), legend.key.size = unit(0.35, "in")) + scale_color_manual(values = cbp1, labels=c("Adult", "Target Child")) + scale_linetype_manual(values=c('Individu'='solid','Population'="dashed")) + ylab("Cross-Entropy") + xlab("Age (months)") ) } ``` # Main ```{r} data_ready <- prepare_data(filter_age_noises(results)) ``` ```{r} estimated_child <- fit.model(logistic.model.saemix, data_ready$Target_Child$train, "language") estimated_adult <- fit.model(logistic.model.saemix, data_ready$Adult$train, "language") ``` ```{r} plot_results <- plot_fitted_model(data_ready$Target_Child$train, data_ready$Adult$train, estimated_child, estimated_adult) ``` ```{r} plot_results ``` ```{r} # ggsave(filename = "plots/plots_study1/results2.pdf", plot=plot_res1, device="pdf", dpi=720, height = 6, width = 8) ggsave(filename = "plots/plot_results.png", plot=plot_results, device="png", dpi=320, height = 8, width = 13) ``` ```{r} estimated_child@results ``` ```{r} estimated_adult@results ```