---
title: "R Notebook"
output:
html_document:
df_print: paged
editor_options:
chunk_output_type: inline
---
```{r, include=FALSE}
library(ggplot2)
library(dplyr)
library(tidyverse)
library(lme4)
library(sjPlot)
library(ggfortify)
library(svglite)
library(ggeffects)
library(saemix)
library(ggpubr)
```
```{r}
results <- read.csv("results/results.csv")
```
# Plot results with noised data : example of the speakers noise
```{r}
families <- c("Providence_Alex",
"Providence_William",
"Providence_Ethan",
"Providence_Violet",
"Providence_Lily",
"Providence_Naima")
plt_noise <- ggplot(filter(results,
phonemes_order_noise == 0 &
phonemes_noise == 0 &
# speakers_noise == 0 &
family %in% families &
age > 0 & age <= 60)) +
aes(x=age, y=entropy, color=speaker) +
geom_point(aes(x = age, y = entropy),
size = 0.5) +
stat_cor(method="spearman",
aes(color = speaker, label = paste(..r.label.., sep = "~,~")),
size=6,
label.y.npc = "top",
label.x.npc="middle",
show.legend = FALSE,
cor.coef.name = "rho") +
facet_wrap(speakers_noise ~ .) +
guides(color=guide_legend(override.aes=list(fill=NA))) +
theme_bw(base_size = 9) +
theme(legend.position="bottom",
legend.title = element_blank(),
text = element_text(size = 30)) +
scale_color_manual(values = cbp1) +
ylab("Entropy") +
xlab("Age (months)")
```
```{r}
plt_noise
```
# Prepare data
```{r}
set.seed(8261)
split_train_test <- function(data, percentage){
smp_size <- floor(percentage * nrow(data))
train_ind <- sample(seq_len(nrow(data)), size = smp_size)
return(list("train" = data[train_ind, ], "test" = data[-train_ind, ]))
}
remove_outliers <- function(x, na.rm = TRUE, ...) {
qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
H <- 1.5 * IQR(x, na.rm = na.rm)
y <- x
y[x < (qnt[1] - H)] <- NA
y[x > (qnt[2] + H)] <- NA
y
}
data_without_outliers <- function(data, group_name) {
return(
data <- data %>%
group_by({{group_name}}, rounded_ages) %>%
mutate(entropy = remove_outliers(entropy)) %>%
na.omit() %>%
as.data.frame()
)
}
prepare_data <- function(data, group_name) {
data_child <- data %>%
filter(speaker == "Target_Child", age > 0 & age < 60)
data_child$rounded_ages <- round(data_child$age, 0)
data_child <- data_without_outliers(data_child, {{group_name}})
data_adult <- data %>%
filter(speaker == "Adult", age > 0 & age < 60)
data_adult$rounded_ages <- round(data_adult$age, 0)
data_adult <- data_without_outliers(data_adult, {{group_name}})
return(list("Adult" = split_train_test(data_adult, .80),
"Target_Child" = split_train_test(data_child, .80)))
}
recode_families <- function(data) {
return(data %>%
mutate(family = recode(family,
Providence_Alex = "Alex",
Providence_William = "William",
Providence_Ethan = "Ethan",
Providence_Violet = "Violet",
Providence_Lily = "Lily",
Providence_Naima = "Naima")))
}
filter_age_noises <- function(data) {
return(data %>%
filter(phonemes_order_noise == 0 & # set all noises to 0.
phonemes_noise == 0 &
speakers_noise == 0 &
age > 0 & age <= 60))
}
```
# The model
```{r}
logistic.model <- function(psi, id, xidep) {
age <- xidep[, 1]
slope <- psi[id, 1]
lower_asymptote <- psi[id, 2]
inflection <- psi[id, 3]
y_hat <- lower_asymptote / (1 + slope * inflection ** age)
return(y_hat)
}
logistic.model.saemix <- saemixModel(model = logistic.model,
description = "Logistic decay",
psi0 = matrix(c(-.01, 2, 10),
ncol = 3, byrow = TRUE,
dimnames = list(NULL,
c("slope", "lower_asymptote", "inflection"))))
```
## Make saemix data
```{r}
get_saemix_data <- function(data, group_name) {
return(
saemixData(name.data = data,
name.group = group_name,
name.predictors = "age",
name.response = "entropy",
units = list(x = "en mois"))
)
}
```
## fitting function
```{r}
fit.model <- function(model, data, group_name) {
saemix.options<-list(seed = 94352514, save = FALSE, save.graphs = FALSE)
return(saemix(model, get_saemix_data(data, group_name), saemix.options))
}
```
# plots
```{r}
plot_fitted_model <- function(data_for_child,
data_for_adult,
fitted_model_child,
fitted_model_adult) {
cbp1 <- c("#000000", "#D55E00", "#CC79A7")
### children
reordered_data_for_child <- data_for_child[with(data_for_child, order(language, age)),]
reordered_data_for_child$predicted <- fitted_model_child@results@ipred
reordered_data_for_child$ci <- quantile(fitted_model_child@results@ires, 1 - .05)
reordered_data_for_child$ppredicted <- fitted_model_child@results@ppred
### adults
reordered_data_for_adult <- data_for_adult[with(data_for_adult, order(language, age)),]
reordered_data_for_adult$predicted <- fitted_model_adult@results@ipred
reordered_data_for_adult$ci <- quantile(fitted_model_adult@results@ires, 1 - .05)
reordered_data_for_adult$ppredicted <- fitted_model_adult@results@ppred
data <- rbind(reordered_data_for_child, reordered_data_for_adult)
return(
ggplot(data) +
aes(color=speaker, y = entropy, x = age) +
geom_point(aes(x = age, y = entropy),
size = .7) +
geom_line(aes(x = age, y = predicted, linetype='Individu'), size=1.3) +
# geom_ribbon(aes(x = age,
# y = entropy,
# ymin = predicted - ci,
# ymax = predicted + ci), # shadowing cnf intervals
# alpha=.06,
# size=0.0) +
geom_line(aes(x = age, y = ppredicted, linetype='Population'), size=1.3) +
stat_cor(method="spearman",
aes(color = speaker, label =
paste("Spearman", paste(..r.label.., sep = "~`,`~"), sep = "~` `~")),
size=4.5,
label.y.npc = "top",
label.x.npc="left",
show.legend = FALSE,
cor.coef.name = "rho") +
facet_wrap(language ~ . ) +
theme_bw(base_size = 9) +
theme(legend.position="bottom",
legend.title = element_blank(),
text = element_text(size = 20),
legend.key = element_blank(),
legend.text = element_text(size = 20),
legend.key.size = unit(0.35, "in")) +
scale_color_manual(values = cbp1, labels=c("Adult", "Target Child")) +
scale_linetype_manual(values=c('Individu'='solid','Population'="dashed")) +
ylab("Cross-Entropy") +
xlab("Age (months)")
)
}
```
# Main
```{r}
data_ready <- prepare_data(filter_age_noises(results))
```
```{r}
estimated_child <- fit.model(logistic.model.saemix, data_ready$Target_Child$train, "language")
estimated_adult <- fit.model(logistic.model.saemix, data_ready$Adult$train, "language")
```
```{r}
plot_results <- plot_fitted_model(data_ready$Target_Child$train,
data_ready$Adult$train,
estimated_child,
estimated_adult)
```
```{r}
plot_results
```
```{r}
# ggsave(filename = "plots/plots_study1/results2.pdf", plot=plot_res1, device="pdf", dpi=720, height = 6, width = 8)
ggsave(filename = "plots/plot_results.png", plot=plot_results, device="png", dpi=320, height = 8, width = 13)
```
```{r}
estimated_child@results
```
```{r}
estimated_adult@results
```