---
title: "R Notebook"
output:
  html_document:
    df_print: paged
editor_options:
  chunk_output_type: inline
---

```{r, include=FALSE}
library(ggplot2)
library(dplyr)
library(tidyverse)
library(lme4)
library(sjPlot)
library(ggfortify)
library(svglite)
library(ggeffects)
library(saemix)
library(ggpubr)
```

```{r}
results <- read.csv("results/results.csv")
```

# Plot results with noised data : example of the speakers noise

```{r}
families <- c("Providence_Alex",
              "Providence_William",
              "Providence_Ethan",
              "Providence_Violet",
              "Providence_Lily",
              "Providence_Naima")
plt_noise <- ggplot(filter(results,
              phonemes_order_noise == 0 &
              phonemes_noise == 0 &
              # speakers_noise == 0 &
              family %in% families &
              age > 0 & age <= 60)) + 
    aes(x=age, y=entropy, color=speaker) +
    geom_point(aes(x = age, y = entropy),
               size = 0.5) +
    stat_cor(method="spearman", 
             aes(color = speaker, label = paste(..r.label.., sep = "~,~")),
             size=6,
             label.y.npc = "top",
             label.x.npc="middle",
             show.legend = FALSE,
             cor.coef.name = "rho") +
    facet_wrap(speakers_noise ~ .) +
    guides(color=guide_legend(override.aes=list(fill=NA))) +  
    theme_bw(base_size = 9) +
    theme(legend.position="bottom", 
          legend.title = element_blank(),
          text = element_text(size = 30)) +
    scale_color_manual(values = cbp1) +
    ylab("Entropy") +
    xlab("Age (months)")
```


```{r}
plt_noise
```


# Prepare data

```{r}
set.seed(8261)
split_train_test <- function(data, percentage){
  smp_size <- floor(percentage * nrow(data))
  train_ind <- sample(seq_len(nrow(data)), size = smp_size)
  return(list("train" = data[train_ind, ], "test" = data[-train_ind, ]))
}
remove_outliers <- function(x, na.rm = TRUE, ...) {
  qnt <- quantile(x, probs=c(.25, .75), na.rm = na.rm, ...)
  H <- 1.5 * IQR(x, na.rm = na.rm)
  y <- x
  y[x < (qnt[1] - H)] <- NA
  y[x > (qnt[2] + H)] <- NA
  y
}
data_without_outliers <- function(data, group_name) {
  return(
    data <- data %>%
    group_by({{group_name}}, rounded_ages) %>%
    mutate(entropy = remove_outliers(entropy)) %>%
    na.omit() %>%
    as.data.frame()
  )
}
prepare_data <- function(data, group_name) {
  data_child <- data %>%
  filter(speaker == "Target_Child", age > 0 & age < 60) 
  data_child$rounded_ages <- round(data_child$age, 0)
  data_child <- data_without_outliers(data_child, {{group_name}})
  
  data_adult <- data %>%
  filter(speaker == "Adult", age > 0 & age < 60) 
  data_adult$rounded_ages <- round(data_adult$age, 0)
  data_adult <- data_without_outliers(data_adult, {{group_name}})
  
  
  return(list("Adult" = split_train_test(data_adult, .80), 
              "Target_Child" = split_train_test(data_child, .80)))
}
recode_families <- function(data) {
  return(data %>%
    mutate(family = recode(family,
                           Providence_Alex = "Alex",
                           Providence_William = "William",
                           Providence_Ethan = "Ethan",
                           Providence_Violet = "Violet",
                           Providence_Lily = "Lily",
                           Providence_Naima = "Naima")))
}
filter_age_noises <- function(data) {
  return(data %>%
  filter(phonemes_order_noise == 0 & # set all noises to 0.
              phonemes_noise == 0 &
              speakers_noise == 0 &
              age > 0 & age <= 60))
  }
```


# The model

```{r}
logistic.model <- function(psi, id, xidep) {
  age <- xidep[, 1]
  slope <- psi[id, 1]
  lower_asymptote <- psi[id, 2]
  inflection <- psi[id, 3]
  y_hat <- lower_asymptote / (1 + slope * inflection ** age)
  return(y_hat)
}

logistic.model.saemix <- saemixModel(model = logistic.model,
                              description = "Logistic decay",
                              psi0 = matrix(c(-.01, 2, 10),
                                            ncol = 3, byrow = TRUE,
                                            dimnames = list(NULL, 
                                                            c("slope", "lower_asymptote", "inflection"))))
```

## Make saemix data


```{r}
get_saemix_data <- function(data, group_name) {
  return(
    saemixData(name.data = data,
               name.group = group_name,
               name.predictors = "age",
               name.response = "entropy",
               units = list(x = "en mois"))
  )
}
```

## fitting function

```{r}
fit.model <- function(model, data, group_name) {
  saemix.options<-list(seed = 94352514, save = FALSE, save.graphs = FALSE)
  return(saemix(model, get_saemix_data(data, group_name), saemix.options))
}
```

# plots

```{r}
plot_fitted_model <- function(data_for_child, 
                              data_for_adult, 
                              fitted_model_child, 
                              fitted_model_adult) {
  cbp1 <- c("#000000", "#D55E00", "#CC79A7")
  ### children
  reordered_data_for_child <- data_for_child[with(data_for_child, order(language, age)),]
  reordered_data_for_child$predicted <- fitted_model_child@results@ipred
  reordered_data_for_child$ci <- quantile(fitted_model_child@results@ires, 1 - .05)
  reordered_data_for_child$ppredicted <- fitted_model_child@results@ppred
  ### adults
  reordered_data_for_adult <- data_for_adult[with(data_for_adult, order(language, age)),]
  reordered_data_for_adult$predicted <- fitted_model_adult@results@ipred
  reordered_data_for_adult$ci <- quantile(fitted_model_adult@results@ires, 1 - .05)
  reordered_data_for_adult$ppredicted <- fitted_model_adult@results@ppred
  data <- rbind(reordered_data_for_child, reordered_data_for_adult)
  return(
    ggplot(data) + 
      aes(color=speaker, y = entropy, x = age) +
      geom_point(aes(x = age, y = entropy),
                 size = .7) +
      geom_line(aes(x = age, y = predicted, linetype='Individu'), size=1.3) +
      # geom_ribbon(aes(x = age, 
                      # y = entropy,
                      # ymin = predicted - ci,
                      # ymax = predicted + ci),    # shadowing cnf intervals
                # alpha=.06,
                # size=0.0) + 
      geom_line(aes(x = age, y = ppredicted, linetype='Population'), size=1.3) +
      stat_cor(method="spearman",
         aes(color = speaker, label =
             paste("Spearman", paste(..r.label.., sep = "~`,`~"), sep = "~` `~")),
         size=4.5,
         label.y.npc = "top",
         label.x.npc="left",
         show.legend = FALSE,
         cor.coef.name = "rho") +
      facet_wrap(language ~ . ) +
      theme_bw(base_size = 9) +
      theme(legend.position="bottom", 
            legend.title = element_blank(),
            text = element_text(size = 20),
            legend.key = element_blank(), 
            legend.text = element_text(size = 20),
            legend.key.size =  unit(0.35, "in")) +
      scale_color_manual(values = cbp1, labels=c("Adult", "Target Child")) +
      scale_linetype_manual(values=c('Individu'='solid','Population'="dashed")) +
      ylab("Cross-Entropy") +
      xlab("Age (months)")
      )
  }
```


# Main

```{r}
data_ready <- prepare_data(filter_age_noises(results))
```

```{r}
estimated_child <- fit.model(logistic.model.saemix, data_ready$Target_Child$train, "language")
estimated_adult <- fit.model(logistic.model.saemix, data_ready$Adult$train, "language")
```


```{r}
plot_results <- plot_fitted_model(data_ready$Target_Child$train,
                               data_ready$Adult$train, 
                               estimated_child, 
                               estimated_adult)
```

```{r}
plot_results
```

```{r}
# ggsave(filename = "plots/plots_study1/results2.pdf", plot=plot_res1, device="pdf", dpi=720, height = 6, width = 8)
ggsave(filename = "plots/plot_results.png", plot=plot_results, device="png", dpi=320, height = 8, width = 13)
```

```{r}
estimated_child@results
```


```{r}
estimated_adult@results
```