# This script takes the sample-level data from analyse_02, analyse_11, and
# analyse_12, fits linear mixed-effects models, and saves the fixed effects.

library(lme4)
library(dplyr)
library(purrr)
library(readr)
library(parallel)

n_cores <- parallel::detectCores(all.tests=FALSE, logical=TRUE) - 2
# n_cores <- 7

# normalise a vector to lie between 0 and 1
norm01 <- function(x, ...) (x - min(x, ...)) / (max(x, ...) - min(x, ...))

# get the stimuli's percentage of name agreement values
stim <- read_csv("boss.csv", col_types = cols(perc_name_agree_denom_fq_inputs = col_number())) %>%
  select(filename, perc_name_agree_denom_fq_inputs) %>%
  rename(perc_name_agree = perc_name_agree_denom_fq_inputs)

# import the max electrode data from the preprocessing, and set up the
# variables for the model; ch_i optionally restricts the import to a
# subset of channels
get_sample_data <- function(path="sample_data", ch_i=NA) {
  list.files(path, pattern=".+\\.csv$", full.names=TRUE) %>%
    map_dfr(function(f) {
      message(sprintf(" - importing %s", f))
      x <- read_csv(
        f,
        col_types=cols(
          subj_id = col_character(),
          stim_grp = col_integer(),
          resp_grp = col_integer(),
          item_nr = col_integer(),
          ch_name = col_character(),
          time = col_double(),
          uV = col_double(),
          condition = col_character(),
          image = col_character(),
          string = col_character(),
          .default = col_double()
        ),
        progress = FALSE
      ) %>%
        select(-stim_grp, -resp_grp, -item_nr)
      if (all(!is.na(ch_i))) x <- filter(x, ch_name %in% ch_i)
      x
    }) %>%
    left_join(stim, by=c("image" = "filename")) |>
    mutate(
      prop_agree = perc_name_agree/100,
      pred_norm = norm01(prop_agree, na.rm=TRUE),
      # as factors for size efficiency
      ch_name = factor(ch_name),
      image = factor(image),
      string = factor(string),
      time = factor(time),
      subj_id = factor(subj_id)
    ) |>
    select(time, condition, pred_norm, subj_id, ch_name, image, string, uV)
}
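# Example usage sketch (not part of the pipeline; the channel names here are
# hypothetical): ch_i restricts the import to a subset of electrodes, e.g.
#   d <- get_sample_data(path="sample_data", ch_i=c("Cz", "Pz"))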
# fit models for each time point and electrode, extract the fixed effects,
# and repeat for each time-locked event
paths <- c("sample_data", "sample_data_picture", "sample_data_response")

for (p in paths) {
  message(sprintf("Modelling time points and channels individually for %s", p))

  # get a list of data frames, one per time point and channel, with a
  # deviation-coded congruency predictor
  d_list <- get_sample_data(path=p) |>
    mutate(cong_dev = as.numeric(scale(ifelse(condition=="A2", 0, 1), center=TRUE, scale=FALSE))) |>
    group_split(time, ch_name)

  gc_out <- gc()

  message(sprintf(" - fitting models on %g cores", n_cores))

  # fit models in parallel
  cl <- makeCluster(n_cores)
  cl_packages <- clusterEvalQ(cl, {
    library(dplyr)
    library(lme4)
  })

  fe_res <- parLapply(cl, d_list, function(d_i) {
    # a fuller random-effects structure, kept commented out for reference;
    # the simpler intercepts-only model below is the one fitted
    # m <- lme4::lmer(
    #   uV ~ cong_dev * pred_norm +
    #     (cong_dev * pred_norm | subj_id) +
    #     (cong_dev | image) +
    #     (1 | string),
    #   REML=FALSE,
    #   control = lmerControl(optimizer="bobyqa"),
    #   data=d_i
    # )

    m <- lme4::lmer(
      uV ~ cong_dev * pred_norm +
        (1 | subj_id) +
        (1 | image) +
        (1 | string),
      REML=FALSE,
      control = lmerControl(optimizer="bobyqa"),
      data=d_i
    )

    m |>
      summary() |>
      with(coefficients) |>
      as_tibble(rownames = "fe") |>
      mutate(
        time = unique(d_i$time),
        ch_name = unique(d_i$ch_name)
      )
  }) |>
    reduce(bind_rows)

  stopCluster(cl)

  # tidy the fixed-effect labels and add 95% Wald confidence bounds
  fe_res_tidy <- fe_res |>
    mutate(
      fe_lab = factor(
        recode(
          fe,
          `(Intercept)` = "Intercept",
          cong_dev = "Congruency",
          pred_norm = "Predictability",
          `cong_dev:pred_norm` = "Congruency * Predictability"
        ),
        levels = c("Intercept", "Congruency", "Predictability", "Congruency * Predictability")
      ),
      fe_lab_newline = factor(
        fe_lab,
        labels = c("Intercept", "Congruency", "Predictability", "Congruency\n* Predictability")
      ),
      bound_lower = Estimate - 1.96 * `Std. Error`,
      bound_upper = Estimate + 1.96 * `Std. Error`
    )

  p_short <- sub("_data", "", p, fixed=TRUE)
  write_csv(fe_res_tidy, sprintf("analyse_13_%s_res.csv", p_short))

  rm("d_list")
  gc_out <- gc()
}
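# Example follow-up sketch (hypothetical, not part of the pipeline): read one
# of the saved files back in and inspect the interaction term over time for a
# single (assumed) channel.
#   res <- read_csv("analyse_13_sample_res.csv")
#   res |>
#     filter(fe == "cong_dev:pred_norm", ch_name == "Cz") |>
#     select(time, Estimate, bound_lower, bound_upper)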