---
title: "Axonal Length"
author: "Sergio D?ez Hermano"
date: '`r format(Sys.Date(),"%e de %B, %Y")`'
output:
  html_document:
      highlight: tango
      toc: yes
      toc_depth: 4
      toc_float:
        collapsed: no
---

```{r setup, include=FALSE}
require(knitr)
# include this code chunk as-is to set options
opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE)
Sys.setlocale("LC_TIME", "C")
```


##Load libraries and functions

```{r}
library(R.matlab)
library(lattice)
library(dplyr)
library(ggplot2)
library(ggridges)
library(ggpubr)
library(hrbrthemes)
library(viridis)
library(mltools)
library(data.table)
library(caret)
library(interactions)
library(performance)
library(MASS)
library(geepack)
library(pstools)
library(MXM)
library(rmcorr)
library(multcomp)
library(Hmisc)
library(tolerance)

options(digits = 10)

# https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html

################
# Bootstrap PI #
################

the.replication <- function(reg, s, x_Np1 = 0){
  # Make bootstrap residuals
  ep.star <- sample(s, size=length(reg$residuals),replace=TRUE)

  # Make bootstrap Y
  y.star <- fitted(reg) + ep.star

  # Do bootstrap regression
  lp <- model.frame(reg)[,2]
  bs.reg <- lm(y.star ~ lp - 1)

  # Create bootstrapped adjusted residuals
  bs.lev <- influence(bs.reg,do.coef = FALSE)$hat
  bs.s   <- residuals(bs.reg)/sqrt(1-bs.lev)
  bs.s   <- bs.s - mean(bs.s)

  # Calculate draw on prediction error
  # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"] 
  xb.xb <- (coef(my.reg)[1] - coef(bs.reg)[1])*x_Np1
  return(unname(xb.xb + sample(bs.s, size=1)))
  
}

##############################
# Bootstrap PI no rand noise #
##############################

the.replication2 <- function(reg, s, x_Np1 = 0){
  # Make bootstrap residuals
  ep.star <- sample(s, size=length(reg$residuals),replace=TRUE)

  # Make bootstrap Y
  y.star <- fitted(reg) + ep.star

  # Do bootstrap regression
  lp <- model.frame(reg)[,2]
  bs.reg <- lm(y.star ~ lp - 1)

  # Calculate draw on prediction error
  # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"] 
  xb.xb <- coef(bs.reg)[1]*x_Np1
  return(unname(xb.xb))
  
}


############################
# Bootstrap PI generalized #
############################

the.replication.gen <- function(reg, s, axType, x_Np1 = 0){
  
  # Make bootstrap residuals
  ep.star <- sample(s, size=length(reg$residuals), replace=TRUE)

  # Make bootstrap Y
  y.star <- fitted(reg) + ep.star

  # Do bootstrap regression
  lp <- model.frame(reg)[,2]
  nt <- model.frame(reg)[,3]
  bs.reg <- lm(y.star ~ lp:nt - 1)

  # Create bootstrapped adjusted residuals
  bs.lev <- influence(bs.reg, do.coef = FALSE)$hat
  bs.s   <- residuals(bs.reg)/sqrt(1-bs.lev)
  bs.s   <- bs.s - mean(bs.s)

  # Calculate draw on prediction error
  # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"] 
  xb.xb <- (coef(my.reg)[axType] - coef(bs.reg)[axType])*x_Np1
  return(unname(xb.xb + sample(bs.s, size=1)))
  
}

##############################
# Stratified random sampling #
##############################

stratified <- function(df, group, size, select = NULL, 
                       replace = FALSE, bothSets = FALSE) {
  if (is.null(select)) {
    df <- df
  } else {
    if (is.null(names(select))) stop("'select' must be a named list")
    if (!all(names(select) %in% names(df)))
      stop("Please verify your 'select' argument")
    temp <- sapply(names(select),
                   function(x) df[[x]] %in% select[[x]])
    df <- df[rowSums(temp) == length(select), ]
  }
  df.interaction <- interaction(df[group], drop = TRUE)
  df.table <- table(df.interaction)
  df.split <- split(df, df.interaction)
  if (length(size) > 1) {
    if (length(size) != length(df.split))
      stop("Number of groups is ", length(df.split),
           " but number of sizes supplied is ", length(size))
    if (is.null(names(size))) {
      n <- setNames(size, names(df.split))
      message(sQuote("size"), " vector entered as:\n\nsize = structure(c(",
              paste(n, collapse = ", "), "),\n.Names = c(",
              paste(shQuote(names(n)), collapse = ", "), ")) \n\n")
    } else {
      ifelse(all(names(size) %in% names(df.split)),
             n <- size[names(df.split)],
             stop("Named vector supplied with names ",
                  paste(names(size), collapse = ", "),
                  "\n but the names for the group levels are ",
                  paste(names(df.split), collapse = ", ")))
    }
  } else if (size < 1) {
    n <- round(df.table * size, digits = 0)
  } else if (size >= 1) {
    if (all(df.table >= size) || isTRUE(replace)) {
      n <- setNames(rep(size, length.out = length(df.split)),
                    names(df.split))
    } else {
      message(
        "Some groups\n---",
        paste(names(df.table[df.table < size]), collapse = ", "),
        "---\ncontain fewer observations",
        " than desired number of samples.\n",
        "All observations have been returned from those groups.")
      n <- c(sapply(df.table[df.table >= size], function(x) x = size),
             df.table[df.table < size])
    }
  }
  temp <- lapply(
    names(df.split),
    function(x) df.split[[x]][sample(df.table[x],
                                     n[x], replace = replace), ])
  set1 <- do.call("rbind", temp)
  
  if (isTRUE(bothSets)) {
    set2 <- df[!rownames(df) %in% rownames(set1), ]
    list(SET1 = set1, SET2 = set2)
  } else {
    set1
  }
}
```


##Load data

```{r}
# Get file paths
axonNames <- list.files(paste("EstereoDataPlanos/", sep=""), full.names=T)

# Load matlab file
axonFiles <- lapply(axonNames, function(x) readMat(x))
names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4")

# Extract data
# errorMatrix contains 4 arrays, one per neuron type
# within each array, the dimensions corresponds to step:dist:neuron
# this means that each array has as many matrices as neuron of a certain type
dist         <- lapply(axonFiles, function(x) x$Distancias.Entre.Planos)[[1]]
step         <- lapply(axonFiles, function(x) x$Step.Lengths)[[1]]
errorMatrix  <- lapply(axonFiles, function(x) x$MATRIZ.ERROR.LENGTH)
lpMatrix     <- lapply(axonFiles, function(x) x$MATRIZ.ESTIMATED.AXON.LENGTH)
lrVals       <- lapply(axonFiles, function(x) x$AXON.REAL.LENGTH)

# Get number of neurons per type
numberTypes  <- unlist(lapply(errorMatrix, function(x) dim(x)[3]))

# Vectorizing the arrays goes as follows: neuron, dist, step
errorVector <- unlist(lapply(errorMatrix, function(x) as.vector(x)))
lpVector <- unlist(lapply(lpMatrix, function(x) as.vector(x)))
lrVector <- unlist(lapply(lrVals, function(x) as.vector(x)))

# Store in data frames
allData <- data.frame(id = rep(1:sum(numberTypes), each = 90),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), times = 90*numberTypes),
                      dist = rep(rep(dist, each = 9), sum(numberTypes)), 
                      step = rep(rep(step, 10), sum(numberTypes)),
                      error = abs(errorVector),
                      error2 = errorVector,
                      LP = lpVector,
                      LR = rep(lrVector, each = 90))
allData$errorRaw <- allData$LR - allData$LP
allData$distep <- allData$dist * allData$step
allData$interact <- interaction(allData$dist, allData$step)
```

##Boot explanation

```{r}
# OLS
my.reg <- lm(LR ~ LP:neurType - 1, allData)

# Predict LR for neurType axCount
axCount <- 1
k <- 64000
y.p <- coef(my.reg)[axCount]*k

# Create adjusted residuals
leverage <- influence(my.reg, do.coef = FALSE)$hat
my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
my.s.resid <- my.s.resid - mean(my.s.resid)

reg <- my.reg
s <- my.s.resid

#####BOOTSTRAP (1 replicate)
# Make bootstrap residuals
ep.star <- sample(s, size=length(reg$residuals), replace=TRUE)

# Make bootstrap Y
y.star <- fitted(reg) + ep.star

# Do bootstrap regression
lp <- model.frame(reg)[,2]
nt <- model.frame(reg)[,3]
bs.reg <- lm(y.star ~ lp:nt - 1)

# Create bootstrapped adjusted residuals
bs.lev <- influence(bs.reg, do.coef = FALSE)$hat
bs.s   <- residuals(bs.reg)/sqrt(1-bs.lev)
bs.s   <- bs.s - mean(bs.s)

# Calculate draw on prediction error
x_Np1 <- k
axType <- axCount
xb.xb <- (coef(my.reg)[axType] - coef(bs.reg)[axType])*x_Np1
unname(xb.xb + sample(bs.s, size=1))
```

##Descriptive

###Correlations

####Error v Dist v Step by Neuron

```{r}
# # One plot per neuron type
car::scatter3d(error ~ dist + step, data = allData[allData$neurType == "Type1", ],
               point.col = "red", surface=FALSE, ellipsoid = FALSE)
rgl::open3d()
car::scatter3d(error ~ dist + step, data = allData[allData$neurType == "Type2", ],
               point.col = "blue", surface=FALSE, ellipsoid = FALSE)
rgl::open3d()
car::scatter3d(error ~ dist + step, data = allData[allData$neurType == "Type3", ],
               point.col = "green", surface=FALSE, ellipsoid = FALSE)
rgl::open3d()
car::scatter3d(error ~ dist + step, data = allData[allData$neurType == "Type4", ],
               point.col = "purple", surface=FALSE, ellipsoid = FALSE)

# All neurons in one plot
# Add some jitter to step and dist in order to visualize the neuron types
stepJit <- rep(c(0, 2, 4, 6), times = table(allData$neurType))
distJit <- rep(c(0, 0.5, 1, 1.5), times = table(allData$neurType))
allData$stepJit <- allData$step + stepJit
allData$distJit <- allData$dist + distJit
# Plot
rgl::open3d()
car::scatter3d(error ~ distJit + stepJit | neurType, data = allData,
               surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)

# Plot
# Duplicate data to avoid confusion
data3D <- allData
# Add a number identifyng every neuron-plane combination
data3D$plaNum <- c(rep(1, numberTypes[1]*90), rep(2, numberTypes[2]*90), rep(3, numberTypes[3]*90), rep(4, numberTypes[4]*90))
# Plot
rgl::open3d()
car::scatter3d(x = data3D$LP, y = data3D$LR, z = data3D$plaNum, group = data3D$neurType, 
               surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
# rgl::rgl.snapshot("LR_LP_corr.png", fmt = "png")
```

####LP v Dist v Step by Neuron

```{r}
# All neurons in one plot
# Add some jitter to step and dist in order to visualize the neuron types
stepJit <- rep(c(0, 2, 4, 6), times = table(allData$neurType))
distJit <- rep(c(0, 0.5, 1, 1.5), times = table(allData$neurType))
allData$stepJit <- allData$step + stepJit
allData$distJit <- allData$dist + distJit
# Plot
rgl::open3d()
car::scatter3d(LP ~ distJit + stepJit | neurType, data = allData,
               surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)

```

####LR vs LP 2d

```{r, fig.width=20, fig.height=15}
# Define color and formula
coul <- viridisLite::viridis(90)
# coul <- viridisLite::viridis(10)
formPlot <- formula("LR ~ LP")

setwd("EstereoAnalysis/")
png(filename=paste("scatterPerType.png", sep=""), type="cairo",
    units="in", width=20, height=15, pointsize=12,
    res=300)

# Plor for every axon type
par(mfcol=c(3,4))

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  # Select data
  data2d <- allData[allData$neurType == i, ]
  data2d$combNum <- rep(1:90, length(unique(data2d$id)))
  # data2d$combNum <- rep(rep(1:10, each = 9), length(unique(data2d$id)))
  data2d$col <-  coul[data2d$combNum]
  
  # Plot full data, 3.70 and 30.150; red dots are LRs
  plot(formPlot, data2d, pch = 21, bg = alpha(data2d$col, 0.5), col = NULL, 
       cex = 0.8, cex.main = 2.5, cex.lab = 1.5, cex.axis = 1.5, main = i)
  points(data2d$LR, data2d$LR, bg="red", pch = 21, col = NULL, cex = 0.5)
  
  plot(formPlot, data2d[data2d$dist == 3 & data2d$step == 70, ], 
       xlim = c(0, max(data2d$LP)), 
       pch = 21, bg = alpha(data2d[data2d$dist == 3 & data2d$step == 70, "col"], 0.5), 
       cex = 0.8, cex.main = 2.5, cex.lab = 1.5, cex.axis = 1.5, main = "3.70")
  points(data2d$LR, data2d$LR, bg="red", pch = 21, col = NULL, cex = 0.5)
  
  plot(formPlot, data2d[data2d$dist == 30 & data2d$step == 150, ], 
       xlim = c(0, max(data2d$LP)), 
       pch = 21, bg = alpha(data2d[data2d$dist == 30 & data2d$step == 150, "col"], 0.5), 
       cex = 0.8, cex.main = 2.5, cex.lab = 1.5, cex.axis = 1.5, main = "30.150")
  points(data2d$LR, data2d$LR, bg="red", pch = 21, col = NULL, cex = 0.5)
  
}

dev.off()
```

####Error vs Dist.Step 2d

```{r, fig.width=20, fig.height=10}
# Define color and formula
coul <- viridisLite::viridis(90)
# coul <- viridisLite::viridis(10)

setwd("EstereoAnalysis/")
png(filename=paste("scatterPerType_error.png", sep=""), type="cairo",
    units="in", width=20, height=10, pointsize=12,
    res=300)

# Plor for every axon type
maxErr <- max(allData$error)

par(mfcol=c(2,4))

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  # Select data
  data2d <- allData[allData$neurType == i, ]
  data2d$combNum <- rep(1:90, length(unique(data2d$id)))
  # data2d$combNum <- rep(rep(1:10, each = 9), length(unique(data2d$id)))
  data2d$col <-  coul[data2d$combNum]
  
  formPlot <- formula("error ~ interact")
  # Plot full data, 3.70 and 30.150; red dots are LRs
  plot(formPlot, data2d, pch = 21, col = alpha(data2d$col, 0.5),
       cex = 0.8, cex.main = 2.5, cex.lab = 1.5, cex.axis = 1.5, main = i)
  # points(data2d$distep, data2d$error, bg="red", pch = 21, col = NULL, cex = 0.5)
  
  formPlot2 <- formula("error ~ distep")
  # Plot full data, 3.70 and 30.150; red dots are LRs
  plot(formPlot2, data2d, pch = 21, bg = alpha(data2d$col, 0.5), col = NULL, 
       cex = 0.8, cex.main = 2.5, cex.lab = 1.5, cex.axis = 1.5, main = i)
  # points(data2d$distep, data2d$error, bg="red", pch = 21, col = NULL, cex = 0.5)

  
}

dev.off()
```

####Error vs Dist.Step 2d (ylim)

```{r, fig.width=20, fig.height=10}
# Define color and formula
coul <- viridisLite::viridis(90)
# coul <- viridisLite::viridis(10)
maxErr <- max(allData$error)

setwd("EstereoAnalysis/")
png(filename=paste("scatterPerType_errorylim.png", sep=""), type="cairo",
    units="in", width=20, height=10, pointsize=12,
    res=300)

# Plor for every axon type
par(mfcol=c(2,4))

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  # Select data
  data2d <- allData[allData$neurType == i, ]
  data2d$combNum <- rep(1:90, length(unique(data2d$id)))
  # data2d$combNum <- rep(rep(1:10, each = 9), length(unique(data2d$id)))
  data2d$col <-  coul[data2d$combNum]
  
  formPlot <- formula("error ~ interact")
  # Plot full data, 3.70 and 30.150; red dots are LRs
  plot(formPlot, data2d, pch = 21, col = alpha(data2d$col, 0.5),
       cex = 0.8, cex.main = 2.5, cex.lab = 1.5, cex.axis = 1.5, main = i, ylim = c(0, maxErr))
  # points(data2d$distep, data2d$error, bg="red", pch = 21, col = NULL, cex = 0.5)
  
  formPlot2 <- formula("error ~ distep")
  # Plot full data, 3.70 and 30.150; red dots are LRs
  plot(formPlot2, data2d, pch = 21, bg = alpha(data2d$col, 0.5), col = NULL, 
       cex = 0.8, cex.main = 2.5, cex.lab = 1.5, cex.axis = 1.5, main = i,  ylim = c(0, maxErr))
  # points(data2d$distep, data2d$error, bg="red", pch = 21, col = NULL, cex = 0.5)

  
}

dev.off()
```

##lmer

```{r}
library(lme4)
library(lmerTest)


dataLmer <- allData[allData$neurType == "Type1", ]
# dataLmer$interact <- interaction(dataLmer$dist, dataLmer$step, lex.order = T)
# dataLmer$LPsca <- scale(dataLmer$LP)
# dataLmer$LRsca <- scale(dataLmer$LR)
# dataLmer$distsca <- scale(dataLmer$dist)
# dataLmer$stepsca <- scale(dataLmer$step)
# dataLmer$id2 <- factor(dataLmer$id)

datagroup <- dataLmer %>% group_by(id)

fit2 <- lmer(LP ~ LR + (LR|id), data=datagroup)
rand(fit2)
```

```{r}
library(scales)

# Function to add a polygon if we have an X vector and two Y vectors of the same length.
addpoly <- function(x,y1,y2,col=alpha("lightgrey",0.8),...){
  ii <- order(x)
  y1 <- y1[ii]
  y2 <- y2[ii]
  x <- x[ii]
  polygon(c(x,rev(x)), c(y1, rev(y2)), col=col, border=NA,...)
}

bb <- bootMer(fit2,
              FUN=function(x) predict(x, interval = "prediction"),
              nsim=250)
```

```{r}
# Take quantiles of predictions from bootstrap replicates.
# These represent the confidence interval of the mean at any value of Time.
dataLmer$lci <- apply(bb$t, 2, quantile, 0.025)
dataLmer$uci <- apply(bb$t, 2, quantile, 0.975)
dataLmer$pred <- predict(fit2, interval = "prediction")

# We will just plot one Diet for illustration
dat <- subset(dataLmer, c(dist == 3 & step == 70))

with(dat, {
  plot(LR,LP)
  addpoly(LR, lci, uci)
  lines(LR, pred)
})

# We will just plot one Diet for illustration
dat <- subset(dataLmer, c(dist == 30 & step == 150))

with(dat, {
  plot(LR,LP)
  addpoly(LR, lci, uci)
  lines(LR, pred)
})
```

##lm

```{r, fig.width=20, fig.height=15}
gList <- list()
gCount <- 1

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  dataMod <- allData[allData$neurType == i, ]
  lmMod <- lm(LR ~ LP - 1, dataMod)
  
  pi <- predict(lmMod, interval = "prediction")
  dataJoin <- cbind(dataMod, pi)
  
  gList[[gCount]] <- ggplot(dataJoin, aes(LP, LR))+
    geom_point() +
    geom_line(aes(y=lwr), color = "red", linetype = "dashed")+
    geom_line(aes(y=upr), color = "red", linetype = "dashed")+
    geom_smooth(method=lm, se=TRUE)+
    ggtitle(i)+
    theme_bw()+
    theme(plot.title = element_text(size = 20, face = "bold"),
          axis.title = element_text(size = 15, face = "bold"))
  
  gCount <- gCount + 1
  
  gList[[gCount]] <- ggplot(dataJoin[dataJoin$dist == 3 & dataJoin$step == 70, ], aes(LP, LR))+
    geom_point() +
    geom_line(aes(y=lwr), color = "red", linetype = "dashed")+
    geom_line(aes(y=upr), color = "red", linetype = "dashed")+
    geom_smooth(method=lm, se=TRUE)+
    ggtitle("3.70") + 
    theme_bw()+
    theme(plot.title = element_text(size = 20, face = "bold"),
          axis.title = element_text(size = 15, face = "bold"))
  
  gCount <- gCount + 1
  
  gList[[gCount]] <- ggplot(dataJoin[dataJoin$dist == 30 & dataJoin$step == 150, ], aes(LP, LR))+
    geom_point() +
    geom_line(aes(y=lwr), color = "red", linetype = "dashed")+
    geom_line(aes(y=upr), color = "red", linetype = "dashed")+
    geom_smooth(method=lm, se=TRUE)+
    ggtitle("30.150")+
    theme_bw()+
    theme(plot.title = element_text(size = 20, face = "bold"),
          axis.title = element_text(size = 15, face = "bold"))
  
  gCount <- gCount + 1
  
  # gList[[i]] <- list(g0, g1, g2)
  
}

gList <- list(gList[[1]], gList[[4]], gList[[7]], gList[[10]],
              gList[[2]], gList[[5]], gList[[8]], gList[[11]],
              gList[[3]], gList[[6]], gList[[9]], gList[[12]])

setwd("EstereoAnalysis/")
png(filename=paste("confpredInt_lm.png", sep=""), type="cairo",
    units="in", width=20, height=15, pointsize=12,
    res=300)

ggarrange(plotlist = gList, ncol = 4, nrow = 3)

dev.off()
```

###lm train/test

```{r, fig.width=20, fig.height=15}
gList <- list()
gCount <- 1

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  
  dataType <- allData[allData$neurType == i, ]
  
  # Stratified random training/test sets
  stratDat <- stratified(dataType, c("dist", "step"), 0.7, bothSets = TRUE, replace = FALSE)
  trainSet <- stratDat$SET1
  testSet <- stratDat$SET2
  
  lmMod <- lm(LR ~ LP - 1, trainSet)
  
  pi <- predict(lmMod, testSet, interval = "prediction")
  dataJoin <- cbind(testSet, pi)
  
  gList[[gCount]] <- ggplot(dataJoin, aes(LP, LR))+
    geom_point() +
    geom_line(aes(y=lwr), color = "red", linetype = "dashed")+
    geom_line(aes(y=upr), color = "red", linetype = "dashed")+
    geom_smooth(method=lm, se=TRUE)+
    ggtitle(i)+
    theme_bw()+
    theme(plot.title = element_text(size = 20, face = "bold"),
          axis.title = element_text(size = 15, face = "bold"))
  
  gCount <- gCount + 1
  
  gList[[gCount]] <- ggplot(dataJoin[dataJoin$dist == 3 & dataJoin$step == 70, ], aes(LP, LR))+
    geom_point() +
    geom_line(aes(y=lwr), color = "red", linetype = "dashed")+
    geom_line(aes(y=upr), color = "red", linetype = "dashed")+
    geom_smooth(method=lm, se=TRUE)+
    ggtitle("3.70") + 
    theme_bw()+
    theme(plot.title = element_text(size = 20, face = "bold"),
          axis.title = element_text(size = 15, face = "bold"))
  
  gCount <- gCount + 1
  
  gList[[gCount]] <- ggplot(dataJoin[dataJoin$dist == 30 & dataJoin$step == 150, ], aes(LP, LR))+
    geom_point() +
    geom_line(aes(y=lwr), color = "red", linetype = "dashed")+
    geom_line(aes(y=upr), color = "red", linetype = "dashed")+
    geom_smooth(method=lm, se=TRUE)+
    ggtitle("30.150")+
    theme_bw()+
    theme(plot.title = element_text(size = 20, face = "bold"),
          axis.title = element_text(size = 15, face = "bold"))
  
  gCount <- gCount + 1
  
  # gList[[i]] <- list(g0, g1, g2)
  
}

gList <- list(gList[[1]], gList[[4]], gList[[7]], gList[[10]],
              gList[[2]], gList[[5]], gList[[8]], gList[[11]],
              gList[[3]], gList[[6]], gList[[9]], gList[[12]])

setwd("EstereoAnalysis/")
png(filename=paste("confpredInt_lm_traintest.png", sep=""), type="cairo",
    units="in", width=20, height=15, pointsize=12,
    res=300)

ggarrange(plotlist = gList, ncol = 4, nrow = 3)

dev.off()
```


##Bootstrap PI

This code is meant to be run independently for every neurType and in parallel

```{r}
# OLS
my.reg <- lm(LR ~ LP:neurType - 1, allData)
# summary(my.reg)

# List to store distances
distList <- list()
# List to store random selection
distRand <- list()
distCount <- 1
# Subset a particular neurType
dataToBoot <- allData[allData$neurType == "Type1", ]
axCount <- 1

# Ensure reproducibility
set.seed(123456)

for (i in unique(allData$dist)) {
  
  # List to store steps
  stepList <- list()
  # List to store random selection
  stepRand <- list()
  stepCount <- 1
  
  for (j in unique(allData$step)) {
    
    # Subset dist and step
    dataGroup <- dataToBoot[dataToBoot$dist == i & dataToBoot$step == j, ]
    
    # Random sample of N=10
    dataSub <- sample_n(dataGroup, 10, replace = FALSE)
    
    # Store selected for later
    stepRand[[stepCount]] <- dataSub
    
    # List to store errors
    epList <- list()
    epCount <- 1
    
    for (k in dataSub[ , "LP"]) {
      
      # Predict LR for neurType axCount
      y.p <- coef(my.reg)[axCount]*k
      
      # Create adjusted residuals
      leverage <- influence(my.reg, do.coef = FALSE)$hat
      my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
      my.s.resid <- my.s.resid - mean(my.s.resid)
      
      # Do bootstrap with 100 replications
      print("Enter replication")
      ep.draws <- replicate(n=100, the.replication.gen(reg = my.reg, s = my.s.resid, axCount, x_Np1 = k))
      
      # Store in list
      epList[[epCount]] <- ep.draws
      epCount <- epCount + 1
      
      print(epCount)
      
    }
    
    # Store in list
    stepList[[stepCount]] <- epList
    stepCount <- stepCount + 1
    
  }
  
  names(stepList) <- unique(allData$step)[1]
  names(stepRand) <- unique(allData$step)[1]
  
  # Store in list
  distList[[distCount]] <- stepList
  distRand[[distCount]] <- stepRand
  distCount <- distCount + 1
  
}

names(distList) <- unique(allData$dist)[1]
names(distRand) <- unique(allData$dist)[1]

# saveRDS(list(distList, distRand), paste("stereo_bootPI_full_Type", axCount, ".rds", sep = ""))
```


####Estimate porcentual errors

```{r, fig.width=8, fig.height=8}
# 10 random neurons per combination of dist:step
numExtract <- 20

# List to store axon types
axList <- list()

for (m in c("Type1", "Type2", "Type3", "Type4")) {
  
  # Load object
  bootPI <- readRDS(paste("EstereoAnalysis/stereo_bootPI_full_", m, "_boot20.rds", sep = ""))
  # First list is prediction error, second is random subset
  typeList <- bootPI[[1]]
  typeRand <- bootPI[[2]]
  
  # Estimate and store in list
  peList <- list()
  peCount <- 1
  
  for (i in factor(unique(allData$dist))) {
    
    for (j in factor(unique(allData$step))) {
      
      # Extract LR values
      lr <- typeRand[[i]][[j]]$LR
      
      for (k in 1:length(lr)) {
        
        # Divide absolute errors by LR to get porcentual errors
        pe <- typeList[[i]][[j]][[k]] / lr[k]
        
        # Store in list
        peList[[peCount]] <- pe*100
        peCount <-peCount + 1
      }
      
    }
    
  }
  
  # Reformat as data frame
  peFrame <- data.frame(pe = unlist(peList),
                        neurType = rep(m, 90*numExtract*100),
                        dist = rep(unique(allData$dist), each = 9*numExtract*100),
                        step = rep(rep(unique(allData$step), each = numExtract*100), 10))
  peFrame$interact <- interaction(peFrame$dist, peFrame$step, lex.order=T)
  
  # Store in list
  axList[[m]] <- list(peFrame = peFrame, typeList = typeList, typeRand = typeRand)
  
}

```

#####Plot density

```{r, fig.height=10, fig.width=20}
# Plot
# peGroup <- peFrame %>% group_by(interact)
# peSample <- sample_n(peGroup, 100)

plotList <- list()

for (m in c("Type1", "Type2", "Type3", "Type4")) {
  
  plotList[[m]] <- ggplot(axList[[m]]$peFrame, aes(x = `pe`, y = `interact`, fill = ifelse(..x.. >=-5 & ..x.. <=5, ">= -5", "< 5"))) +
    scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
    geom_density_ridges_gradient(alpha = 0.8) +
    theme_ridges() + 
    theme(legend.position = "none") + 
    xlab("% Error") + 
    xlim(c(-40,40)) +
    ggtitle(m)
  
}

setwd("EstereoAnalysis/")
png(filename=paste("prederrorDistributions_ols_replacementBOOT20.png", sep=""), type="cairo",
    units="in", width=20, height=10, pointsize=12,
    res=300)

ggarrange(plotlist = plotList, ncol = 4, nrow = 1)

dev.off()
```


#####Errors vs cumul probability

```{r, fig.height=5, fig.width=20}
# Inverse quantile
quantInv <- function(distr, value) ecdf(distr)(value)

# Function to apply to all axon types
quantType <- function(peFrame, probSeq) {
  
  probList <- list()
  
  for (i in unique(peFrame$interact)) {
    
    dataProb <- peFrame[peFrame$interact == i, "pe"]
    probVec <- numeric()
    
    for (j in probSeq) {
      
      errProb <- quantInv(dataProb, j) - quantInv(dataProb, -j)
      probVec <- c(probVec, errProb)
      
    }
    
    probList[[i]] <- probVec
    
  }
  
  return(probList)
  
}

# Define errors for which to calculate probability
binProb <- 0.5
probSeq <- seq(binProb, 100, binProb)
# Store axon types in lists
axProb <- lapply(axList, function(x) quantType(x$peFrame, probSeq))
# Save 
saveRDS(axProb, "errorProbs_bin0.5_boot20.rds")
```

######Plot heatmap

```{r, fig.height=5, fig.width=15}
library(latticeExtra)

# Reformat as dataframe
binProb <- 0.5
probSeq <- seq(binProb, 100, binProb)
axProb <- readRDS("errorProbs_bin0.5_boot20_notRel.rds")
probFrame <- data.frame(error = rep(probSeq, 90*4), 
                        neurType = rep(c("Type1", "Type2", "Type3", "Type4"), each = length(probSeq)*90),
                        dist = rep(rep(unique(allData$dist), each = length(probSeq)*9), 4),
                        step = rep(rep(unique(allData$step), each = length(probSeq)), 10*4),
                        prob = unlist(axProb))

# Plot conttour plot for 5% error
library(viridisLite)
coul <- viridis(1000)

# Store heatmaps
heatList <- list()
smoothList <- list()

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  levelList1 <- list()
  levelList2 <- list()
  
  for (j in c(5, 10, 20)) {
    
    dataPlot <- probFrame[probFrame$neurType == i & probFrame$error == j, ]
    
    levelList1[[as.character(j)]] <- levelplot(prob ~ dist * step, data = dataPlot, 
                                               col.regions = coul,
                                               scales = list(y = list(at = unique(allData$step), labels = unique(allData$step)),
                                                             x = list(at = unique(allData$dist), labels = unique(allData$dist))),
                                               ylim = c(155,65),
                                               colorkey = list(at = (seq(min(dataPlot$prob),
                                                                         max(dataPlot$prob),
                                                                         0.001))),
                                               main = paste("P(%Error <=", j, ")", sep = "")) 
    
    levelList2[[as.character(j)]] <- levelplot(prob ~ dist * step, data = dataPlot, 
                                               col.regions = coul,
                                               scales = list(y = list(at = unique(allData$step), labels = unique(allData$step)),
                                                             x = list(at = unique(allData$dist), labels = unique(allData$dist))),
                                               ylim = c(150,70),
                                               xlim = c(3,30),
                                               cuts = 20,
                                               colorkey = list(at = (seq(min(dataPlot$prob),
                                                                         max(dataPlot$prob),
                                                                         0.001))),
                                               main = paste("P(%Error <=", j, ")", sep = ""),
                                               panel = panel.levelplot.points, cex = 0) + 
      layer_(panel.2dsmoother(..., n = 200))
  }
  
  heatList[[i]] <- levelList1
  smoothList[[i]] <- levelList2
}

# Plot
setwd("EstereoAnalysis/")
for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  png(filename=paste("errorProbabilities_heatmap_", i, "_BOOT20.png", sep=""), type="cairo",
      units="in", width=15, height=10, pointsize=12,
      res=300)
  
  gridExtra::grid.arrange(grobs = c(heatList[[i]], smoothList[[i]]), ncol = 3, nrow = 2)
  
  dev.off()
  
}


```

#####Mean errors

```{r, fig.height=5, fig.width=20}
# Function to apply to all axon types
meanType <- function(peFrame) {
  
  probList <- list()
  
  for (i in unique(peFrame$interact)) {
    
    dataProb <- peFrame[peFrame$interact == i, "pe"]
    probList[[i]] <- mean(abs(dataProb))
    
  }
  
  return(probList)
  
}

# Store axon types in lists
axMean <- lapply(axList, function(x) meanType(x$peFrame))
```

######Plot heatmap

```{r, fig.height=5, fig.width=15}
library(latticeExtra)

# Reformat as dataframe
meanFrame <- data.frame(neurType = rep(c("Type1", "Type2", "Type3", "Type4"), each = 90),
                        dist = rep(rep(unique(allData$dist), each = 9), 4),
                        step = rep(unique(allData$step), 10*4),
                        meanErr = unlist(axMean))

# Contour color palette
colfunc <- colorRampPalette(c("navy","royalblue","springgreen","gold","yellow"))
colfin <- colfunc(1000)
library(viridisLite)
coul <- viridis(1000)



# Store heatmaps
heatList <- list()
smoothList <- list()

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  heatList[[i]] <- levelplot(meanErr ~ dist * step, data = meanFrame[meanFrame$neurType == i, ], col.regions = coul,
                             scales = list(y = list(at = unique(allData$step), labels = unique(allData$step)),
                                           x = list(at = unique(allData$dist), labels = unique(allData$dist))),
                             ylim = c(155, 65),
                             colorkey = list(at = (seq(min(meanFrame[meanFrame$neurType == i, "meanErr"]),
                                                       max(meanFrame[meanFrame$neurType == i, "meanErr"]),
                                                       0.05))),
                             main = paste("Mean Abs Error,", i)) 
  
  smoothList[[i]] <- levelplot(meanErr ~ dist * step, data = meanFrame[meanFrame$neurType == i, ], col.regions = coul,
                               scales = list(y = list(at = unique(allData$step), labels = unique(allData$step)),
                                             x = list(at = unique(allData$dist), labels = unique(allData$dist))),
                               ylim = c(150,70),
                               xlim = c(3,30),
                               cuts = 20, 
                               colorkey = list(at = (seq(min(meanFrame[meanFrame$neurType == i, "meanErr"]),
                                                         max(meanFrame[meanFrame$neurType == i, "meanErr"]),
                                                         0.05))),
                               main = paste("Mean Abs Error,", i),
                               panel = panel.levelplot.points, cex = 0) + 
    layer_(panel.2dsmoother(..., n = 200))
  
}


# Plot
setwd("EstereoAnalysis/")

png(filename="meanProbabilities_heatmap_reverseBOOT20.png", type="cairo",
    units="in", width=20, height=10, pointsize=12,
    res=300)

  gridExtra::grid.arrange(grobs = c(heatList, smoothList), ncol = 4, nrow = 2)
 
dev.off()
  
```

####Interval limits

```{r, fig.width=8, fig.height=8}
# 10 random neurons per combination of dist:step
numExtract <- 20

# List to store axon types
axList <- list()

for (m in c("Type1", "Type2", "Type3", "Type4")) {
  
  # Load object
  bootPI <- readRDS(paste("EstereoAnalysis/stereo_bootPI_full_", m, "_boot20.rds", sep = ""))
  # First list is prediction error, second is random subset
  typeList <- bootPI[[1]]
  typeRand <- bootPI[[2]]
  
  # Estimate and store in list
  peList <- list()
  peCount <- 1
  
  for (i in factor(unique(allData$dist))) {
    
    for (j in factor(unique(allData$step))) {
      
      # Extract LR values
      lr <- typeRand[[i]][[j]]$LR
      
      for (k in 1:length(lr)) {
        
        # Divide absolute errors by LR to get porcentual errors
        pe <- typeList[[i]][[j]][[k]] / lr[k]
        
        # Store in list
        peList[[peCount]] <- pe*100
        peCount <-peCount + 1
      }
      
    }
    
  }
  
  # Reformat as data frame
  peFrame <- data.frame(pe = unlist(peList),
                        neurType = rep(m, 90*numExtract*100),
                        dist = rep(unique(allData$dist), each = 9*numExtract*100),
                        step = rep(rep(unique(allData$step), each = numExtract*100), 10))
  peFrame$interact <- interaction(peFrame$dist, peFrame$step, lex.order=T)
  
  # Store in list
  axList[[m]] <- list(peFrame = peFrame, typeList = typeList, typeRand = typeRand)
  
}

```

##Bootstrap PI (only one type)

This code is meant to be run independently for every neurType and in parallel

```{r}
# OLS
my.reg <- lm(LR ~ LP - 1, data = allData[allData$neurType == "Type1", ])
# summary(my.reg)

# List to store distances
distList <- list()
# List to store random selection
distRand <- list()
distCount <- 1
# Subset a particular neurType
dataToBoot <- allData[allData$neurType == "Type1", ]
axCount <- 1

# Ensure reproducibility
set.seed(123456)

for (i in unique(allData$dist)) {
  
  # List to store steps
  stepList <- list()
  # List to store random selection
  stepRand <- list()
  stepCount <- 1
  
  for (j in unique(allData$step)) {
    
    # Subset dist and step
    dataGroup <- dataToBoot[dataToBoot$dist == i & dataToBoot$step == j, ]
    
    # Random sample of N=10
    # dataSub <- sample_n(dataGroup, 10, replace = FALSE)
    dataSub <- dataGroup
    
    # Store selected for later
    stepRand[[stepCount]] <- dataSub
    
    # List to store errors
    epList <- list()
    epCount <- 1
    
    for (k in dataSub[ , "LP"]) {
      
      # Predict LR for neurType axCount
      y.p <- coef(my.reg)[1]*k
      
      # Create adjusted residuals
      leverage <- influence(my.reg, do.coef = FALSE)$hat
      my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
      my.s.resid <- my.s.resid - mean(my.s.resid)
      
      # Do bootstrap with 100 replications
      # print("Enter replication")
      ep.draws <- replicate(n=100, the.replication2(reg = my.reg, s = my.s.resid, x_Np1 = k))
      
      # Store in list
      epList[[epCount]] <- ep.draws
      # print(epCount)
      epCount <- epCount + 1
      
      
    }
    
    # Store in list
    stepList[[stepCount]] <- epList
    print(paste("Step", stepCount, "complete"))
    stepCount <- stepCount + 1
    
  }
  
  names(stepList) <- unique(allData$step)
  names(stepRand) <- unique(allData$step)
  
  # Store in list
  distList[[distCount]] <- stepList
  distRand[[distCount]] <- stepRand
  print(paste("Dist", distCount, "complete"))
  distCount <- distCount + 1
  
}

names(distList) <- unique(allData$dist)
names(distRand) <- unique(allData$dist)

saveRDS(list(distList, distRand), paste("stereo_bootPI_full_ONLYType", axCount, ".rds", sep = ""))
```

####Estimate porcentual errors

```{r, fig.width=8, fig.height=8}
# 10 random neurons per combination of dist:step
numExtract <- 90

# List to store axon types
axList <- list()

for (m in c("Type1")) {
  
  # Load object
  bootPI <- readRDS(paste("EstereoAnalysis/stereo_bootPI_full_ONLY", m, ".rds", sep = ""))
  # First list is prediction error, second is random subset
  typeList <- bootPI[[1]]
  typeRand <- bootPI[[2]]
  
  # Estimate and store in list
  peList <- list()
  peCount <- 1
  
  for (i in factor(unique(allData$dist))) {
    
    for (j in factor(unique(allData$step))) {
      
      # Extract LR values
      lr <- typeRand[[i]][[j]]$LR
      
      for (k in 1:length(lr)) {
        
        # Divide absolute errors by LR to get porcentual errors
        pe <- (lr[k] - typeList[[i]][[j]][[k]]) / lr[k]
        
        # Store in list
        peList[[peCount]] <- pe*100
        peCount <-peCount + 1
      }
      
    }
    
  }
  
  # Reformat as data frame
  peFrame <- data.frame(pe = unlist(peList),
                        neurType = rep(m, 90*numExtract*100),
                        dist = rep(unique(allData$dist), each = 9*numExtract*100),
                        step = rep(rep(unique(allData$step), each = numExtract*100), 10))
  peFrame$interact <- interaction(peFrame$dist, peFrame$step, lex.order=T)
  
  # Store in list
  axList[[m]] <- list(peFrame = peFrame, typeList = typeList, typeRand = typeRand)
  
}

```

#####Mean errors

```{r, fig.height=5, fig.width=20}
# Function to apply to all axon types
meanType <- function(peFrame) {
  
  probList <- list()
  
  for (i in unique(peFrame$interact)) {
    
    dataProb <- peFrame[peFrame$interact == i, "pe"]
    probList[[i]] <- mean(abs(dataProb))
    
  }
  
  return(probList)
  
}

# Store axon types in lists
axMean <- lapply(axList, function(x) meanType(x$peFrame))
```

######Plot heatmap

```{r, fig.height=5, fig.width=15}
library(latticeExtra)

# Reformat as dataframe
meanFrame <- data.frame(neurType = rep("Type1", 90),
                        dist = rep(unique(allData$dist), each = 9),
                        step = rep(unique(allData$step), 10),
                        meanErr = unlist(axMean))

# Contour color palette
colfunc <- colorRampPalette(c("navy","royalblue","springgreen","gold","yellow"))
colfin <- colfunc(1000)
library(viridisLite)
coul <- viridis(1000)



# Store heatmaps
heatList <- list()
smoothList <- list()

for (i in c("Type1")) {
  
  heatList[[i]] <- levelplot(meanErr ~ dist * step, data = meanFrame[meanFrame$neurType == i, ], col.regions = coul,
                             scales = list(y = list(at = unique(allData$step), labels = unique(allData$step)),
                                           x = list(at = unique(allData$dist), labels = unique(allData$dist))),
                             ylim = c(155, 65),
                             colorkey = list(at = (seq(min(meanFrame[meanFrame$neurType == i, "meanErr"]),
                                                       max(meanFrame[meanFrame$neurType == i, "meanErr"]),
                                                       0.05))),
                             main = paste("Mean Abs Error,", i)) 
  
  smoothList[[i]] <- levelplot(meanErr ~ dist * step, data = meanFrame[meanFrame$neurType == i, ], col.regions = coul,
                               scales = list(y = list(at = unique(allData$step), labels = unique(allData$step)),
                                             x = list(at = unique(allData$dist), labels = unique(allData$dist))),
                               ylim = c(150,70),
                               xlim = c(3,30),
                               cuts = 20, 
                               colorkey = list(at = (seq(min(meanFrame[meanFrame$neurType == i, "meanErr"]),
                                                         max(meanFrame[meanFrame$neurType == i, "meanErr"]),
                                                         0.05))),
                               main = paste("Mean Abs Error,", i),
                               panel = panel.levelplot.points, cex = 0) + 
    layer_(panel.2dsmoother(..., n = 200))
  
}


# Plot
setwd("EstereoAnalysis/")

png(filename="meanProbabilities_heatmap_reverseBOOT20.png", type="cairo",
    units="in", width=20, height=10, pointsize=12,
    res=300)

  gridExtra::grid.arrange(grobs = c(heatList, smoothList), ncol = 4, nrow = 2)
 
dev.off()
  
```

##GEE

```{r}
# regFul <- geeglm(LR ~ LP:dist:step:neurType - 1, allData, id = id, corstr = "independence", family = "gaussian")
regFul <- lm(LR ~ LP:dist:step:neurType - 1, allData)
summary(regFul)

t1 <- allData[allData$neurType == "Type1", ]
t1$prod <- t1$LP*t1$dist*t1$step
t1$alpha <- t1$LR/t1$prod
t1$LRp <- t1$prod*coef(regFul)[1]
```

```{r, fig.height=5, fig.width=15}
t1 <- allData[allData$neurType == "Type1", ]

par(mfrow=c(1,3))
plot(t1$dist*t1$step, t1$error2)
plot(t1$dist, t1$error2)
plot(t1$step, t1$error2)

t1mean <- aggregate(error2 ~ step + dist, t1, mean)

par(mfrow=c(1,3))
plot(t1mean$dist*t1mean$step, t1mean$error2)
plot(t1mean$dist, t1mean$error2)
plot(t1mean$step, t1mean$error2)

t1mean$prod <- t1mean$step*t1mean$dist
summary(lm(error2 ~ step*dist, t1mean))
```

```{r, fig.height=5, fig.width=15}
t1 <- allData[allData$neurType == "Type1", ]

par(mfrow=c(1,3))
plot(t1$dist*t1$step, t1$error, pch = 20, cex = 0.1, ylim = c(0,5))
plot(t1$dist, t1$error)
plot(t1$step, t1$error)

t1mean <- aggregate(error ~ step + dist, t1, mean)

par(mfrow=c(1,3))
plot(t1mean$dist*t1mean$step, t1mean$error, pch = 20, cex = 0.8)
plot(t1mean$dist, t1mean$error)
plot(t1mean$step, t1mean$error)

# t1mean$prod <- t1mean$step*t1mean$dist
# summary(lm(error ~ step*dist, t1mean))
# summary(lm(error ~ step*dist, t1))
# summary(geeglm(error ~ step*dist, t1, id = id, corstr = "exchangeable", family = "gaussian"))
```


```{r}
my.reg <- lm(LR ~ LP - 1, allData)

cv.lm(fit, k = 5, seed = 5483, max_cores = 1)
```