sergio.dh
/
Rubio_etal_2021


			
							12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202
							---
title: "Axonal Length"
author: "Sergio D?ez Hermano"
date: '`r format(Sys.Date(),"%e de %B, %Y")`'
output:
  html_document:
      highlight: tango
      toc: yes
      toc_depth: 4
      toc_float:
        collapsed: no
---

```{r setup, include=FALSE}
require(knitr)
# include this code chunk as-is to set options
opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE)
Sys.setlocale("LC_TIME", "C")
```


##Load libraries and functions

```{r}
library(R.matlab)
library(lattice)
library(dplyr)
library(ggplot2)
library(ggridges)
library(hrbrthemes)
library(viridis)
library(ggpubr)
library(mltools)
library(data.table)
library(caret)
library(interactions)
library(performance)
library(MASS)
library(geepack)
library(pstools)
library(MXM)
library(rmcorr)
library(multcomp)
library(Hmisc)


options(digits = 10)

# https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html

#########################################
# Deviation from empirical variance GEE #
#########################################
var.crit <- function(mymodel) sum(abs(mymodel[["geese"]]$vbeta.naiv - mymodel[["geese"]]$vbeta))

############################
# Distances between planes #
############################
distPlan <- function(mydata) {
  
  # Vector to store distances
  distPlanes <- vector()
  
  # Estimate distances between planes for each subject
  for (i in unique(mydata$id)) {
    LPvals <- mydata[mydata$id == i, 3]
    distPlanes <- c(distPlanes, 
                    (LPvals[1] - LPvals[2])/max(LPvals[1], LPvals[2]),
                    (LPvals[1] - LPvals[3])/max(LPvals[1], LPvals[3]),
                    (LPvals[2] - LPvals[3])/max(LPvals[2], LPvals[3]))
  }
  
   
  # Concatenate factor with substraction information
  numSubjects <- length(unique(mydata$id))
  subsFactor <- rep(c("XY - XZ", "XY - YZ", "YZ - XZ"), rep = numSubjects)
  distFrame <- data.frame(id = mydata$id, 
                          type = mydata$neurType,
                          distPlanes = distPlanes,
                          substraction = subsFactor)
  
  return(distFrame)
  
}


##########################
# Bootstrap coefficients #
##########################

bootCoef <- function(mydata, axontype, projection, corrMat, nreps) {
  # Select data
  modData <- mydata[mydata$neurType == axontype & mydata$Plane == projection, ]
  
  # Fit the model
  modGee <- geeglm(LR ~ LP, data = modData, id = id, corstr = corrMat)
  
  # Ensure reproducibility
  set.seed(12345) 
  
  # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  coefmat <- matrix(NA, nreps, 2)
  
  # We need the fitted values and the residuals from the model
  resids <- residuals(modGee)
  preds <- fitted(modGee)
  
  # Repeatedly generate bootstrapped responses
  for(i in 1:nreps) {
    booty <- preds + sample(resids, rep=TRUE)
    modGee2 <- update(modGee, booty ~ .)
    coefmat[i,] <- coef(modGee2)
  }
  
  # Create a dataframe to store predictors values
  colnames(coefmat) <- names(coef(modGee2))
  coefmat <- data.frame(coefmat)
  
  return(coefmat)
}

##############################
# Bootstrap coefficients gEE #
##############################

# For models with interactions

bootCoefint <- function(mydata, modForm, corrMat, nreps) {
  
  # Fit model
    modGee <- geeglm(formula(modForm), data = mydata, id = id, corstr = corrMat)

  # Ensure reproducibility
  # set.seed(12345) 
  
  # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  coefmat <- matrix(NA, nreps, length(coef(modGee)))
  
  # We need the fitted values and the residuals from the model
  resids <- residuals(modGee)
  preds <- fitted(modGee)
  
  # Repeatedly generate bootstrapped responses
  for(i in 1:nreps) {
    booty <- preds + sample(resids, rep=TRUE)
    modGee2 <- update(modGee, booty ~ .)
    coefmat[i,] <- coef(modGee2)
    print(i)
  }
  
  # Create a dataframe to store predictors values
  colnames(coefmat) <- names(coef(modGee2))
  coefmat <- data.frame(coefmat)
  
  return(coefmat)
}

#############################
# Bootstrap coefficients LM #
#############################

# For models with interactions

bootCoefintLM <- function(mydata, modForm, nreps) {
  
  # Fit model
  modLM <- lm(formula(modForm), data = mydata)

  # Ensure reproducibility
  # set.seed(12345) 
  
  # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  coefmat <- matrix(NA, nreps, length(coef(modLM)))
  
  # We need the fitted values and the residuals from the model
  resids <- residuals(modLM)
  preds <- fitted(modLM)
  
  # Repeatedly generate bootstrapped responses
  for(i in 1:nreps) {
    booty <- preds + sample(resids, rep=TRUE)
    modLM2 <- update(modLM, booty ~ .)
    coefmat[i,] <- coef(modLM2)
    print(i)
  }
  
  # Create a dataframe to store predictors values
  colnames(coefmat) <- names(coef(modLM2))
  coefmat <- data.frame(coefmat)
  
  return(coefmat)
}


################
# Bootstrap PI #
################

the.replication <- function(reg, s, x_Np1 = 0){
  # Make bootstrap residuals
  ep.star <- sample(s, size=length(reg$residuals),replace=TRUE)

  # Make bootstrap Y
  y.star <- fitted(reg) + ep.star

  # Do bootstrap regression
  lp <- model.frame(reg)[,2]
  nt <- model.frame(reg)[,3]
  bs.reg <- lm(y.star ~ lp:nt - 1)

  # Create bootstrapped adjusted residuals
  bs.lev <- influence(bs.reg)$hat
  bs.s   <- residuals(bs.reg)/sqrt(1-bs.lev)
  bs.s   <- bs.s - mean(bs.s)

  # Calculate draw on prediction error
  # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"] 
  xb.xb <- (coef(my.reg)[1] - coef(bs.reg)[1])*x_Np1
  return(unname(xb.xb + sample(bs.s, size=1)))
  
}


############################
# Bootstrap PI generalized #
############################

the.replication.gen <- function(reg, s, axType, x_Np1 = 0){
  # Make bootstrap residuals
  ep.star <- sample(s, size=length(reg$residuals), replace=TRUE)

  # Make bootstrap Y
  y.star <- fitted(reg) + ep.star

  # Do bootstrap regression
  lp <- model.frame(reg)[,2]
  nt <- model.frame(reg)[,3]
  bs.reg <- lm(y.star ~ lp:nt - 1)

  # Create bootstrapped adjusted residuals
  bs.lev <- influence(bs.reg)$hat
  bs.s   <- residuals(bs.reg)/sqrt(1-bs.lev)
  bs.s   <- bs.s - mean(bs.s)

  # Calculate draw on prediction error
  # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"] 
  xb.xb <- (coef(my.reg)[axType] - coef(bs.reg)[axType])*x_Np1
  return(unname(xb.xb + sample(bs.s, size=1)))
  
}

```


##Load data

```{r}
# Get file paths
axonNames <- list.files(paste("ProyeccionData/", sep=""), full.names=T)

# Load matlab file
axonFiles <- lapply(axonNames, function(x) readMat(x))
names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4")

# VAR NAMES
# REAL_LENGTH, PROYECTION_LENGTH_XY, PROYECTION_LENGTH_XZ, PROYECTION_LENGTH_YZ

# Extract data
realLength   <- lapply(axonFiles, function(x) x$REAL.LENGTH)
planeXY      <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XY)
planeXZ      <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XZ)
planeYZ      <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.YZ)

# Get number of neurons per type
numberTypes  <- unlist(lapply(realLength, function(x) length(x)))

# Store in data frames by plane
xyData <- data.frame(id = 1:length(unlist(realLength)), 
                      LR = unlist(realLength), LP = unlist(planeXY), Plane = rep("XY", sum(numberTypes)),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))

xzData <- data.frame(id = 1:length(unlist(realLength)), 
                      LR = unlist(realLength), LP = unlist(planeXZ), Plane = rep("XZ", sum(numberTypes)),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))

yzData <- data.frame(id = 1:length(unlist(realLength)), 
                      LR = unlist(realLength), LP = unlist(planeYZ), Plane = rep("YZ", sum(numberTypes)),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))

# Merge into a single data frame and sort by id
allData <- do.call("rbind", list(xyData, xzData, yzData))
allData <- allData[order(allData$id),]
# Add different number for every neuron-plane combination
allData$NeurPlane <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]), 
                       rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
allData$interact <-interaction(allData$neurType, allData$Plane, lex.order=T)
# Create data frame w/o Type4
dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)

# Data in short format
dataShort <- data.frame(id = 1:length(unlist(realLength)), 
                      LR = unlist(realLength), XY = unlist(planeXY), XZ = unlist(planeXZ), YZ = unlist(planeYZ),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
```


##Error distributions for median(beta)

```{r}
# Estimate error per axon type and plane using median 
bootNeur <- readRDS("bootCoefs_perNeuron_OLS.rds")
colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")
medianAlpha <- apply(bootNeur, 2, median)
dataError <- allData
dataError$alpha <- dataError$LR/dataError$LP
dataError$neurError <- NA
pos <- 1

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  for (j in c("XY", "XZ", "YZ")) {
    # Subset and divide alpha
    alphaVec <- dataError[dataError$neurType == i & dataError$Plane == j, 8] 
    dataError[dataError$neurType == i & dataError$Plane == j, 9] <- 100*(1 - medianAlpha[pos]/alphaVec)
  }
  pos <- pos + 1
}
```

###Plot

####Fixed error 

```{r}
# Plot
png(filename=paste("errorDistributions_5error_ols.png", sep=""), type="cairo",
    units="in", width=8, height=10, pointsize=12,
    res=300)

ggplot(dataError, aes(x = `neurError`, y = `interact`, fill = ifelse(..x.. >=-5 & ..x.. <=5, ">= -5", "< 5"))) +
  scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
  geom_density_ridges_gradient(alpha = 0.8) +
  theme_ridges() + 
  theme(legend.position = "none") + 
  xlab("% Error")

dev.off()
```


####Fixed error ribbon

```{r, fig.height=10, fig.width=6}
dataError$x <- dataError$neurError
# Density plot -----------------------------------------------
jj <- ggplot(dataError, aes(x = x, y = interact)) +
  stat_density_ridges(
    geom = "density_ridges_gradient") +
  theme_ridges() +
  theme(
    plot.margin = margin(t = 1, r = 1, b = 0.5, l = 0.5, "cm") 
  )


# Build ggplot and extract data
d <- ggplot_build(jj)$data[[1]]

# Add geom_ribbon for shaded area
jj +
  geom_ribbon(
    data = transform(subset(d, x >= -5 & x <= 5), interact = group),
    aes(x, ymin = ymin, ymax = ymax, group = group),
    fill = "red",
    alpha = 0.5) 
```

####Fixed quantiles

```{r, fig.height=10, fig.width=8}
# https://stackoverflow.com/questions/49961582/how-shade-area-under-ggridges-curve/49971690#49971690

png(filename=paste("errorDistributions_ols.png", sep=""), type="cairo",
    units="in", width=8, height=10, pointsize=12,
    res=300)

ggplot(dataError, aes(x = neurError, y = interact, fill = factor(stat(quantile)))) +
  stat_density_ridges(
    geom = "density_ridges_gradient",
    calc_ecdf = TRUE,
    quantiles = c(0.125, 0.875),
    quantile_lines = FALSE) +
  scale_fill_manual(
    # name = "Probability", values = c("#A0A0A0A0", "#FF0000A0", "#A0A0A0A0"),
    name = "Probability", values = c("gray70", "coral2", "gray70"),
    labels = c("(0, 0.125]", "(0.125, 0.875]", "(0.875, 1]")) +
  theme_ridges() +
  theme(legend.position = "none") +
  xlab("% Error") + 
  ylab("Axon:Projection combination")

dev.off()
```

###Error vs LP

```{r}
gplots::hist2d(dataError[, c(3,9)], nbins=50)
```


##Error distributions for all betas

```{r}
# Estimate error per axon type and plane using median 
bootNeur <- readRDS("ProyeccionAnalysis/bootCoefs_perNeuron_OLS.rds")
colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")
dataAllbetas <- allData
dataAllbetas$alpha <- dataAllbetas$LR/dataAllbetas$LP
axType <- 1
listPos <- 1
listContour <- list()


for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  for (j in c("XY", "XZ", "YZ")) {
    
    # Subset alpha
    alphaVec <- dataAllbetas[dataAllbetas$neurType == i & dataAllbetas$Plane == j, 8] 
    # Create empty matrix
    matError <- matrix(ncol = 2000, nrow = numberTypes[axType])
    colCount <- 1
    
    for (h in bootNeur[, axType]) {
      
      # Estimate errors for every beta
      matError[, colCount] <- 100*(1 - h/alphaVec)
      colCount <- colCount + 1
      
    }
    
    colnames(matError) <- bootNeur[, axType]
    listContour[[listPos]] <- matError
    listPos <- listPos + 1
    
  }
  
  axType <- axType + 1
  
}
```

###Reformat data

```{r}
# Add names to list
names(listContour) <- unique(dataAllbetas$interact)
# Store as vectors
fullVec <- lapply(listContour, function(x) as.vector(x))

# Trasnform into data frames by neurType
alphaType1 <- bootNeur[, 1]
axType1 <- data.frame(errors = unlist(fullVec[c(1,2,3)]), Plane = rep(c("XY", "XZ", "YZ"), each = numberTypes[1]*2000), 
                      alphaBoot = rep(rep(alphaType1, each = numberTypes[1]), 3), axType = rep("Type1", 3*numberTypes[1]*2000))

alphaType2 <- bootNeur[, 2]
axType2 <- data.frame(errors = unlist(fullVec[c(4,5,6)]), Plane = rep(c("XY", "XZ", "YZ"), each = numberTypes[2]*2000), 
                      alphaBoot = rep(rep(alphaType2, each = numberTypes[2]), 3), axType = rep("Type2", 3*numberTypes[2]*2000))

alphaType3 <- bootNeur[, 3]
axType3 <- data.frame(errors = unlist(fullVec[c(7,8,9)]), Plane = rep(c("XY", "XZ", "YZ"), each = numberTypes[3]*2000), 
                      alphaBoot = rep(rep(alphaType3, each = numberTypes[3]), 3), axType = rep("Type3", 3*numberTypes[3]*2000))

alphaType4 <- bootNeur[, 4]
axType4 <- data.frame(errors = unlist(fullVec[c(10,11,12)]), Plane = rep(c("XY", "XZ", "YZ"), each = numberTypes[4]*2000), 
                      alphaBoot = rep(rep(alphaType4, each = numberTypes[4]), 3), axType = rep("Type4", 3*numberTypes[4]*2000))

# Single data frame
axData <- rbind(axType1, axType2, axType3, axType4)
axData$axType <- factor(axData$axType)
```

###Plot 

```{r, fig.width=15, fig.height=15}
# https://www.r-graph-gallery.com/2d-density-plot-with-ggplot2.html#distr

# Area + contour global
g <- ggplot(axData, aes(x=alphaBoot, y=errors)) +
  stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
  theme_bw()

png(filename=paste("errorDistributions_contour_ols.png", sep=""), type="cairo",
    units="in", width=10, height=12, pointsize=12,
    res=300)

g + facet_grid(axType ~ Plane) + scale_fill_viridis_c()

dev.off()

# Area + contour by axType
ax1 <- ggplot(axType1, aes(x=alphaBoot, y=errors)) +
  stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
  theme_bw()

png(filename=paste("errorDistributions_contour_ax1_ols.png", sep=""), type="cairo",
    units="in", width=10, height=5, pointsize=12,
    res=300)

ax1 + facet_grid( ~ Plane) + scale_fill_viridis_c()

dev.off()

ax2 <- ggplot(axType2, aes(x=alphaBoot, y=errors)) +
  stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
  theme_bw()

png(filename=paste("errorDistributions_contour_ax2_ols.png", sep=""), type="cairo",
    units="in", width=10, height=5, pointsize=12,
    res=300)

ax2 + facet_grid( ~ Plane) + scale_fill_viridis_c()

dev.off()

ax3 <- ggplot(axType3, aes(x=alphaBoot, y=errors)) +
  stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
  theme_bw()

png(filename=paste("errorDistributions_contour_ax3_ols.png", sep=""), type="cairo",
    units="in", width=10, height=5, pointsize=12,
    res=300)

ax3 + facet_grid( ~ Plane) + scale_fill_viridis_c()

dev.off()

ax4 <- ggplot(axType4, aes(x=alphaBoot, y=errors)) +
  stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
  theme_bw()

png(filename=paste("errorDistributions_contour_ax4_ols.png", sep=""), type="cairo",
    units="in", width=10, height=5, pointsize=12,
    res=300)

ax4 + facet_grid( ~ Plane) + scale_fill_viridis_c()

dev.off()
```


##Bootstrap Pred Interval

###Example with mean LP

```{r}
# OLS
my.reg <- lm(LR ~ LP:neurType - 1, allData)
summary(my.reg)

# Mean LP per axonType
meanLP <- aggregate(LP ~ neurType, allData, mean)
meanLR <- aggregate(LR ~ neurType, allData, mean)

# Predict LR for neurType1
y.p <- coef(my.reg)[1]*meanLP[1,2]
y.p

# Create adjusted residuals
leverage <- influence(my.reg)$hat
my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
my.s.resid <- my.s.resid - mean(my.s.resid)


reg <- my.reg
s <- my.s.resid

# Do bootstrap with 10,000 replications
set.seed(123456)
ep.draws <- replicate(n=100, the.replication(reg=my.reg, s=my.s.resid, x_Np1 = meanLP[1,2]))

# Create prediction interval
y.p + quantile(ep.draws, probs=c(0.025,0.975))

# prediction interval using normal assumption
predict(my.reg, 
        newdata=data.frame(neurType = "Type1", 
                           LP = meanLP[1,2]),interval="prediction", level=0.95)
```

###Example with all LPs

```{r}
# OLS
my.reg <- lm(LR ~ LP:neurType - 1, allData)

# List to store axon types
axList <- list()
# List to store random selection
axRand <- list()
axCount <- 1
# 1/3 random observations per plane, no replacement
numExtract <- floor(1/3*numberTypes)
# Original data frame copy
dataToBoot <- allData

# Ensure reproducibility
set.seed(123456)

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  # List to store planes
  planeList <- list()
  # List to store random selection
  planeRand <- list()
  planeCount <- 1
  
  for (j in c("XY", "XZ", "YZ")) {
    
    # Subset plane and axon type
    dataGroup <- dataToBoot[dataToBoot$neurType == i & dataToBoot$Plane == j, ]
    
    # Random sample of N=numExtract (variable per axon type)
    dataSub <- sample_n(dataGroup, numExtract[axCount], replace = FALSE)
    # Store selected for later
    planeRand[[planeCount]] <- dataSub
    # Remove from data so they can not be selected again
    # dataToBoot <- dataToBoot[! dataToBoot$id %in% dataSub$id, ]
    
    # List to store errors
    epList <- list()
    epCount <- 1
    
    for (k in dataSub[ , "LP"]) {
      
      # Predict LR for neurType axCount
      y.p <- coef(my.reg)[axCount]*k
      
      # Create adjusted residuals
      leverage <- influence(my.reg)$hat
      my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
      my.s.resid <- my.s.resid - mean(my.s.resid)
      
      # Do bootstrap with 100 replications
      ep.draws <- replicate(n=100, the.replication.gen(reg = my.reg, s = my.s.resid, axCount, x_Np1 = k))
      
      # Store in list
      epList[[epCount]] <- ep.draws
      epCount <- epCount + 1
      
      print(epCount)
      
    }
    
    # Store in list
    planeList[[planeCount]] <- epList
    planeCount <- planeCount + 1
    
  }
  
  names(planeList) <- c("XY", "XZ", "YZ")
  names(planeRand) <- c("XY", "XZ", "YZ")
  
  # Store in list
  axList[[axCount]] <- planeList
  axRand[[axCount]] <- planeRand
  axCount <- axCount + 1
  
  # Renew original data for next axon type
  dataToBoot <- allData
  
}

names(axList) <- c("Type1", "Type2", "Type3", "Type4")
names(axRand) <- c("Type1", "Type2", "Type3", "Type4")

saveRDS(list(axList, axRand), "bootPI_full_Replacement.rds")
```

####Check convergence 

```{r}
# Load object
bootCheck <- readRDS("bootPI_full.rds")
# First list is prediction error, second is random subset
axList <- bootCheck[[1]]
axRand <- bootCheck[[2]]
# One example

# OLS
my.reg <- lm(LR ~ LP:neurType - 1, allData)

# Estimate and store in list
lrpList <- list()
lrpCount <- 1
axCount <- 1

for (i in c("Type1", "Type2", "Type3", "Type4")) {

  for (j in c("XY", "XZ", "YZ")) {
    
     # Extract LP and LR values
    lp <- axRand[[i]][[j]]$LP
    lr <- axRand[[i]][[j]]$LR
    
    for (k in 1:length(lp)) {
      
      # Predicted LR
      lr.p <- coef(my.reg)[axCount]*lp[k]
      
      # Extract absolute errors
      ae <- axList[[i]][[j]][[k]]
      
      # Empty vectors for storage
      lrp.Low <- numeric()
      lrp.Up <- numeric()
      
      # Estimate PI95 for boot n=10...100
      for (l in 4:100) {
        
        lrp <- lr.p + quantile(ae[1:l], probs = c(0.025,0.975))
        
        # Store in vectors
        lrp.Low <- c(lrp.Low, lrp[1])
        lrp.Up <- c(lrp.Up, lrp[2])
        
      }
      
      # Store in data frame
      lrpFrame <- data.frame(nboot = 4:100, lowLim = lrp.Low, upLim = lrp.Up, LR = rep(lr[k], length(4:100)))
      
      # Store in list
      lrpList[[lrpCount]] <- lrpFrame
      lrpCount <- lrpCount + 1
    }
    
  }
  
  axCount <- axCount + 1
  
}

```

#####Reformat

```{r}
# 1/3 random observations per plane, no replacement
numExtract <- floor(1/3*numberTypes)

checkFrame <- do.call(rbind, lrpList)
checkFrame$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), times = numExtract*(length(4:100))*3)
checkFrame$Plane <- c(rep(c("XY", "XZ", "YZ"), each = numExtract[1]*length(4:100)),
                     rep(c("XY", "XZ", "YZ"), each = numExtract[2]*length(4:100)),
                     rep(c("XY", "XZ", "YZ"), each = numExtract[3]*length(4:100)),
                     rep(c("XY", "XZ", "YZ"), each = numExtract[4]*length(4:100)))


lowMean <- aggregate(lowLim ~ neurType + Plane + nboot, checkFrame, mean)
upMean <- aggregate(upLim ~ neurType + Plane + nboot, checkFrame, mean)
LRmean <- aggregate(LR ~ neurType + Plane + nboot, checkFrame, mean)

cumError <- with(lowMean[lowMean$neurType == "Type1" & lowMean$Plane == "XY", ], 
             abs((lowLim[-1] - lowLim[-length(lowLim)])/lowLim[-length(lowLim)])*100)
```

#####Plot

```{r, fig.height=20, fig.width=30}
# Separate plots
png(filename=paste("convergence_bootPI_separate.png", sep=""), type="cairo",
    units="in", width=30, height=20, pointsize=12,
    res=300)

par(mfrow=c(4,6), cex = 1.1)

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  for (j in c("XY", "XZ", "YZ")) {
    
    plot(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2)
    plot(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
    
  }
  
}

dev.off()

# Same plot
png(filename=paste("convergence_bootPI_together.png", sep=""), type="cairo",
    units="in", width=15, height=20, pointsize=12,
    res=300)

par(mfrow=c(4,3), cex = 1.1)

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  for (j in c("XY", "XZ", "YZ")) {
    
    minY <- min(lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"])
    maxY <- max(upMean[upMean$neurType == i & upMean$Plane == j, "upLim"])
    plot(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2, ylim = c(minY, maxY))
    lines(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
    # lines(LR ~ nboot, LRmean[LRmean$neurType == i & LRmean$Plane == j, ], type = "l", lwd = 2, col = "red")
    
  }
  
}

dev.off()

# Diff plot
png(filename=paste("convergence_bootPI_Diff.png", sep=""), type="cairo",
    units="in", width=15, height=20, pointsize=12,
    res=300)

par(mfrow=c(4,3), cex = 1.1)

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  for (j in c("XY", "XZ", "YZ")) {
    
    minY <- min(lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"])
    maxY <- max(upMean[upMean$neurType == i & upMean$Plane == j, "upLim"])
    diflu <- upMean[upMean$neurType == i & upMean$Plane == j, "upLim"] - lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"]
    plot(4:100, diflu, type = "l", col = "red", lwd = 2)
    
  }
  
}

dev.off()

# Same plot random intervals

for (m in 1:4) {
  
  png(filename=paste("convergence_bootPI_together_example_rand", m, ".png", sep=""), type="cairo",
      units="in", width=15, height=20, pointsize=12,
      res=300)
  
  par(mfrow=c(4,3), cex = 1.1)
  
  for (i in c("Type1", "Type2", "Type3", "Type4")) {
    
    for (j in c("XY", "XZ", "YZ")) {
      
      subData <- checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ]
      randLR <- sample(subData[, "LR"], 1)
      # randLR <- sample(subData[which(subData$lowLim > subData$LR), "LR"], 1)
      minY <- min(subData[subData$LR == randLR, "lowLim"])
      maxY <- max(subData[subData$LR == randLR, "upLim"])
      plot(lowLim ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2, ylim = c(minY, maxY))
      lines(upLim ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2)
      lines(LR ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2, col = "red")
      
    }
    
  }
  
  dev.off()
  
}

# Separate with lines
png(filename=paste("convergence_bootPI_separate_FULL.png", sep=""), type="cairo",
    units="in", width=30, height=20, pointsize=12,
    res=300)

par(mfrow=c(4,6), cex = 1.1)

for (i in c("Type1", "Type2", "Type3", "Type4")) {
  
  for (j in c("XY", "XZ", "YZ")) {
    
    plot(lowLim ~ nboot, checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ], type = "l", col = "lightgray")
    lines(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2)
    
    plot(upLim ~ nboot, checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ], type = "l", col = "lightgray")
    lines(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
    
  }
  
}

dev.off()


```

####Estimate porcentual errors

```{r, fig.width=8, fig.height=8}
# 1/3 random observations per plane, no replacement
numExtract <- floor(1/3*numberTypes)
# Load object
bootPI <- readRDS("ProyeccionAnalysis/bootPI_full_Replacement_TRUE.rds")
# First list is prediction error, second is random subset
axList <- bootPI[[1]]
axRand <- bootPI[[2]]

# Estimate and store in list
peList <- list()
peCount <- 1

for (i in c("Type1", "Type2", "Type3", "Type4")) {

  for (j in c("XY", "XZ", "YZ")) {
    
    # Extract LR values
    lr <- axRand[[i]][[j]]$LR
    
    for (k in 1:length(lr)) {
      
      # Divide absolute errors by LR to get porcentual errors
      pe <- axList[[i]][[j]][[k]] / lr[k]
      
      # Store in list
      peList[[peCount]] <- pe*100
      peCount <-peCount + 1
    }
    
  }
  
}

# Reformat as data frame
# for bootPI
# peFrame <- data.frame(pe = unlist(peList), 
#                       neurType = rep(c("Type1", "Type2", "Type3", "Type4"), each = 20*100*3),
#                       Plane = rep(rep(c("XY", "XZ", "YZ"), each = 20*100), 4))
# for bootPI_full
peFrame <- data.frame(pe = unlist(peList),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), times = numExtract*100*3),
                      Plane = c(rep(c("XY", "XZ", "YZ"), each = numExtract[1]*100),
                                rep(c("XY", "XZ", "YZ"), each = numExtract[2]*100),
                                rep(c("XY", "XZ", "YZ"), each = numExtract[3]*100),
                                rep(c("XY", "XZ", "YZ"), each = numExtract[4]*100)))
peFrame$interact <- interaction(peFrame$neurType, peFrame$Plane, lex.order=T)
```

#####Plot density and histogram

```{r}
# Plot
# peGroup <- peFrame %>% group_by(interact)
# peSample <- sample_n(peGroup, 100)

png(filename=paste("prederrorDistributions_ols_FULLsample_Repeat_TRUE.png", sep=""), type="cairo",
    units="in", width=8, height=10, pointsize=12,
    res=300)

(p <- ggplot(peFrame, aes(x = `pe`, y = `interact`, fill = ifelse(..x.. >=-5 & ..x.. <=5, ">= -5", "< 5"))) +
  scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
  geom_density_ridges_gradient(alpha = 0.8) +
  theme_ridges() + 
  theme(legend.position = "none") + 
  xlab("% Error")) + 
  xlim(c(-40,40))

dev.off()

png(filename=paste("prederrorHistogram_ols_FULLsample_Repeat_TRUE.png", sep=""), type="cairo",
    units="in", width=8, height=10, pointsize=12,
    res=300)

# Histogram
# nint 300 bootPI
# nint 500 bootPI_full
# nint 400 bootPI_full_replace
(h <- histogram( ~ pe | Plane + neurType, peFrame, nint = 400, type ="density", xlim = c(-50,50)))

dev.off()
```

#####Plot errors vs cumul probability

```{r, fig.height=5, fig.width=20}
# Inverse quantile
quantInv <- function(distr, value) ecdf(distr)(value)

#ecdf plot example
# plot(ecdf(peFrame[peFrame$interact == "Type1.XY", "pe"]))
# lines(ecdf(peFrame[peFrame$interact == "Type1.XZ", "pe"]), col = "red")
# lines(ecdf(peFrame[peFrame$interact == "Type1.YZ", "pe"]), col = "blue")

probList <- list()
binProb <- 0.5
probSeq <- seq(binProb, 100, binProb)

for (i in unique(peFrame$interact)) {
  
  dataProb <- peFrame[peFrame$interact == i, "pe"]
  probVec <- numeric()
  
  for (j in probSeq) {
    
    errProb <- quantInv(dataProb, j) - quantInv(dataProb, -j)
    probVec <- c(probVec, errProb)
    
  }
  
  probList[[i]] <- probVec
  
}

# Plot with limited axis
setwd("ProyeccionAnalysis/")
png(filename=paste("errorProbabilityCum_bin", binProb, ".png", sep=""), type="cairo",
    units="in", width=20, height=5, pointsize=12,
    res=300)

par(mfrow = c(1,4))
axCount <- 1

for (i in seq(1,10,3)) {
  plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,1), xlim = c(0.5, 40),
       ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
  lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
  lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
  abline(v = 5, lty = "dashed", col = "gray")
  abline(h = 0.95, lty = "dashed", col = "gray" )
  legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
  axCount <- axCount + 1
}

dev.off()

# Plot with free axis
png(filename=paste("errorProbabilityCum_bin", binProb, "_freeAxis.png", sep=""), type="cairo",
    units="in", width=20, height=5, pointsize=12,
    res=300)

par(mfrow = c(1,4))
axCount <- 1

for (i in seq(1,10,3)) {
  plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
       ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
  lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
  lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
  abline(v = 5, lty = "dashed", col = "gray")
  abline(h = 0.95, lty = "dashed", col = "gray" )
  legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
  axCount <- axCount + 1
}

dev.off()
```

#####Plot errors vs inverse cumul probability

```{r, fig.height=5, fig.width=20}
# Inverse quantile
quantInv <- function(distr, value) ecdf(distr)(value)

#ecdf plot example
# plot(ecdf(peFrame[peFrame$interact == "Type1.XY", "pe"]))
# lines(ecdf(peFrame[peFrame$interact == "Type1.XZ", "pe"]), col = "red")
# lines(ecdf(peFrame[peFrame$interact == "Type1.YZ", "pe"]), col = "blue")

probList <- list()
binProb <- 0.5
probSeq <- seq(binProb, 100, binProb)

for (i in unique(peFrame$interact)) {
  
  dataProb <- peFrame[peFrame$interact == i, "pe"]
  probVec <- numeric()
  
  for (j in probSeq) {
    
    errProb <- 1 - (quantInv(dataProb, j) - quantInv(dataProb, -j))
    probVec <- c(probVec, errProb)
    
  }
  
  probList[[i]] <- probVec
  
}

# Plot with limited axis
setwd("ProyeccionAnalysis/")
png(filename=paste("errorProbabilityCumInv_bin", binProb, ".png", sep=""), type="cairo",
    units="in", width=20, height=5, pointsize=12,
    res=300)

par(mfrow = c(1,4))
axCount <- 1

for (i in seq(1,10,3)) {
  plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,1), xlim = c(0.5, 40),
       ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
  lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
  lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
  abline(v = 5, lty = "dashed", col = "gray")
  abline(h = 0.05, lty = "dashed", col = "gray" )
  legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
  axCount <- axCount + 1
}

dev.off()

# Plot with free axis
png(filename=paste("errorProbabilityCumInv_bin", binProb, "_freeAxis.png", sep=""), type="cairo",
    units="in", width=20, height=5, pointsize=12,
    res=300)

par(mfrow = c(1,4))
axCount <- 1

for (i in seq(1,10,3)) {
  plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
       ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
  lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
  lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
  abline(v = 5, lty = "dashed", col = "gray")
  abline(h = 0.05, lty = "dashed", col = "gray" )
  legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
  axCount <- axCount + 1
}

dev.off()
```

#####Plot errors vs probability

```{r, fig.height=5, fig.width=20}
# Inverse quantile
quantInv <- function(distr, value) ecdf(distr)(value)

#ecdf plot example
# plot(ecdf(peFrame[peFrame$interact == "Type1.XY", "pe"]))
# lines(ecdf(peFrame[peFrame$interact == "Type1.XZ", "pe"]), col = "red")
# lines(ecdf(peFrame[peFrame$interact == "Type1.YZ", "pe"]), col = "blue")

probList <- list()
binProb <- 1
probSeq <- seq(0, 100, binProb)
intProb <- 0.1

for (i in unique(peFrame$interact)) {
  
  dataProb <- peFrame[peFrame$interact == i, "pe"]
  probVec <- numeric()
  
  for (j in probSeq) {
    
    errProbPos <- quantInv(dataProb, j + intProb) - quantInv(dataProb, j - intProb)
    errProbNeg <- quantInv(dataProb, -j + intProb) - quantInv(dataProb, - j - intProb)
    probVec <- c(probVec, errProbPos + errProbNeg)
    
  }
  
  probList[[i]] <- probVec
  
}

# Plot with limited axis
setwd("ProyeccionAnalysis/")
png(filename=paste("errorProbability_bin", binProb, ".png", sep=""), type="cairo",
    units="in", width=20, height=5, pointsize=12,
    res=300)

par(mfrow = c(1,4))
axCount <- 1

for (i in seq(1,10,3)) {
  plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,0.07), xlim = c(0.5, 40),
       ylab = "Probability", xlab = "% Error", main = paste("Axon Type", axCount))
  lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
  lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
  abline(v = 5, lty = "dashed", col = "gray")
  legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
  axCount <- axCount + 1
}

dev.off()

# Plot with free axis
png(filename=paste("errorProbability_bin", binProb, "_freeAxis.png", sep=""), type="cairo",
    units="in", width=20, height=5, pointsize=12,
    res=300)

par(mfrow = c(1,4))
axCount <- 1

for (i in seq(1,10,3)) {
  plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
       ylab = "Probability", xlab = "% Error", main = paste("Axon Type", axCount))
  lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
  lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
  abline(v = 5, lty = "dashed", col = "gray")
  legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
  axCount <- axCount + 1
}

dev.off()
```