- ---
- title: "Axonal Length"
- author: "Sergio Díez Hermano"
- date: '`r format(Sys.Date(), "%e %B, %Y")`'
- output:
- html_document:
- highlight: tango
- toc: yes
- toc_depth: 4
- toc_float:
- collapsed: no
- ---
- ```{r setup, include=FALSE}
- require(knitr)
- # include this code chunk as-is to set options
- opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE)
- Sys.setlocale("LC_TIME", "C")
- ```
- ## Load libraries and functions
- ```{r}
- library(R.matlab)
- library(lattice)
- library(dplyr)
- library(ggplot2)
- library(ggridges)
- library(hrbrthemes)
- library(viridis)
- library(ggpubr)
- library(mltools)
- library(data.table)
- library(caret)
- library(interactions)
- library(performance)
- library(MASS)
- library(geepack)
- library(pstools)
- library(MXM)
- library(rmcorr)
- library(multcomp)
- library(Hmisc)
- options(digits = 10)
- # https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html
- ################
- # Bootstrap PI #
- ################
- the.replication <- function(reg, s, x_Np1 = 0){
- # Make bootstrap residuals
- ep.star <- sample(s, size=length(reg$residuals),replace=TRUE)
- # Make bootstrap Y
- y.star <- fitted(reg) + ep.star
- # Do bootstrap regression
- lp <- model.frame(reg)[,2]
- nt <- model.frame(reg)[,3]
- bs.reg <- lm(y.star ~ lp:nt - 1)
- # Create bootstrapped adjusted residuals
- bs.lev <- influence(bs.reg)$hat
- bs.s <- residuals(bs.reg)/sqrt(1-bs.lev)
- bs.s <- bs.s - mean(bs.s)
- # Calculate draw on prediction error
- # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"]
- xb.xb <- (coef(reg)[1] - coef(bs.reg)[1])*x_Np1
- return(unname(xb.xb + sample(bs.s, size=1)))
-
- }
- ############################
- # Bootstrap PI generalized #
- ############################
- the.replication.gen <- function(reg, s, axType, x_Np1 = 0){
- # Make bootstrap residuals
- ep.star <- sample(s, size=length(reg$residuals), replace=TRUE)
- # Make bootstrap Y
- y.star <- fitted(reg) + ep.star
- # Do bootstrap regression
- lp <- model.frame(reg)[,2]
- nt <- model.frame(reg)[,3]
- bs.reg <- lm(y.star ~ lp:nt - 1)
- # Create bootstrapped adjusted residuals
- bs.lev <- influence(bs.reg)$hat
- bs.s <- residuals(bs.reg)/sqrt(1-bs.lev)
- bs.s <- bs.s - mean(bs.s)
- # Calculate draw on prediction error
- # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"]
- xb.xb <- (coef(reg)[axType] - coef(bs.reg)[axType])*x_Np1
- return(unname(xb.xb + sample(bs.s, size=1)))
-
- }
- ##############################
- # Bootstrap PI per axon type #
- ##############################
- the.replication.type <- function(reg, s, x_Np1 = 0){
- # Make bootstrap residuals
- ep.star <- sample(s, size=length(reg$residuals),replace=TRUE)
- # Make bootstrap Y
- y.star <- fitted(reg) + ep.star
- # Do bootstrap regression
- lp <- model.frame(reg)[,2]
- bs.reg <- lm(y.star ~ lp - 1)
- # Create bootstrapped adjusted residuals
- bs.lev <- influence(bs.reg)$hat
- bs.s <- residuals(bs.reg)/sqrt(1-bs.lev)
- bs.s <- bs.s - mean(bs.s)
- # Calculate draw on prediction error
- # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"]
- xb.xb <- (coef(reg) - coef(bs.reg))*x_Np1
- return(unname(xb.xb + sample(bs.s, size=1)))
-
- }
- ##############################
- # Stratified random sampling #
- ##############################
- stratified <- function(df, group, size, select = NULL,
- replace = FALSE, bothSets = FALSE) {
- if (is.null(select)) {
- df <- df
- } else {
- if (is.null(names(select))) stop("'select' must be a named list")
- if (!all(names(select) %in% names(df)))
- stop("Please verify your 'select' argument")
- temp <- sapply(names(select),
- function(x) df[[x]] %in% select[[x]])
- df <- df[rowSums(temp) == length(select), ]
- }
- df.interaction <- interaction(df[group], drop = TRUE)
- df.table <- table(df.interaction)
- df.split <- split(df, df.interaction)
- if (length(size) > 1) {
- if (length(size) != length(df.split))
- stop("Number of groups is ", length(df.split),
- " but number of sizes supplied is ", length(size))
- if (is.null(names(size))) {
- n <- setNames(size, names(df.split))
- message(sQuote("size"), " vector entered as:\n\nsize = structure(c(",
- paste(n, collapse = ", "), "),\n.Names = c(",
- paste(shQuote(names(n)), collapse = ", "), ")) \n\n")
- } else {
- ifelse(all(names(size) %in% names(df.split)),
- n <- size[names(df.split)],
- stop("Named vector supplied with names ",
- paste(names(size), collapse = ", "),
- "\n but the names for the group levels are ",
- paste(names(df.split), collapse = ", ")))
- }
- } else if (size < 1) {
- n <- round(df.table * size, digits = 0)
- } else if (size >= 1) {
- if (all(df.table >= size) || isTRUE(replace)) {
- n <- setNames(rep(size, length.out = length(df.split)),
- names(df.split))
- } else {
- message(
- "Some groups\n---",
- paste(names(df.table[df.table < size]), collapse = ", "),
- "---\ncontain fewer observations",
- " than desired number of samples.\n",
- "All observations have been returned from those groups.")
- n <- c(sapply(df.table[df.table >= size], function(x) x = size),
- df.table[df.table < size])
- }
- }
- temp <- lapply(
- names(df.split),
- function(x) df.split[[x]][sample(df.table[x],
- n[x], replace = replace), ])
- set1 <- do.call("rbind", temp)
-
- if (isTRUE(bothSets)) {
- set2 <- df[!rownames(df) %in% rownames(set1), ]
- list(SET1 = set1, SET2 = set2)
- } else {
- set1
- }
- }
- ```
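- The helpers above are used throughout the document. As a quick illustration, the sketch below applies `stratified()` to a small synthetic data frame; the toy data and the 70/30 split are illustrative assumptions only, and the chunk is not evaluated.
- ```{r, eval=FALSE}
- # Toy data frame: two planes x 10 rows each (illustrative only)
- toy <- data.frame(Plane = rep(c("XY", "XZ"), each = 10),
-                   LP = runif(20), LR = runif(20))
- # Take 70% of rows per Plane without replacement and also return the complement
- toySplit <- stratified(toy, group = "Plane", size = 0.7,
-                        bothSets = TRUE, replace = FALSE)
- nrow(toySplit$SET1)  # stratified 70% sample
- nrow(toySplit$SET2)  # remaining 30%, analogous to the test set used later
- ```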
- ## Load data
- ```{r}
- # Get file paths
- axonNames <- list.files(paste("ProyeccionData/", sep=""), full.names=T)
- # Load matlab file
- axonFiles <- lapply(axonNames, function(x) readMat(x))
- names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4")
- # VAR NAMES
- # REAL_LENGTH, PROYECTION_LENGTH_XY, PROYECTION_LENGTH_XZ, PROYECTION_LENGTH_YZ
- # Extract data
- realLength <- lapply(axonFiles, function(x) x$REAL.LENGTH)
- planeXY <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XY)
- planeXZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XZ)
- planeYZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.YZ)
- # Get number of neurons per type
- numberTypes <- unlist(lapply(realLength, function(x) length(x)))
- # Store in data frames by plane
- xyData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeXY), Plane = rep("XY", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- xzData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeXZ), Plane = rep("XZ", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- yzData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeYZ), Plane = rep("YZ", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- # Merge into a single data frame and sort by id
- allData <- do.call("rbind", list(xyData, xzData, yzData))
- allData <- allData[order(allData$id),]
- # Add different number for every neuron-plane combination
- allData$NeurPlane <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]),
- rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
- allData$interact <-interaction(allData$neurType, allData$Plane, lex.order=T)
- # Create data frame w/o Type4
- dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)
- # Data in short format
- dataShort <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), XY = unlist(planeXY), XZ = unlist(planeXZ), YZ = unlist(planeYZ),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- ```
- ## CV-Bootstrap Prediction Interval
- This code is meant to be run independently for each neurType and in parallel; run it on CREMOTO. A parallelization sketch follows the chunk below.
- ```{r}
- # Ensure reproducibility
- set.seed(123456)
- # Subset axon type
- dataCV <- allData[allData$neurType == "Type1", ]
- # List to store CV repetitions
- cvList <- list ()
- cvRand <- list()
- for (i in 1:10) {
-
- # Stratified random training/test sets
- stratDat <- stratified(dataCV, "Plane", 0.7, bothSets = TRUE, replace = FALSE)
- trainSet <- stratDat$SET1
- testSet <- stratDat$SET2
- # Store train and test set
- cvRand[[i]] <- list(trainSet, testSet)
-
- # OLS with training set
- my.reg <- lm(LR ~ LP - 1, trainSet)
- # Create adjusted residuals
- leverage <- influence(my.reg)$hat
- my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
- my.s.resid <- my.s.resid - mean(my.s.resid)
-
- # List to store planes
- planeList <- list()
- planeRand <- list()
- # Assign test data to bootstrapping
- dataToBoot <- testSet
-
- # Bootstrapping on test set
- for (j in c("XY", "XZ", "YZ")) {
-
- # Subset plane
- dataSub <- dataToBoot[dataToBoot$Plane == j, ]
-
- # List to store errors
- epList <- list()
- epCount <- 1
-
- for (k in dataSub[ , "LP"]) {
-
- # Predict LR
- y.p <- coef(my.reg)*k
-
- # Do bootstrap with 100 replications
- ep.draws <- replicate(n=100, the.replication.type(reg = my.reg, s = my.s.resid, x_Np1 = k))
-
- # Store in list
- epList[[epCount]] <- ep.draws
- epCount <- epCount + 1
-
- print(epCount)
-
- }
-
- # Store in list
- planeList[[j]] <- epList
-
- print(paste("CV", i, "Plane", j, "finished"))
-
- }
-
- # Store in list
- cvList[[i]] <- planeList
- print(paste("CV", i, "finished"))
-
- }
-
- # names(axList) <- c("Type1", "Type2", "Type3", "Type4")
- # names(axRand) <- c("Type1", "Type2", "Type3", "Type4")
- #
- # saveRDS(list(distList, distRand), paste("stereo_bootPI_full_Type", axCount, "_boot20.rds", sep = ""))
- ```
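- A minimal sketch of how the per-type runs could be parallelized, assuming the base `parallel` package. `runCV()` is a hypothetical wrapper, not part of the original analysis; it would contain the full CV-bootstrap loop above for one axon type, but here it only returns the training fit to keep the example short.
- ```{r, eval=FALSE}
- library(parallel)
-
- # Hypothetical wrapper: subset one axon type and run the CV-bootstrap loop above.
- # Only the stratified split and the training OLS are shown here.
- runCV <- function(type, seed = 123456) {
-   set.seed(seed)
-   dataCV <- allData[allData$neurType == type, ]
-   stratDat <- stratified(dataCV, "Plane", 0.7, bothSets = TRUE, replace = FALSE)
-   lm(LR ~ LP - 1, stratDat$SET1)
- }
-
- # One worker per axon type; mclapply forks on Unix-alikes (use parLapply on Windows)
- cvByType <- mclapply(c("Type1", "Type2", "Type3", "Type4"), runCV, mc.cores = 4)
- names(cvByType) <- c("Type1", "Type2", "Type3", "Type4")
- ```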
- #### Check convergence
- ```{r}
- # Load object
- bootCheck <- readRDS("bootPI_full.rds")
- # First list is prediction error, second is random subset
- axList <- bootCheck[[1]]
- axRand <- bootCheck[[2]]
- # One example
- # OLS
- my.reg <- lm(LR ~ LP:neurType - 1, allData)
- # Estimate and store in list
- lrpList <- list()
- lrpCount <- 1
- axCount <- 1
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
-
- # Extract LP and LR values
- lp <- axRand[[i]][[j]]$LP
- lr <- axRand[[i]][[j]]$LR
-
- for (k in 1:length(lp)) {
-
- # Predicted LR
- lr.p <- coef(my.reg)[axCount]*lp[k]
-
- # Extract absolute errors
- ae <- axList[[i]][[j]][[k]]
-
- # Empty vectors for storage
- lrp.Low <- numeric()
- lrp.Up <- numeric()
-
- # Estimate PI95 for boot n=10...100
- for (l in 4:100) {
-
- lrp <- lr.p + quantile(ae[1:l], probs = c(0.025,0.975))
-
- # Store in vectors
- lrp.Low <- c(lrp.Low, lrp[1])
- lrp.Up <- c(lrp.Up, lrp[2])
-
- }
-
- # Store in data frame
- lrpFrame <- data.frame(nboot = 4:100, lowLim = lrp.Low, upLim = lrp.Up, LR = rep(lr[k], length(4:100)))
-
- # Store in list
- lrpList[[lrpCount]] <- lrpFrame
- lrpCount <- lrpCount + 1
- }
-
- }
-
- axCount <- axCount + 1
-
- }
- ```
- ##### Reformat
- ```{r}
- # 1/3 random observations per plane, no replacement
- numExtract <- floor(1/3*numberTypes)
- checkFrame <- do.call(rbind, lrpList)
- checkFrame$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), times = numExtract*(length(4:100))*3)
- checkFrame$Plane <- c(rep(c("XY", "XZ", "YZ"), each = numExtract[1]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[2]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[3]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[4]*length(4:100)))
- lowMean <- aggregate(lowLim ~ neurType + Plane + nboot, checkFrame, mean)
- upMean <- aggregate(upLim ~ neurType + Plane + nboot, checkFrame, mean)
- LRmean <- aggregate(LR ~ neurType + Plane + nboot, checkFrame, mean)
- cumError <- with(lowMean[lowMean$neurType == "Type1" & lowMean$Plane == "XY", ],
- abs((lowLim[-1] - lowLim[-length(lowLim)])/lowLim[-length(lowLim)])*100)
- ```
- ##### Plot
- ```{r, fig.height=20, fig.width=30}
- # Separate plots
- png(filename=paste("convergence_bootPI_separate.png", sep=""), type="cairo",
- units="in", width=30, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,6), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- plot(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2)
- plot(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
-
- }
-
- }
- dev.off()
- # Same plot
- png(filename=paste("convergence_bootPI_together.png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,3), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- minY <- min(lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"])
- maxY <- max(upMean[upMean$neurType == i & upMean$Plane == j, "upLim"])
- plot(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2, ylim = c(minY, maxY))
- lines(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
- # lines(LR ~ nboot, LRmean[LRmean$neurType == i & LRmean$Plane == j, ], type = "l", lwd = 2, col = "red")
-
- }
-
- }
- dev.off()
- # Diff plot
- png(filename=paste("convergence_bootPI_Diff.png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,3), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- minY <- min(lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"])
- maxY <- max(upMean[upMean$neurType == i & upMean$Plane == j, "upLim"])
- diflu <- upMean[upMean$neurType == i & upMean$Plane == j, "upLim"] - lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"]
- plot(4:100, diflu, type = "l", col = "red", lwd = 2)
-
- }
-
- }
- dev.off()
- # Same plot random intervals
- for (m in 1:4) {
-
- png(filename=paste("convergence_bootPI_together_example_rand", m, ".png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
-
- par(mfrow=c(4,3), cex = 1.1)
-
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- subData <- checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ]
- randLR <- sample(subData[, "LR"], 1)
- # randLR <- sample(subData[which(subData$lowLim > subData$LR), "LR"], 1)
- minY <- min(subData[subData$LR == randLR, "lowLim"])
- maxY <- max(subData[subData$LR == randLR, "upLim"])
- plot(lowLim ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2, ylim = c(minY, maxY))
- lines(upLim ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2)
- lines(LR ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2, col = "red")
-
- }
-
- }
-
- dev.off()
-
- }
- # Separate with lines
- png(filename=paste("convergence_bootPI_separate_FULL.png", sep=""), type="cairo",
- units="in", width=30, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,6), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- plot(lowLim ~ nboot, checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ], type = "l", col = "lightgray")
- lines(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2)
-
- plot(upLim ~ nboot, checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ], type = "l", col = "lightgray")
- lines(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
-
- }
-
- }
- dev.off()
- ```
- #### Estimate percentage errors
- ```{r, fig.width=8, fig.height=8}
- # Load objects
- cvPI <- list()
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- # Within each Type, element 1 contains the errors and element 2 the random samples (training and test sets)
- cvPI[[i]] <- readRDS(paste("ProyeccionAnalysis/proyec_boot10CVPI_", i, ".rds", sep = ""))
- }
- # Estimate and store in list
- peList <- list()
- peCount <- 1
- lrLength <- list()
- for (h in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (i in 1:10) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- # Extract LR values
- lr <- cvPI[[h]][[2]][[i]][[2]]
- lr <- lr[lr$Plane == j, "LR"]
-
- for (k in 1:length(lr)) {
-
- # Divide absolute errors by LR to get percentage errors
- pe <- cvPI[[h]][[1]][[i]][[j]][[k]] / lr[k]
-
- # Store in list
- peList[[peCount]] <- pe*100
- peCount <- peCount + 1
- }
-
- }
-
- }
-
- lrLength[h] <- length(lr)
-
- }
- lrLength <- unlist(lrLength)
- peFrame <- data.frame(pe = unlist(peList),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), times = lrLength*10*3*100),
- Plane = c(rep(rep(c("XY", "XZ", "YZ"), each = lrLength[1]*100), 10),
- rep(rep(c("XY", "XZ", "YZ"), each = lrLength[2]*100), 10),
- rep(rep(c("XY", "XZ", "YZ"), each = lrLength[3]*100), 10),
- rep(rep(c("XY", "XZ", "YZ"), each = lrLength[4]*100), 10)))
- peFrame$interact <- interaction(peFrame$neurType, peFrame$Plane, lex.order=T)
- ```
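- As a quick consistency check (a sketch only), the number of rows in `peFrame` should equal the number of stored bootstrap errors: 100 replications per test observation, 10 CV repetitions and 3 planes per axon type. This assumes the test-set size per plane is constant across repetitions, which holds because `stratified()` rounds the 70% split deterministically.
- ```{r, eval=FALSE}
- # Bookkeeping check: the rep() lengths above must match the unlisted errors
- stopifnot(nrow(peFrame) == sum(lrLength * 10 * 3 * 100))
- # Rows per axon type and plane
- table(peFrame$neurType, peFrame$Plane)
- ```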
- ##### Plot density and histogram
- ```{r}
- # Plot
- # peGroup <- peFrame %>% group_by(interact)
- # peSample <- sample_n(peGroup, 100)
- setwd("ProyeccionAnalysis/")
- png(filename=paste("prederrorDistributions_ols_CV.png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- (p <- ggplot(peFrame, aes(x = `pe`, y = `interact`, fill = ifelse(..x.. >= -5 & ..x.. <= 5, ">= -5", "< 5"))) +
- scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
- geom_density_ridges_gradient(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- xlab("% Error") +
- xlim(c(-40, 40)))
- dev.off()
- png(filename=paste("prederrorHistogram_ols_CV.png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- # Histogram
- # nint 300 bootPI
- # nint 500 bootPI_full
- # nint 400 bootPI_full_replace
- (h <- histogram( ~ pe | Plane + neurType, peFrame, nint = 300, type ="density", xlim = c(-50,50)))
- dev.off()
- ```
- ##### Plot errors vs cumulative probability
- ```{r, fig.height=5, fig.width=20}
- # Inverse quantile
- quantInv <- function(distr, value) ecdf(distr)(value)
- #ecdf plot example
- # plot(ecdf(peFrame[peFrame$interact == "Type1.XY", "pe"]))
- # lines(ecdf(peFrame[peFrame$interact == "Type1.XZ", "pe"]), col = "red")
- # lines(ecdf(peFrame[peFrame$interact == "Type1.YZ", "pe"]), col = "blue")
- probList <- list()
- binProb <- 0.5
- probSeq <- seq(binProb, 100, binProb)
- for (i in unique(peFrame$interact)) {
-
- dataProb <- peFrame[peFrame$interact == i, "pe"]
- probVec <- numeric()
-
- for (j in probSeq) {
-
- errProb <- quantInv(dataProb, j) - quantInv(dataProb, -j)
- probVec <- c(probVec, errProb)
-
- }
-
- probList[[i]] <- probVec
-
- }
- # Plot with limited axis
- setwd("ProyeccionAnalysis/")
- png(filename=paste("errorProbabilityCum_bin", binProb, "_CV.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,1), xlim = c(0.5, 40),
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- # Plot with free axis
- png(filename=paste("errorProbabilityCum_bin", binProb, "_freeAxis_CV.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- ```
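- For reference, `quantInv()` is just the empirical CDF evaluated at a point, so each entry of `probVec` is the two-sided coverage `F(j) - F(-j)`, i.e. the fraction of percentage errors falling within +/- j%. A toy illustration (the example vector is made up):
- ```{r, eval=FALSE}
- toyErr <- c(-12, -6, -3, -1, 0, 2, 4, 7, 15)  # illustrative % errors
- quantInv(toyErr, 5) - quantInv(toyErr, -5)    # fraction of errors within +/- 5%
- mean(abs(toyErr) <= 5)                        # same value here (no error equals exactly -5)
- ```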
- ##### Random sample
- ```{r}
- size <- 0.05
- peRand <- peFrame %>% group_by(interact)
- peSample <- sample_frac(peRand, size)
- ```
- ###### Plot density and histogram
- ```{r}
- setwd("ProyeccionAnalysis/")
- png(filename=paste("prederrorDistributions_ols_CV_rand", size, ".png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- (p <- ggplot(peSample, aes(x = `pe`, y = `interact`, fill = ifelse(..x.. >= -5 & ..x.. <= 5, ">= -5", "< 5"))) +
- scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
- geom_density_ridges_gradient(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- xlab("% Error") +
- xlim(c(-40, 40)))
- dev.off()
- png(filename=paste("prederrorHistogram_ols_CV_rand", size, ".png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- # Histogram
- # nint 300 bootPI
- # nint 500 bootPI_full
- # nint 400 bootPI_full_replace
- (h <- histogram( ~ pe | Plane + neurType, peSample, nint = 300, type ="density", xlim = c(-50,50)))
- dev.off()
- ```
- ###### Plot errors vs cumulative probability
- ```{r, fig.height=5, fig.width=20}
- # Inverse quantile
- quantInv <- function(distr, value) ecdf(distr)(value)
- peSample <- as.data.frame(peSample)
- probList <- list()
- binProb <- 0.5
- probSeq <- seq(binProb, 100, binProb)
- for (i in unique(peSample$interact)) {
-
- dataProb <- peSample[peSample$interact == i, "pe"]
- probVec <- numeric()
-
- for (j in probSeq) {
-
- errProb <- quantInv(dataProb, j) - quantInv(dataProb, -j)
- probVec <- c(probVec, errProb)
-
- }
-
- probList[[i]] <- probVec
-
- }
- # Plot with limited axis
- setwd("ProyeccionAnalysis/")
- png(filename=paste("errorProbabilityCum_bin", binProb, "_CV_rand", size, ".png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,1), xlim = c(0.5, 40),
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- # Plot with free axis
- png(filename=paste("errorProbabilityCum_bin", binProb, "_freeAxis_CV_rand", size, ".png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- ```
- #### Estimate mean percentage errors
- UNFINISHED
- ```{r, fig.width=8, fig.height=8}
- # mm <- peFrame[peFrame$Plane == "XY", "pe"]
- # mm.mat <- matrix(mm, 100, 270)
- # meanMat <- apply(mm.mat, 1, mean)
- ```