---
title: "Axonal Length"
author: "Sergio D?ez Hermano"
date: '`r format(Sys.Date(),"%e de %B, %Y")`'
output:
  html_document:
      highlight: tango
      toc: yes
      toc_depth: 4
      toc_float:
        collapsed: no
---

```{r setup, include=FALSE}
require(knitr)
# include this code chunk as-is to set options
opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE)
Sys.setlocale("LC_TIME", "C")
```


##Load libraries and functions

```{r}
library(R.matlab)
library(lattice)
library(dplyr)
library(ggplot2)
library(ggridges)
library(hrbrthemes)
library(viridis)
library(ggpubr)
library(mltools)
library(data.table)
library(caret)
library(interactions)
library(performance)
library(MASS)   #rlm
library(clickR) #rlm pvals
library(geepack)
library(pstools)
library(MXM)
library(rmcorr)
library(multcomp)

# https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html

###########################
# Robust sanwich variance #
###########################
source("summaryRobust.R")


#########################################
# Deviation from empirical variance GEE #
#########################################
var.crit <- function(mymodel) sum(abs(mymodel[["geese"]]$vbeta.naiv - mymodel[["geese"]]$vbeta))

############################
# Distances between planes #
############################
distPlan <- function(mydata) {
  
  # Vector to store distances
  distPlanes <- vector()
  
  # Estimate distances between planes for each subject
  for (i in unique(mydata$id)) {
    LPvals <- mydata[mydata$id == i, 3]
    distPlanes <- c(distPlanes, 
                    (LPvals[1] - LPvals[2])/max(LPvals[1], LPvals[2]),
                    (LPvals[1] - LPvals[3])/max(LPvals[1], LPvals[3]),
                    (LPvals[2] - LPvals[3])/max(LPvals[2], LPvals[3]))
  }
  
   
  # Concatenate factor with substraction information
  numSubjects <- length(unique(mydata$id))
  subsFactor <- rep(c("XY - XZ", "XY - YZ", "YZ - XZ"), rep = numSubjects)
  distFrame <- data.frame(id = mydata$id, 
                          type = mydata$neurType,
                          distPlanes = distPlanes,
                          substraction = subsFactor)
  
  return(distFrame)
  
}


##########################
# Bootstrap coefficients #
##########################

bootCoef <- function(mydata, axontype, projection, corrMat, nreps) {
  # Select data
  modData <- mydata[mydata$neurType == axontype & mydata$Plane == projection, ]
  
  # Fit the model
  modGee <- geeglm(LR ~ LP, data = modData, id = id, corstr = corrMat)
  
  # Ensure reproducibility
  # set.seed(12345) 
  
  # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  coefmat <- matrix(NA, nreps, 2)
  
  # We need the fitted values and the residuals from the model
  resids <- residuals(modGee)
  preds <- fitted(modGee)
  
  # Repeatedly generate bootstrapped responses
  for(i in 1:nreps) {
    booty <- preds + sample(resids, rep=TRUE)
    modGee2 <- update(modGee, booty ~ .)
    coefmat[i,] <- coef(modGee2)
  }
  
  # Create a dataframe to store predictors values
  colnames(coefmat) <- names(coef(modGee2))
  coefmat <- data.frame(coefmat)
  
  return(coefmat)
}

##############################
# Bootstrap coefficients gEE #
##############################

# For models with interactions

bootCoefint <- function(mydata, modForm, corrMat, nreps) {
  
  # Fit model
    modGee <- geeglm(formula(modForm), data = mydata, id = id, corstr = corrMat)

  # Ensure reproducibility
  # set.seed(12345) 
  
  # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  coefmat <- matrix(NA, nreps, length(coef(modGee)))
  
  # We need the fitted values and the residuals from the model
  resids <- residuals(modGee)
  preds <- fitted(modGee)
  
  # Repeatedly generate bootstrapped responses
  for(i in 1:nreps) {
    booty <- preds + sample(resids, rep=TRUE)
    modGee2 <- update(modGee, booty ~ .)
    coefmat[i,] <- coef(modGee2)
    print(i)
  }
  
  # Create a dataframe to store predictors values
  colnames(coefmat) <- names(coef(modGee2))
  coefmat <- data.frame(coefmat)
  
  return(coefmat)
}

#############################
# Bootstrap coefficients LM #
#############################

# For models with interactions

bootCoefintLM <- function(mydata, modForm, nreps) {
  
  # Fit model
  modLM <- lm(formula(modForm), data = mydata)

  # Ensure reproducibility
  set.seed(12345) 
  
  # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  coefmat <- matrix(NA, nreps, length(coef(modLM)))
  
  # We need the fitted values and the residuals from the model
  resids <- residuals(modLM)
  preds <- fitted(modLM)
  
  # Repeatedly generate bootstrapped responses
  for(i in 1:nreps) {
    booty <- preds + sample(resids, rep=TRUE)
    modLM2 <- update(modLM, booty ~ .)
    coefmat[i,] <- coef(modLM2)
    print(i)
  }
  
  # Create a dataframe to store predictors values
  colnames(coefmat) <- names(coef(modLM2))
  coefmat <- data.frame(coefmat)
  
  return(coefmat)
}

```


##Load data

```{r}
# Get file paths
axonNames <- list.files(paste("ProyeccionData/", sep=""), full.names=T)

# Load matlab file
axonFiles <- lapply(axonNames, function(x) readMat(x))
names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4")

# VAR NAMES
# REAL_LENGTH, PROYECTION_LENGTH_XY, PROYECTION_LENGTH_XZ, PROYECTION_LENGTH_YZ

# Extract data
realLength   <- lapply(axonFiles, function(x) x$REAL.LENGTH)
planeXY      <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XY)
planeXZ      <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XZ)
planeYZ      <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.YZ)

# Get number of neurons per type
numberTypes  <- unlist(lapply(realLength, function(x) length(x)))

# Store in data frames by plane
xyData <- data.frame(id = 1:length(unlist(realLength)), 
                      LR = unlist(realLength), LP = unlist(planeXY), Plane = rep("XY", sum(numberTypes)),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))

xzData <- data.frame(id = 1:length(unlist(realLength)), 
                      LR = unlist(realLength), LP = unlist(planeXZ), Plane = rep("XZ", sum(numberTypes)),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))

yzData <- data.frame(id = 1:length(unlist(realLength)), 
                      LR = unlist(realLength), LP = unlist(planeYZ), Plane = rep("YZ", sum(numberTypes)),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))

# Merge into a single data frame and sort by id
allData <- do.call("rbind", list(xyData, xzData, yzData))
allData <- allData[order(allData$id),]
# Add different number for every neuron-plane combination
allData$NeurPlane <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]), 
                       rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
allData$interact <-interaction(allData$neurType, allData$Plane, lex.order=T)
# Create data frame w/o Type4
dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)

# Data in short format
dataShort <- data.frame(id = 1:length(unlist(realLength)), 
                      LR = unlist(realLength), XY = unlist(planeXY), XZ = unlist(planeXZ), YZ = unlist(planeYZ),
                      neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
```


##Descriptive

###Histograms

```{r, fig.width=8, fig.height=8}
# Use dataShort for plotting
histogram( ~ YZ + XZ + XY + LR | neurType, dataShort, type ="density")
histogram( ~ LR | Plane, allData, type ="density")
```

####GGplot2

```{r, fig.width=8, fig.height=8}
#https://www.r-graph-gallery.com/violin_grouped_ggplot2.html

# New variables releveled for plotting reasons
allData$NPFac <- factor(allData$NeurPlane)
levels(allData$NPFac) <- paste(rep(levels(allData$neurType), each = 3), rep(levels(allData$Plane), 3), sep=".")
# allData$NPFac <- factor(allData$NPFac, levels = rev(levels(allData$NPFac)))
allData$typeFac <- factor(allData$neurType, levels = rev(levels(allData$neurType)))

# Plot
ggplot(allData, aes(x = `LP`, y = `NPFac`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis(name = "LP", option = "D") +
  labs(title = 'Length 2D Proyections') +
  theme_ipsum() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    )

# Plot
ggplot(allData, aes(x = `LR`, y = `neurType`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis(name = "LP", option = "D") +
  labs(title = 'Length 2D Proyections') +
  theme_ipsum() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    )

# Plot
ggplot(allData, aes(x = LP, y = NPFac, fill = neurType)) +
  geom_density_ridges() +
  theme_ridges() + 
  theme(legend.position = "none")

# All together gradient
# dataShort$normLR <- dataShort$LR/dataShort$LR
# dataShort$normXY <- dataShort$LR/dataShort$XY
# dataShort$normXZ <- dataShort$LR/dataShort$XZ
# dataShort$normYZ <- dataShort$LR/dataShort$YZ
joinData <- melt(setDT(dataShort), id.vars = c("id","neurType"), variable.name = "length")
joinData <- joinData[order(joinData$id), ]
joinData$NeurPlane <-interaction(joinData$neurType, joinData$length, lex.order=T)

png(filename=paste("gradientDistributions.png", sep=""), type="cairo",
    units="in", width=10, height=10, pointsize=12,
    res=300)

ggplot(joinData, aes(x = `value`, y = `NeurPlane`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis(name = "LP", option = "D") +
  labs(title = 'Length 2D Proyections') +
  theme_ipsum() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    )

dev.off()

# All together gradient (LOG)
joinData <- melt(setDT(dataShort), id.vars = c("id","neurType"), variable.name = "length")
joinData <- joinData[order(joinData$id), ]
joinData$NeurPlane <- interaction(joinData$neurType, joinData$length, lex.order=T)
joinData$logValue <- log(joinData$value)


ggplot(joinData, aes(x = `logValue`, y = `NeurPlane`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis(name = "LP", option = "D") +
  labs(title = 'Length 2D Proyections') +
  theme_bw() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    )



# All together color by type
ggplot(joinData, aes(x = `value`, y = `NeurPlane`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8) +
  theme_ridges() + 
  theme(legend.position = "none")
```

####GGPlot2 Stacked

```{r, fig.width=20, fig.height=6}
# All together overlapping
# levels(joinData$length) <- rev(levels(joinData$length))
joinData$estim <- joinData$value*1.27
joinData[joinData$length == "LR", 6] <- joinData[joinData$length == "LR", 4]

png(filename=paste("gradientStacked.png", sep=""), type="cairo",
    units="in", width=20, height=5, pointsize=12,
    res=300)

ggplot(data=joinData, aes(x=value, fill=length)) +
  geom_density(adjust=1.5, alpha = .4) +
  theme_ipsum() +
  facet_wrap(~neurType, ncol = 4) +
  xlim(0, 2.5e05) +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )

dev.off()

png(filename=paste("gradientEstimation.png", sep=""), type="cairo",
    units="in", width=20, height=5, pointsize=12,
    res=300)

ggplot(data=joinData, aes(x=estim, fill=length)) +
  geom_density(adjust=1.5, alpha = .4) +
  theme_ipsum() +
  facet_wrap(~neurType, ncol = 4) +
  xlim(0, 2.5e05) +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )
 dev.off()
```


###Boxplot

```{r, fig.width=20, fig.height=5}
# Use dataShort for plotting
lrBx <- ggplot(dataShort, aes(x=neurType, y=LR, fill = neurType)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.1, fill = "white") + theme_minimal()

xyBx <- ggplot(dataShort, aes(x=neurType, y=XY, fill = neurType)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.1, fill = "white") + theme_minimal()

xzBx <- ggplot(dataShort, aes(x=neurType, y=XZ, fill = neurType)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.1, fill = "white") + theme_minimal()

yzBx <- ggplot(dataShort, aes(x=neurType, y=YZ, fill = neurType)) +
  geom_violin(trim=FALSE) +
  geom_boxplot(width=0.1, fill = "white") + theme_minimal()

ggarrange(lrBx, xyBx, xzBx, yzBx, ncol = 4, nrow = 1)
```

###Correlations

####LR vs LP by Neuron

```{r, fig.width=20}
par(mfrow = c(1,4))
plot(LR ~ LP, data = allData[allData$neurType == "Type1", ], pch = 21, bg = "coral", main = "Type1", xlim = c(0,250000), ylim = c(0, 300000))
abline(h=1000, v=1000, col = "gray", lty = "dashed")
plot(LR ~ LP, data = allData[allData$neurType == "Type2", ], pch = 21, bg = "lightblue", main = "Type2", xlim = c(0,250000), ylim = c(0, 300000))
abline(h=1000, v=1000, col = "gray", lty = "dashed")
plot(LR ~ LP, data = allData[allData$neurType == "Type3", ], pch = 21, bg = "lightgreen", main = "Type3", xlim = c(0,250000), ylim = c(0, 300000))
abline(h=1000, v=1000, col = "gray", lty = "dashed")
plot(LR ~ LP, data = allData[allData$neurType == "Type4", ], pch = 21, bg = "violet", main = "Type4", xlim = c(0,250000), ylim = c(0, 300000))
abline(h=1000, v=1000, col = "gray", lty = "dashed")
```


####LR vs LP by Plane

```{r, fig.width=15}
par(mfrow = c(1,3))
plot(LR ~ LP, data = allData[allData$Plane == "XY", ], pch = 21, bg = "red", main = "XY", xlim = c(0,250000), ylim = c(0, 300000))
abline(h=1000, v=1000, col = "gray", lty = "dashed")
plot(LR ~ LP, data = allData[allData$Plane == "XZ", ], pch = 21, bg = "blue", main = "XZ", xlim = c(0,250000), ylim = c(0, 300000))
abline(h=1000, v=1000, col = "gray", lty = "dashed")
plot(LR ~ LP, data = allData[allData$Plane == "YZ", ], pch = 21, bg = "green", main = "YZ", xlim = c(0,250000), ylim = c(0, 300000))
abline(h=1000, v=1000, col = "gray", lty = "dashed")
```

####LR vs LP by Neuron*Plane

```{r}
# Duplicate data to avoid confusion
data3D <- allData
# Add a number identifyng every neuron-plane combination
data3D$plaNum <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]), rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
# Plot
car::scatter3d(x = data3D$LP, y = data3D$LR, z = data3D$plaNum, group = data3D$neurType, 
               surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)

# Plot
dataXY.1v4 <- data3D[data3D$Plane == "XY" & (data3D$neurType == "Type1" | data3D$neurType == "Type4"), ]
car::scatter3d(x = dataXY.1v4$LP, y = dataXY.1v4$LR, z = dataXY.1v4$plaNum, group = dataXY.1v4$neurType, 
               surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
```

####Planes distance

```{r, fig.width=20, fig.height=5}
distWithin <- distPlan(allData)
distWithin$Combin <- interaction(distWithin$type, distWithin$substraction, lex.order = TRUE)

boxplot(distPlanes ~ substraction + type, distWithin)

# Boxplot
png(filename=paste("boxDistance.png", sep=""), type="cairo",
    units="in", width=5, height=5, pointsize=12,
    res=300)

bx1 <- distWithin %>%
  ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
  geom_boxplot() +
  scale_fill_viridis(discrete = TRUE, alpha=0.6) +
  theme_ipsum() +
  theme(
    legend.position="none",
    plot.title = element_text(size=11),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
  ) +
  ggtitle("Distances plane boxplot") +
  xlab("")

bx1

dev.off()

# Boxlot with jitter
bx2 <- distWithin %>%
  ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
  geom_boxplot() +
  scale_fill_viridis(discrete = TRUE, alpha=0.6) +
  geom_jitter(color = "black", size=0.1, alpha=0.9) +
  scale_color_viridis(discrete = TRUE, alpha=0.6) +
  theme_ipsum() +
  theme(
    legend.position="none",
    plot.title = element_text(size=11),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
  ) +
  ggtitle("A boxplot with jitter") +
  xlab("")

# Vertical violin
bx3 <- distWithin %>%
  ggplot(aes(fill=substraction, y=distPlanes, x=type)) + 
  geom_violin(position="dodge", alpha=0.5, outlier.colour="transparent") +
  scale_fill_viridis(discrete=T, name="") +
  theme_ipsum()  +
  xlab("") +
  ylab("Distance Within Types")

# Horizontal violin
bx4 <- distWithin %>%
  ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
  geom_violin(width=2, size=0.1) +
  scale_fill_viridis(discrete=TRUE) +
  scale_color_viridis(discrete=TRUE) +
  theme_ipsum() +
  theme(
    legend.position="none"
  ) +
  coord_flip() + # This switch X and Y axis and allows to get the horizontal version
  xlab("") +
  ylab("Distance Within Types")

ggarrange(bx1, bx2, bx3, bx4, ncol = 4, nrow = 1)

```

#####Distances ggplot

```{r, fig.width=15, fig.height=5}
rest1 <- ggplot(distWithin[distWithin$substraction == "XY - XZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
  scale_fill_viridis(name = "LP", option = "D") +
  labs(title = 'XY - XZ') +
  theme_ipsum() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    )

rest2 <- ggplot(distWithin[distWithin$substraction == "XY - YZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
  scale_fill_viridis(name = "LP", option = "D") +
  labs(title = 'XY - YZ') +
  theme_ipsum() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    )

rest3 <- ggplot(distWithin[distWithin$substraction == "YZ - XZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
  geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
  scale_fill_viridis(name = "LP", option = "D") +
  labs(title = 'YZ - XZ') +
  theme_ipsum() +
    theme(
      legend.position="none",
      panel.spacing = unit(0.1, "lines"),
      strip.text.x = element_text(size = 8)
    )

ggarrange(rest1, rest2, rest3,  ncol = 3, nrow = 1)


car::scatter3d(x = distWithin[distWithin$substraction == "XY - XZ", 3], 
               y = distWithin[distWithin$substraction == "XY - YZ", 3], 
               z = distWithin[distWithin$substraction == "YZ - XZ", 3], 
               group = factor(rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)), 
               surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)

plot(x = distWithin[distWithin$substraction == "XY - XZ", 3], 
     y = distWithin[distWithin$substraction == "XY - YZ", 3], 
     pch = 21,
     bg = factor(rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)))
```

##Empirical error

```{r, fig.width=15}
# https://stackoverflow.com/questions/12394321/r-what-algorithm-does-geom-density-use-and-how-to-extract-points-equation-of
# https://stackoverflow.com/questions/24985361/understanding-bandwidth-smoothing-in-ggplot2

dataEmpirical <- allData

dataEmpirical$alpha <- dataEmpirical$LR/dataEmpirical$LP

#######################################
# Estimate alphas and use grand alpha #
#######################################

# Grand alpha
dataEmpirical$error <- 100*(1 - mean(dataEmpirical$alpha)/dataEmpirical$alpha)

# Plot
ga <- ggplot(dataEmpirical, aes(x = `error`, y = `interact`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8) +
  theme_ridges() + 
  theme(legend.position = "none") +
  ggtitle("Grand Alpha")

#######################################
# Estimate alphas and use local alpha #
#######################################

# Find mean alpha per group
meanAlpha <- aggregate(alpha ~ neurType + Plane, dataEmpirical, mean)
dataEmpirical$localErr <- dataEmpirical$error

# Divide by max LR per axon type and plane
for (i in c("Type1", "Type2", "Type3", "Type4")) {
  for (j in c("XY", "XZ", "YZ")) {
    # Subset and divide alpha
    alphaVec <- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 8] 
    localAlpha <- meanAlpha[meanAlpha$neurType == i & meanAlpha$Plane == j, 3]
    dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 10] <- 100*(1 - localAlpha/alphaVec)
  }
}

# Plot
la <-ggplot(dataEmpirical, aes(x = `localErr`, y = `interact`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8) +
  theme_ridges() + 
  theme(legend.position = "none") +
  ggtitle("Local Alpha")


###############
# Fixed alpha #
###############

dataEmpirical$fixedError <- 100*(1 - 1.268/dataEmpirical$alpha)

fa <- ggplot(dataEmpirical, aes(x = `fixedError`, y = `interact`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8) +
  theme_ridges() + 
  theme(legend.position = "none") +
  ggtitle("Fixed Alpha")


##############
# Joint plot #
##############

png(filename=paste("alphasComparison_LRbyLP.png", sep=""), type="cairo",
    units="in", width=15, height=6, pointsize=12,
    res=300)

ggarrange(ga, la, fa, ncol=3, nrow=1)

dev.off()
```

###Equal Bandwidth

```{r, fig.width=15}
# https://stackoverflow.com/questions/12394321/r-what-algorithm-does-geom-density-use-and-how-to-extract-points-equation-of
# https://stackoverflow.com/questions/24985361/understanding-bandwidth-smoothing-in-ggplot2

dataEmpirical <- allData

dataEmpirical$alpha <- dataEmpirical$LR/dataEmpirical$LP

bw <- 1

#######################################
# Estimate alphas and use grand alpha #
#######################################

# Grand alpha
dataEmpirical$error <- 100*(1 - mean(dataEmpirical$alpha)/dataEmpirical$alpha)

# Plot
ga <- ggplot(dataEmpirical, aes(x = `error`, y = `interact`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8, bandwidth = bw) +
  theme_ridges() + 
  theme(legend.position = "none") +
  ggtitle("Grand Alpha")

#######################################
# Estimate alphas and use local alpha #
#######################################

# Find mean alpha per group
meanAlpha <- aggregate(alpha ~ neurType + Plane, dataEmpirical, mean)
dataEmpirical$localErr <- dataEmpirical$error

# Divide by max LR per axon type and plane
for (i in c("Type1", "Type2", "Type3", "Type4")) {
  for (j in c("XY", "XZ", "YZ")) {
    # Subset and divide alpha
    alphaVec <- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 8] 
    localAlpha <- meanAlpha[meanAlpha$neurType == i & meanAlpha$Plane == j, 3]
    dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 10] <- 100*(1 - localAlpha/alphaVec)
  }
}

# Plot
la <-ggplot(dataEmpirical, aes(x = `localErr`, y = `interact`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8, bandwidth = bw) +
  theme_ridges() + 
  theme(legend.position = "none") +
  ggtitle("Local Alpha")


###############
# Fixed alpha #
###############

dataEmpirical$fixedError <- 100*(1 - 1.268/dataEmpirical$alpha)

fa <- ggplot(dataEmpirical, aes(x = `fixedError`, y = `interact`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8, bandwidth = bw) +
  theme_ridges() + 
  theme(legend.position = "none") +
  ggtitle("Fixed Alpha")


##############
# Joint plot #
##############

png(filename=paste("alphasComparison_LRbyLP_bw", bw, ".png", sep=""), type="cairo",
    units="in", width=15, height=6, pointsize=12,
    res=300)

ggarrange(ga, la, fa, ncol=3, nrow=1)

dev.off()
```

###Histograms

```{r, fig.width=8, fig.height=8}
# Use dataShort for plotting
histogram( ~ error | Plane + neurType, dataEmpirical, nint = 200, type ="density")
histogram( ~ localErr | Plane + neurType, dataEmpirical, nint = 200, type ="density")
histogram( ~ fixedError | Plane + neurType, dataEmpirical, nint = 200, type ="density")
```


###Error vs LP

```{r}
##  Create cuts:
x_c <- cut(dataEmpirical$LP, 50)
y_c <- cut(dataEmpirical$fixedError, 50)

##  Calculate joint counts at cut levels:
z <- table(x_c, y_c)

##  Plot as a 3D histogram:
plot3D::hist3D(dataEmpirical$LP, dataEmpirical$fixedError, border="black")

##  Plot as a 2D heatmap:
plot3D::image2D(z=z, border="black")


gplots::hist2d(dataEmpirical[, c(3,11)], nbins=50)
```



##OLS

```{r, fig.width=15, fig.height=10}
##############
# Fit models #
##############

# mod3 <- lm(LR ~ LP*neurType*Plane, data = allData[allData$neurType != "Type4", ])
mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData[allData$neurType != "Type4", ])
mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData)
mod2 <- lm(LR ~ LP*neurType + LP*Plane + neurType*Plane, data = allData)
mod1 <- lm(LR ~ LP + neurType + Plane, data = allData)

# Summmaries and anovas
anova(mod3, mod2) # 3rd level interaction seems significant
summary(mod3)
car::Anova(mod3)
# RLS2 robust variance
summaryRobust(mod3, robust = TRUE)


# Do some plotting to check lines
interact_plot(model = mod3, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, 
                    mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))

interact_plot(model = mod3, pred = LP, modx = neurType, mod2 = Plane, plot.points = TRUE, interval = T, 
                    modx.labels = c("Type1", "Type2", "Type3", "Type4"), mod2.labels = c("XY", "XZ", "YZ"))

# Pruebas
mod3.XY <- lm(LR ~ LP*neurType - 1, data = allData[allData$Plane == "XY" & allData$neurType != "Type4", ])
car::Anova(mod3.XY)
summary(mod3.XY)

#######
# NAs #
#######
# Test that NAs happen, when no intercept or main effects are present, for the dummy factor level that is not used as reference because one level is a linear combination of the others
mod.Intercept <- lm(LR ~ LP + LP:neurType + LP:Plane + neurType:Plane + LP:neurType:Plane, data = allData)
modNA.neur4 <- lm(LR ~ LP + LP:neurType + LP:Plane + neurType:Plane + LP:neurType:Plane - 1, data = allData)
modNA.planYZ <- lm(LR ~ LP + LP:Plane + LP:neurType + neurType:Plane + LP:Plane:neurType - 1, data = allData)

summary(modNA.neur4)

# Do some plotting to check lines
interact_plot(model = mod.Intercept, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, 
                    mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
interact_plot(model = modNA.neur4, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, 
                    mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
```

###Diagnosis

```{r, fig.width=15, fig.height=10}
# Diagnosis
par(mfrow = c(2, 3))
plot(mod3, 1:5)
abline(h=c(-3,3), col="gray", lty="dashed")

############################
# Discard by Cook distance #
############################
# Augment model with diagnosis
mod3.diag.metrics <- broom::augment(mod3)
# Find observations that are highly influential
cookDist <- mod3.diag.metrics[mod3.diag.metrics$.cooksd > 0.02, ]
# Find rows and IDs of those observations to eliminate its participation in all planes
remRows <- sapply(cookDist$.rownames, function(x) which(rownames(allData) == x))
remIDs <- sapply(cookDist$.rownames, function(x) allData[which(rownames(allData) == x), 1])
remRowsIDs <- as.vector(sapply(unique(remIDs), function(x) which(allData$id == x)))
# Discard highly influential observations
allData2 <- allData[-remRowsIDs, ]
# Refit
mod3.2 <- lm(LR ~ LP + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData2)
summary(mod3.2)
# Diagnosis
par(mfrow = c(2, 3))
plot(mod3.2, 1:5)
abline(h=c(-3,3), col="gray", lty="dashed")

############################
# Discard by Strd Residual #
############################
# Augment model with diagnosis
mod3.diag.metrics <- broom::augment(mod3)
# Find observations that are highly influential
stdResid <- mod3.diag.metrics[mod3.diag.metrics$.std.resid > 3, ]
# Find rows and IDs of those observations to eliminate its participation in all planes
remRows <- sapply(stdResid$.rownames, function(x) which(rownames(allData) == x))
remIDs <- sapply(stdResid$.rownames, function(x) allData[which(rownames(allData) == x), 1])
remRowsIDs <- as.vector(sapply(unique(remIDs), function(x) which(allData$id == x)))
# Discard highly influential observations
allData3 <- allData[-remRowsIDs, ]
# Refit
mod3.3 <- lm(LR ~ LP*neurType*Plane, data = allData3)
summary(mod3.3)
# Diagnosis
par(mfrow = c(2, 3))
plot(mod3.3, 1:5)
abline(h=c(-3,3), col="gray", lty="dashed")

####################
# Discard outliers #
####################
# Use multivariate Mahalanobis distance and add to allData
mahalDist <- by(allData, allData$neurType, function(x) mahalanobis(x[,2:3], colMeans(x[,2:3]), cov(x[,2:3])))
allData$mahalDist <- unlist(mahalDist)
# Sort from higher from lower Mahalanobis distance per neuron type
mahalSort <- by(allData, allData$neurType, function(x) x[order(x$mahalDist, decreasing = TRUE),])
# Store as data frame
mahalSort <- do.call("rbind", mahalSort)
# Discard observations with Mahalanobis distance >= 30
allData4 <- mahalSort[mahalSort$mahalDist < 20, ]
# Refit
mod3.4 <- lm(LR ~ LP*neurType*Plane, data = allData4)
summary(mod3.4)
# Diagnosis
par(mfrow = c(2, 3))
plot(mod3.4, 1:5)
abline(h=c(-3,3), col="gray", lty="dashed")
# 3D PLOT Add a number identifyng every neuron-plane combination
data3D$mahalDist <- unlist(mahalDist)
data3Dmahal <- data3D[data3D$mahalDist < 20, ]
rgl::open3d()
car::scatter3d(x = data3Dmahal$LP, y = data3Dmahal$LR, z = data3Dmahal$plaNum, group = data3Dmahal$neurType, 
               surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
# Do some plotting to check lines
interact_plot(model = mod3.4, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, 
                    mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))

###########################
# Discard outliers by iqr #
###########################
# https://easystats.github.io/performance/reference/check_outliers.html
iqr <- by(allData[,2:3], attributes(check_outliers(mod3, method = "mahalanobis")))
mod3.5 <- update(mod3, .~., data = allData[-as.numeric(which(iqr$data$Outlier == TRUE)), ])
summary(mod3.5)
# Diagnosis
par(mfrow = c(2, 3))
plot(mod3.5, 1:5)
abline(h=c(-3,3), col="gray", lty="dashed")
# 3D PLOT Add a number identifyng every neuron-plane combination
data3Diqr <- data3D[-as.numeric(which(iqr$data$Outlier == TRUE)), ]
rgl::open3d()
car::scatter3d(x = data3Diqr$LP, y = data3Diqr$LR, z = data3Diqr$plaNum, group = data3Diqr$neurType, 
               surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
# Do some plotting to check lines
interact_plot(model = mod3.5, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, 
                    mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
```

###One hot coding

```{r, fig.width=8, fig.height=8}
onehotData <- one_hot(as.data.table(allData))
  
# Fit models
mod1 <- lm(LR ~ LP + neurType_Type2 + neurType_Type3 + neurType_Type4 + Plane_XZ + Plane_YZ + LP:neurType_Type2 + LP:neurType_Type3, data = onehotData)

# Summmaries and anovas
summary(mod1)

# Diagnosis
par(mfrow = c(2, 2))
plot(mod1)
```

###Partial regression

```{r}
# http://www.colbyimaging.com/wiki/statistics/multiple-regression
lm.lr <- lm(LR ~ neurType*Plane, data = allData)
lm.lp <- lm(LP ~ neurType*Plane, data = allData)
lm.Res <- lm(lm.lr$res ~ lm.lp$res:as.factor(allData$NeurPlane) + 0)
summary(lm.Res)
```


###Fixed/variable intercept/slopes

```{r}
############################
# Allow variable intercept #
############################
modIncp.neur  <- lm(LR ~ LP + neurType, data = allData)
modIncp.plane  <- lm(LR ~ LP + Plane, data = allData)
modIncp  <- lm(LR ~ LP + neurType + Plane, data = allData)

summary(modIncp.neur)
summary(modIncp.plane)
summary(modIncp)

aggregate(LR ~ neurType, allData, mean)
aggregate(LP ~ neurType, allData, mean)
aggregate(LR ~ Plane, allData, mean)

############################
# Allow variable slope #
############################
modSlope.neur  <- lm(LR ~ LP + LP:neurType, data = allData)
modSlope.plane  <- lm(LR ~ LP + LP:Plane, data = allData)
modSlope  <- lm(LR ~ LP + LP:neurType + LP:Plane, data = allData)

summary(modSlope.neur)
summary(modSlope.plane)
summary(modSlope)
```



###Contr sum

```{r}
dataSum <- allData
contr.sum(5)
contrasts(dataSum$neurType) <- contr.sum(4)
contrasts(dataSum$Plane) <- contr.sum(3)

mod.Sum <- lm(LR ~ LP + LP:neurType + LP:Plane + LP:neurType:Plane, data = dataSum)
summary(mod.Sum)
```

```{r}
nT <- allData$neurType
p <- allData$Plane
l <- allData$LP
lr <- allData$LR
X0 <- model.matrix(~ nT + p,
                   contrasts.arg = list(nT = contr.treatment(n = 4, contrasts = FALSE),
                                        p = contr.treatment(n = 3, contrasts = FALSE)))

f <- sample(gl(3, 4, labels = letters[1:3]))

unname( rowSums(X0[, c("nT1", "nT2", "nT3", "nT4")]) )

unname( rowSums(X0[, c("p1", "p2", "p3")]) )

qr(X0)$rank

summary(lm(lr ~ l + X0 - 1)) 
```


##RLS1

```{r, fig.width=15, fig.height=10}
# Weight observations by residual size
# https://stats.idre.ucla.edu/r/dae/robust-regression/
# mod3 <- lm(LR ~ LP*neurType*Plane, data = allData[allData$neurType != "Type4", ])
# mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData[allData$neurType != "Type4", ])
mod3R <- rlm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData)
mod2R <- rlm(LR ~ LP*neurType + LP*Plane + neurType*Plane - 1, data = allData)
mod1R <- rlm(LR ~ LP + neurType + Plane - 1, data = allData)

# Summmaries and anovas
(sumMod <- summary(mod3R))
# Get which coefficients are significative, although it may not be correct to use t distribution for H0 in RLS
# https://stat.ethz.ch/pipermail/r-help/2006-July/108659.html
attributes(sumMod$coefficients)$dimnames[[1]][which(rob.pvals(mod3R) < 0.05)]
car::Anova(mod3R)

# Do some plotting to check lines
interact_plot(model = mod3R, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, 
                    mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))

interact_plot(model = mod3R, pred = LP, modx = neurType, mod2 = Plane, plot.points = TRUE, interval = T, 
                    modx.labels = c("Type1", "Type2", "Type3", "Type4"), mod2.labels = c("XY", "XZ", "YZ"))
```

##RLS2

```{r}
# Robust variance and clustering
mod3 <- estimatr::lm_robust(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData, clusters = id)
summary(mod3)
```


###Diagnosis

```{r}
# Diagnosis
par(mfrow = c(2, 3))
plot(mod3R, 1:5)
abline(h=c(-3,3), col="gray", lty="dashed")
```


##GEE

```{r, fig.width=15, fig.height=10}
# Fit models
strucor <- "independence"
strucor <- "exchangeable"
dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)


mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataNo4, id = id, corstr = strucor)
# mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData, id = id, corstr = strucor)
mod2Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane - 1, data = dataNo4, id = id)
mod1Gee <- geeglm(LR ~ LP + neurType + Plane - 1, data = dataNo4, id = id)

# Summmaries and anovas
coef(mod3Gee)
anova(mod3Gee, mod2Gee, test = "F")
summary(mod3Gee)
anova(mod3Gee, test = "Chi")

# Varriable selection
gee_stepper(mod1Gee, formula(mod3Gee)) # also needs strucor
```

###Change ref level

```{r}
# Change reference level
dataNo4$neurType <- relevel(dataNo4$neurType, ref="Type2")
dataNo4$Plane <- relevel(dataNo4$Plane, ref="YZ")

mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataNo4, id = id, corstr = strucor)
summary(mod3Gee)
```


###Check correlation matrix

We can also use the naive and robust variance estimates to select a correlation model. The model whose robust variance estimates most closely resembles its naive variance estimates is the better correlation model. To obtain a single summary statistic for this comparison I use the entire parameter covariance matrix and sum the absolute differences between the naive and robust covariance matrices.

Link: https://sakai.unc.edu/access/content/group/2842013b-58f5-4453-aa8d-3e01bacbfc3d/public/Ecol562_Spring2012/docs/lectures/lecture23.htm#checking

```{r}
# Select data
datToMod <- allData

# Check empirical correlation matrix
mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod)
xyRes <- mod3$residuals[datToMod$Plane == "XY"]
xzRes <- mod3$residuals[datToMod$Plane == "XZ"]
yzRes <- mod3$residuals[datToMod$Plane == "YZ"]
Hmisc::rcorr(cbind(xyRes, xzRes, yzRes))

# Fit models with differente corr mat
mod3Gee.In <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod, 
                     id = id, corstr = "independence")
mod3Gee.Ex <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod, 
                     id = id, corstr = "exchangeable")
mod3Gee.Ar <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod, 
                     id = id, corstr = "ar1")
mod3Gee.Un <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod, 
                     id = id, corstr = "unstructured")

# Deviation from naive variance
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)

# QIC criteria
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))

# Check multiple comparisons
K1 <- glht(mod3Gee.Ex, mcp(neurType = "Tukey"))$linfct
K2 <- glht(mod3Gee.Ex, mcp(Plane = "Tukey"))$linfct
summary(glht(mod3Gee.Ex, linfct = rbind(K1, K2)))

mod3Gee.check <- geeglm(LR ~ LP + interact + LP:interact, data = datToMod, 
                     id = id, corstr = "exchangeable")
summary(glht(mod3Gee.check, linfct = mcp(interact = "Tukey")))

# Calculate means
meanCalc <- aggregate(LR ~ neurType + Plane, data = allData, mean)
summary(geeglm(LR ~ LP + neurType + Plane, data = datToMod, 
                     id = id, corstr = "exchangeable"))
```

###log transformation

```{r}
# Select data
datToMod <- dataNo4
datToMod$logLP <- log(datToMod$LP)
datToMod$logLR <- log(datToMod$LR)

# Fit models with differente corr mat
mod3Gee.In <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1, 
                     data = datToMod, id = id, corstr = "independence")
mod3Gee.Ex <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1, 
                     data = datToMod, id = id, corstr = "exchangeable")
mod3Gee.Ar <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1, 
                     data = datToMod, id = id, corstr = "ar1")
mod3Gee.Un <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1, 
                     data = datToMod, id = id, corstr = "unstructured")

# Deviation from naive variance
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)

# QIC criteria
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))

# Check the best option
summary(mod3Gee.Ex)
anova(mod3Gee.Ex)
```

###Standarize

```{r}
# List order is: Type1XY, Type2XY, Type3XY, Type1XZ...
LPstand <- by(dataNo4$LP, list(dataNo4$neurType, dataNo4$Plane), scale)
LRstand <- by(dataNo4$LR, list(dataNo4$neurType, dataNo4$Plane), scale)

# Number of observations
numObs <- unlist(lapply(LPstand[1:3], function(x) length(x)))

# Create data frame
dataStand <- data.frame(id = rep(1:sum(numObs), 3), 
                        LR = unlist(LRstand), 
                        LP = unlist(LPstand),
                        Plane = rep(c("XY", "XZ", "YZ"), each = sum(numObs)),
                        neurType = rep(rep(c("Type1", "Type2", "Type3"), times = numObs), 3))
dataStand <- dataStand[order(dataStand$id), ]
dataStand$interact <- interaction(dataStand$neurType, dataStand$Plane, lex.order=T)

# Plot
ggplot(dataStand, aes(x = `LP`, y = `interact`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8) +
  theme_ridges() + 
  theme(legend.position = "none")

# Fit Gee
# dataStand$neurType <- relevel(dataNo4$neurType, ref="Type1")
# dataStand$Plane <- relevel(dataNo4$Plane, ref="XY")
standGee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataStand, id = id, corstr = "exchangeable")
standMain <- geeglm(LR ~ LP + neurType + Plane - 1, data = dataStand, id = id, corstr = "exchangeable")
summary(standGee)
anova(standGee)
anova(standGee, standMain)
```

####Bootstrap

```{r, fig.width=10, fig.height=5}
# Bootstrap for inference
# Ensure reproducibility
# set.seed(12345)
# bootInter <- bootCoefint(dataStand, fullMod = T, "exchangeable", 500)

# saveRDS(bootInter, "bootCoefsInteraction.rds")

# Store as data frame
options(digits=10)
bootFrame <- data.frame(Type1XY = bootInter$LP, 
                        Type1XZ = bootInter$LP + bootInter$LP.PlaneXZ, 
                        Type1YZ = bootInter$LP + bootInter$LP.PlaneYZ,
                        Type2XY = bootInter$LP + bootInter$LP.neurTypeType2, 
                        Type2XZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneXZ,
                        Type2YZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneYZ,
                        Type3XY = bootInter$LP + bootInter$LP.neurTypeType3, 
                        Type3XZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneXZ,
                        Type3YZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneYZ)
bootMelt <- reshape2::melt(bootFrame)
bootMelt$neurType <- rep(c("Type1", "Type2", "Type3"), each = 1500)
bootMelt$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = 500), 3)

#Represent LP
ggNeur <- ggplot(data=bootMelt, aes(x=value, fill=Plane)) +
  geom_density(adjust=1.5, alpha = .4) +
  theme_bw() +
  facet_wrap(~neurType, ncol = 3) +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )

ggPlanes <- ggplot(data=bootMelt, aes(x=value, fill=neurType)) +
  geom_density(adjust=1.5, alpha = .4) +
  theme_bw() +
  facet_wrap(~Plane, ncol = 3) +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )

ggarrange(ggNeur, ggPlanes, ncol = 2, nrow = 1)
```

#####CI95 plot

```{r}
# Extract the quantiles we need
quant <- apply(bootFrame, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
quantFrame <- reshape2::melt(quant)
quantFrame$y <- rep(1:9, each=3)

# Plot CI95
plot(0, type="n", xlim=c(0.9,1.1), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
ytick <- seq(1, 9, by=1)
axis(side=2, at=ytick, labels = FALSE)
text(par("usr")[1], ytick,  
     labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)

for (i in seq(1, 27,3)) {
  segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
  points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
}
abline(v=c(1.268), lty = "dashed", col = "gray")
text(x=1.2765, y=10.1, "1.268",col="gray", font=2, cex=0.6)
```

###Divide max LR

```{r}
# Find max LR
maxLR <- aggregate(LR ~ neurType + Plane, dataNo4, max)
dataMax <- dataNo4

# Divide by max LR per axon type and plane
for (i in c("Type1", "Type2", "Type3")) {
  for (j in c("XY", "XZ", "YZ")) {
    # Subset and divide LR
    lr <- dataMax[dataMax$neurType == i & dataMax$Plane == j, 2] 
    dataMax[dataMax$neurType == i & dataMax$Plane == j, 2] <- lr/maxLR[maxLR$neurType == i & maxLR$Plane == j, 3]
    
    # Subset and divide LP
    lp <- dataMax[dataMax$neurType == i & dataMax$Plane == j, 3] 
    dataMax[dataMax$neurType == i & dataMax$Plane == j, 3] <- lp/maxLR[maxLR$neurType == i & maxLR$Plane == j, 3]
  }
}

# Plot
ggplot(dataMax, aes(x = `LP`, y = `interact`, fill = neurType)) +
  geom_density_ridges(alpha = 0.8) +
  theme_ridges() + 
  theme(legend.position = "none")

# Fit GEE
maxGee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataMax, id = id, corstr = "exchangeable")
summary(maxGee)
anova(maxGee)

```


###neurType*Plane included

```{r}
# Select data
datToMod <- dataNo4
contrasts(datToMod$neurType) <- contr.treatment
contrasts(datToMod$Plane) <- contr.treatment

# Fit models with differente corr mat
mod3Gee.In <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "independence")
mod3Gee.Ex <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "exchangeable")
mod3Gee.Ar <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "ar1")
mod3Gee.Un <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "unstructured")

# Deviation from naive variance
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)

# QIC criteria
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))

# Exchangeable seems the best option
mod3Gee.Ex.Int <- geeglm(LR ~ LP*interact, data = datToMod, id = id, corstr = "exchangeable")
# Multiple comparisons FWER
comparMult <- glht(mod3Gee.Ex.Int, linfct = mcp(interact = "Tukey"))
summary(comparMult)

# Multiple comparisons FDR
summary(comparMult, adjust = "bonferroni")

# Variable selection
mod1Gee <- geeglm(LR ~ 1, data = datToMod, id = id, corstr = "exchangeable")
gee_stepper(mod1Gee, formula(mod3Gee.Ex))
summary(mod3Gee.Ex)
anova(mod3Gee.Ex)
```

####log transformation

```{r}
# Select data
datToMod <- dataNo4
datToMod$interact <- factor(datToMod$interact)
datToMod$logLP <- log(datToMod$LP)
datToMod$logLR <- log(datToMod$LR)
contrasts(datToMod$neurType) <- contr.treatment
contrasts(datToMod$Plane) <- contr.treatment

# Fit models with differente corr mat
mod3Gee.In <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "independence")
mod3Gee.Ex <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "exchangeable")
mod3Gee.Ar <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "ar1")
mod3Gee.Un <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "unstructured")

# Deviation from naive variance
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)

# QIC criteria
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))

# Check the best option
summary(mod3Gee.Ex)
```

####1/(y^2)

```{r}
# Select data
datToMod <- allData
datToMod$interact <- factor(datToMod$interact)
datToMod$wei <- 1/((datToMod$LR)^2)
contrasts(datToMod$neurType) <- contr.treatment
contrasts(datToMod$Plane) <- contr.treatment

# Fit models with differente corr mat
mod3Gee.In <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "independence")
mod3Gee.Ex <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "exchangeable")
mod3Gee.Ar <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "ar1")
mod3Gee.Un <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "unstructured")

# Deviation from naive variance
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)

# QIC criteria
sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))

# Check the best option
summary(mod3Gee.Un)
```


##Bootstrapped coefficients

```{r}
# Store bootstrapped coefficients in an empty list
listBoot <- list()
count <- 1
reps <- 4000

# Iterate through all axon types and planes
# Ensure reproducibility
# set.seed(12345)
for (i in c("Type1", "Type2", "Type3", "Type4")) {
  for (j in c("XY", "XZ", "YZ")) {
    listBoot[[count]] <- bootCoef(allData, i, j, "exchangeable", reps)
    count <- count + 1
  }
}
# saveRDS(listBoot, "bootCoefs400reps.rds")

# Store as data frame
bootFrame <- do.call(rbind, listBoot)
bootFrame$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), each = reps*3)
bootFrame$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = reps), 4)
bootFrame$NeurPlane <- interaction(bootFrame$neurType, bootFrame$Plane, lex.order=T)

# Extract the quantiles we need
listQuant <- list()
count <- 1
quantToEst <- c(0.025,0.05,0.95,0.975)

# Iterate through all axon types and planes
for (i in c("Type1", "Type2", "Type3", "Type4")) {
  for (j in c("XY", "XZ", "YZ")) {
    listQuant[[count]] <- quantile(bootFrame[bootFrame$neurType == i & bootFrame$Plane == j, 2], c(0.025,0.05,0.95,0.975))
    count <- count + 1
  }
}

# Store quantiles in data frame
quantMat <- as.data.frame(do.call(rbind, listQuant))
quantMat$NeurPlane <- factor(paste(rep(c("Type1", "Type2", "Type3", "Type4"), each = 3), rep(c("XY", "XZ", "YZ"), each = 4), sep="."))
quantFrame <- reshape2::melt(quantMat)
```

```{r}
#Represent the CI
ggplot(data=bootFrame, aes(x=LP, fill=Plane)) +
  geom_density(adjust=1.5, alpha = .4) +
  theme_bw() +
  facet_wrap(~neurType, ncol = 4) +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )
```



###Simulate experimental noise

####Check intra-cluster variance

```{r}
intraVar <- aggregate(LP ~ id, data = allData, function(x) sd(x)/mean(x))
intraVar$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)

png(filename=paste("intraclusterCV.png", sep=""), type="cairo",
    units="in", width=5, height=5, pointsize=12,
    res=300)

intraVar %>%
  ggplot( aes(x=neurType, y=LP, fill=neurType)) +
  geom_boxplot() +
  scale_fill_viridis(discrete = TRUE, alpha=0.6) +
  theme_bw() + 
  theme(
    legend.position="none",
    plot.title = element_text(size=11),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
  ) +
  xlab("Axonal type") +
  ylab("Intracluster Variance")

dev.off()
```

####Add normal noise

```{r}
set.seed(12345)

# type1Noise <- rnorm()
```

##Figure scripts OLD

####Intracluster variance

Same chunk as "Check intra-cluster variance" within "Bootstrapped coefficients"

```{r}
intraVar <- aggregate(LP ~ id, data = allData, function(x) sd(x)/mean(x))
intraVar$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)

#Ggplot
png(filename=paste("intraclusterCV.png", sep=""), type="cairo",
    units="in", width=5, height=5, pointsize=12,
    res=300)

intraVar %>%
  ggplot( aes(x=neurType, y=LP, fill=neurType)) +
  geom_boxplot() +
  scale_fill_viridis(discrete = TRUE, alpha=0.6) +
  theme_bw() + 
  theme(
    legend.position="none",
    plot.title = element_text(size=11),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
  ) +
  xlab("Axonal type") +
  ylab("Intracluster Variance")

dev.off()

#Base R
png(filename=paste("intraclusterCV_baseR.png", sep=""), type="cairo",
    units="in", width=5, height=5, pointsize=12,
    res=300)

boxplot(LP ~ neurType, data = intraVar, col= c("violet", "lightblue", "lightgreen", "tan1"), 
        outpch = 19, outbg = "black", outcol = "black", cex = .7, ylim = c(0,0.4))

dev.off()
```

###Inference

#####Original model bootstrap

Same chunk as "Check correlation matrix" within "GEE"

```{r, fig.width=10, fig.height=5}
# Bootstrap for inference
# Ensure reproducibility
# set.seed(12345)
# bootInter <- bootCoefint(dataNo4, "LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1", "exchangeable", 2000)

# saveRDS(bootInter, "bootCoefsInteraction.rds")
bootInter <- readRDS("bootCoefsInteraction.rds")

# Store as data frame
options(digits=10)
bootFrame <- data.frame(Type1XY = bootInter$LP, 
                        Type1XZ = bootInter$LP + bootInter$LP.PlaneXZ, 
                        Type1YZ = bootInter$LP + bootInter$LP.PlaneYZ,
                        Type2XY = bootInter$LP + bootInter$LP.neurTypeType2, 
                        Type2XZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneXZ,
                        Type2YZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneYZ,
                        Type3XY = bootInter$LP + bootInter$LP.neurTypeType3, 
                        Type3XZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneXZ,
                        Type3YZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneYZ)
bootMelt <- reshape2::melt(bootFrame)
bootMelt$neurType <- rep(c("Type1", "Type2", "Type3"), each = 6000)
bootMelt$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = 2000), 3)

#Represent LP
ggNeur <- ggplot(data=bootMelt, aes(x=value, fill=Plane)) +
  geom_density(adjust=1.5, alpha = .4) +
  theme_bw() +
  facet_wrap(~neurType, ncol = 3) +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )

ggPlanes <- ggplot(data=bootMelt, aes(x=value, fill=neurType)) +
  geom_density(adjust=1.5, alpha = .4) +
  theme_bw() +
  facet_wrap(~Plane, ncol = 3) +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )

png(filename=paste("bootstrapLP_neurvsplane.png", sep=""), type="cairo",
    units="in", width=10, height=3, pointsize=12,
    res=300)
ggarrange(ggNeur, ggPlanes, ncol = 2, nrow = 1)
dev.off()
```

#####CI95 plot

```{r}
# Extract the quantiles we need
quant <- apply(bootFrame, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
quantFrame <- reshape2::melt(quant)
quantFrame$y <- rep(1:9, each=3)

# Plot CI95
png(filename=paste("ci95.png", sep=""), type="cairo",
    units="in", width=5, height=5, pointsize=12,
    res=300)

plot(0, type="n", xlim=c(1.20,1.35), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
ytick <- seq(1, 9, by=1)
axis(side=2, at=ytick, labels = FALSE)
text(par("usr")[1], ytick,  
     labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)

for (i in seq(1, 27,3)) {
  segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
  points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
}
abline(v=c(1.268), lty = "dashed", col = "gray")
text(x=1.2765, y=10.1, "1.268",col="gray", font=2, cex=0.6)

dev.off()

# Zoom to check whether 2 and 7 overlap or not
png(filename=paste("ci95zoom.png", sep=""), type="cairo",
    units="in", width=5, height=5, pointsize=12,
    res=300)

plot(0,type="n", xlim=c(1.26,1.27), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
ytick<-seq(1, 9, by=1)
axis(side=2, at=ytick, labels = FALSE)
text(par("usr")[1], ytick,  
     labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)

for (i in seq(1, 27,3)) {
  segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
  points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
}
abline(v=c(1.268), lty = "dashed", col = "gray")

dev.off()
```

#####Global LP

```{r}
# Grand LP by sum of bootstrapped coeffients
globalLP <- data.frame(LP = bootInter$LP + bootInter$LP.PlaneXZ + bootInter$LP.PlaneYZ + bootInter$LP.neurTypeType2 + bootInter$LP.neurTypeType2.PlaneXZ + bootInter$LP.neurTypeType2.PlaneYZ + bootInter$LP.neurTypeType3 + bootInter$LP.neurTypeType3.PlaneXZ + bootInter$LP.neurTypeType3.PlaneYZ)
globalQuant <- quantile(globalLP$LP, c(0.025, 0.5, 0.975), type=7)

sumLP <- ggplot(data=globalLP, aes(x=LP)) +
  geom_density(adjust=1.5, alpha = .4) +
  geom_vline(xintercept = globalQuant, linetype="dotted", 
                color = "blue") + 
  theme_bw() +
  ggtitle("Sum LP") +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )

# Grand LP by bootstrapping
# Ensure reproducibility
# set.seed(12345)
# bootLP <- bootCoefint(dataNo4, "LR ~ LP - 1", "exchangeable", 2000)
bootLPQuant <- quantile(bootLP$LP, c(0.025, 0.5, 0.975), type=7)

bootLP <- ggplot(data=bootLP, aes(x=LP)) +
  geom_density(adjust=1.5, alpha = .4) +
  geom_vline(xintercept = bootLPQuant, linetype="dotted", 
                color = "blue") + 
  theme_bw() +
  ggtitle("Boot LP") +
  theme(
    legend.position="bottom",
    panel.spacing = unit(1, "lines"),
    axis.ticks.x=element_blank()
  )

png(filename=paste("LPestimations.png", sep=""), type="cairo",
    units="in", width=10, height=5, pointsize=12,
    res=300)

ggarrange(sumLP, bootLP, ncol = 2, nrow = 1)

dev.off()
```


##Figure scripts NEW

###Estimate alpha per neuron type

```{r}
# Alpha per neurType
options(digits = 10)

#######
# GEE #
#######

# Check correlation matrix
geeNeur.in <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "independence")
geeNeur.ex <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "exchangeable")
geeNeur.ar <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "ar1")
geeNeur.un <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "unstructured")

# Deviation from naive variance
sapply(list(geeNeur.in, geeNeur.ex, geeNeur.ar, geeNeur.un), var.crit)

# Bootstrap
# Ensure reproducibility
# set.seed(12345)
# bootNeur <- bootCoefint(allData, "LR ~ LP:neurType - 1", "exchangeable", 2000)
# saveRDS(bootNeur, "bootCoefs_perNeuron.rds")
# bootNeur <- readRDS("bootCoefs_perNeuron.rds")
# colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")

#######
# OLS #
#######

# Bootstrap
# Ensure reproducibility
# set.seed(12345)
# bootNeur <- bootCoefintLM(allData, "LR ~ LP:neurType - 1", 2000)
# saveRDS(bootNeur, "bootCoefs_perNeuron_OLS.rds")
bootNeur <- readRDS("ProyeccionAnalysis/bootCoefs_perNeuron_OLS.rds")
colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")

# Get R2 value
summary(lm(LR ~ LP:neurType - 1, allData))
```

####CI95 plot

```{r}
# Extract the quantiles we need
quant <- apply(bootNeur, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
quantFrame <- reshape2::melt(quant)
quantFrame$y <- rep(1:4, each=3)

# Plot CI95
png(filename=paste("ci95_perNeuron_OLS.png", sep=""), type="cairo",
    units="in", width=5, height=5, pointsize=12,
    res=300)

plot(0, type="n", xlim=c(1.265, 1.293), ylim=c(0.5,4.5), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
ytick <- seq(1, 4, by=1)
axis(side=2, at=ytick, labels = FALSE)
text(par("usr")[1], ytick,  
     labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)

for (i in seq(1, 12,3)) {
  segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
  points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
}
rect(quantFrame[quantFrame$Var1 == "2.5%" & quantFrame$Var2 == "Type4", 3], -1,
     quantFrame[quantFrame$Var1 == "97.5%" & quantFrame$Var2 == "Type3", 3], 6,
     col = rgb(0.5,0.5,0.5,1/4), lty = "dashed", border = "darkgray")

dev.off()
```

####Line with shades

```{r, fig.width=8, fig.height=8}
png(filename=paste("lines_perNeuron_OLS.png", sep=""), type="cairo",
    units="in", width=5, height=5, pointsize=12,
    res=300)

op <- par(mfrow = c(2,2),
          oma = c(0,0,0,0) + 0.1,
          mar = c(0,0,1,1) + 0.1)

for (i in (1:4)) {
  # Get axis range
  xRange <- allData[allData$neurType ==  paste("Type", i, sep=""), 3]
  yRange <- allData[allData$neurType ==  paste("Type", i, sep=""), 2]
  # Get max, mean and min alphas
  alphaVec <- c(max(bootNeur[,i]), median(bootNeur[,i]), min(bootNeur[,1]))
  # Plot observations, mean line and shade with span of all booted lines
  plot(LR ~ LP, data = allData[allData$neurType == paste("Type", i, sep=""), ], pch = 21, cex = 0.5, bg = "gray", col = NULL,
       xlab = "Projected 2D Length", ylab = "Real 3D Length", xaxt="n", yaxt = "n")
  lines(xRange, alphaVec[2]*xRange, col = "red", lwd = 0.5)
  polygon(x = c(min(xRange), min(xRange), max(xRange), max(xRange)),
          y = c(min(alphaVec[3]*xRange), min(alphaVec[1]*xRange), max(alphaVec[3]*xRange), max(alphaVec[1]*xRange)),
          col = rgb(1,0,0, alpha = 0.3), border = NA)
  # plotrix::corner.label(x = -1, y = 1, paste("Type", i, sep=""))
  
}

par(op)

dev.off()
```