12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202 |
- ---
- title: "Axonal Length"
- author: "Sergio D?ez Hermano"
- date: '`r format(Sys.Date(),"%e de %B, %Y")`'
- output:
- html_document:
- highlight: tango
- toc: yes
- toc_depth: 4
- toc_float:
- collapsed: no
- ---
- ```{r setup, include=FALSE}
- require(knitr)
- # include this code chunk as-is to set options
- opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE)
- Sys.setlocale("LC_TIME", "C")
- ```
- ##Load libraries and functions
- ```{r}
- library(R.matlab)
- library(lattice)
- library(dplyr)
- library(ggplot2)
- library(ggridges)
- library(hrbrthemes)
- library(viridis)
- library(ggpubr)
- library(mltools)
- library(data.table)
- library(caret)
- library(interactions)
- library(performance)
- library(MASS)
- library(geepack)
- library(pstools)
- library(MXM)
- library(rmcorr)
- library(multcomp)
- library(Hmisc)
- options(digits = 10)
- # https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html
- #########################################
- # Deviation from empirical variance GEE #
- #########################################
- var.crit <- function(mymodel) sum(abs(mymodel[["geese"]]$vbeta.naiv - mymodel[["geese"]]$vbeta))
- ############################
- # Distances between planes #
- ############################
- distPlan <- function(mydata) {
-
- # Vector to store distances
- distPlanes <- vector()
-
- # Estimate distances between planes for each subject
- for (i in unique(mydata$id)) {
- LPvals <- mydata[mydata$id == i, 3]
- distPlanes <- c(distPlanes,
- (LPvals[1] - LPvals[2])/max(LPvals[1], LPvals[2]),
- (LPvals[1] - LPvals[3])/max(LPvals[1], LPvals[3]),
- (LPvals[2] - LPvals[3])/max(LPvals[2], LPvals[3]))
- }
-
-
- # Concatenate factor with substraction information
- numSubjects <- length(unique(mydata$id))
- subsFactor <- rep(c("XY - XZ", "XY - YZ", "YZ - XZ"), rep = numSubjects)
- distFrame <- data.frame(id = mydata$id,
- type = mydata$neurType,
- distPlanes = distPlanes,
- substraction = subsFactor)
-
- return(distFrame)
-
- }
- ##########################
- # Bootstrap coefficients #
- ##########################
- bootCoef <- function(mydata, axontype, projection, corrMat, nreps) {
- # Select data
- modData <- mydata[mydata$neurType == axontype & mydata$Plane == projection, ]
-
- # Fit the model
- modGee <- geeglm(LR ~ LP, data = modData, id = id, corstr = corrMat)
-
- # Ensure reproducibility
- set.seed(12345)
-
- # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
- coefmat <- matrix(NA, nreps, 2)
-
- # We need the fitted values and the residuals from the model
- resids <- residuals(modGee)
- preds <- fitted(modGee)
-
- # Repeatedly generate bootstrapped responses
- for(i in 1:nreps) {
- booty <- preds + sample(resids, rep=TRUE)
- modGee2 <- update(modGee, booty ~ .)
- coefmat[i,] <- coef(modGee2)
- }
-
- # Create a dataframe to store predictors values
- colnames(coefmat) <- names(coef(modGee2))
- coefmat <- data.frame(coefmat)
-
- return(coefmat)
- }
- ##############################
- # Bootstrap coefficients gEE #
- ##############################
- # For models with interactions
- bootCoefint <- function(mydata, modForm, corrMat, nreps) {
-
- # Fit model
- modGee <- geeglm(formula(modForm), data = mydata, id = id, corstr = corrMat)
- # Ensure reproducibility
- # set.seed(12345)
-
- # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
- coefmat <- matrix(NA, nreps, length(coef(modGee)))
-
- # We need the fitted values and the residuals from the model
- resids <- residuals(modGee)
- preds <- fitted(modGee)
-
- # Repeatedly generate bootstrapped responses
- for(i in 1:nreps) {
- booty <- preds + sample(resids, rep=TRUE)
- modGee2 <- update(modGee, booty ~ .)
- coefmat[i,] <- coef(modGee2)
- print(i)
- }
-
- # Create a dataframe to store predictors values
- colnames(coefmat) <- names(coef(modGee2))
- coefmat <- data.frame(coefmat)
-
- return(coefmat)
- }
- #############################
- # Bootstrap coefficients LM #
- #############################
- # For models with interactions
- bootCoefintLM <- function(mydata, modForm, nreps) {
-
- # Fit model
- modLM <- lm(formula(modForm), data = mydata)
- # Ensure reproducibility
- # set.seed(12345)
-
- # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
- coefmat <- matrix(NA, nreps, length(coef(modLM)))
-
- # We need the fitted values and the residuals from the model
- resids <- residuals(modLM)
- preds <- fitted(modLM)
-
- # Repeatedly generate bootstrapped responses
- for(i in 1:nreps) {
- booty <- preds + sample(resids, rep=TRUE)
- modLM2 <- update(modLM, booty ~ .)
- coefmat[i,] <- coef(modLM2)
- print(i)
- }
-
- # Create a dataframe to store predictors values
- colnames(coefmat) <- names(coef(modLM2))
- coefmat <- data.frame(coefmat)
-
- return(coefmat)
- }
- ################
- # Bootstrap PI #
- ################
- the.replication <- function(reg, s, x_Np1 = 0){
- # Make bootstrap residuals
- ep.star <- sample(s, size=length(reg$residuals),replace=TRUE)
- # Make bootstrap Y
- y.star <- fitted(reg) + ep.star
- # Do bootstrap regression
- lp <- model.frame(reg)[,2]
- nt <- model.frame(reg)[,3]
- bs.reg <- lm(y.star ~ lp:nt - 1)
- # Create bootstrapped adjusted residuals
- bs.lev <- influence(bs.reg)$hat
- bs.s <- residuals(bs.reg)/sqrt(1-bs.lev)
- bs.s <- bs.s - mean(bs.s)
- # Calculate draw on prediction error
- # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"]
- xb.xb <- (coef(my.reg)[1] - coef(bs.reg)[1])*x_Np1
- return(unname(xb.xb + sample(bs.s, size=1)))
-
- }
- ############################
- # Bootstrap PI generalized #
- ############################
- the.replication.gen <- function(reg, s, axType, x_Np1 = 0){
- # Make bootstrap residuals
- ep.star <- sample(s, size=length(reg$residuals), replace=TRUE)
- # Make bootstrap Y
- y.star <- fitted(reg) + ep.star
- # Do bootstrap regression
- lp <- model.frame(reg)[,2]
- nt <- model.frame(reg)[,3]
- bs.reg <- lm(y.star ~ lp:nt - 1)
- # Create bootstrapped adjusted residuals
- bs.lev <- influence(bs.reg)$hat
- bs.s <- residuals(bs.reg)/sqrt(1-bs.lev)
- bs.s <- bs.s - mean(bs.s)
- # Calculate draw on prediction error
- # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"]
- xb.xb <- (coef(my.reg)[axType] - coef(bs.reg)[axType])*x_Np1
- return(unname(xb.xb + sample(bs.s, size=1)))
-
- }
- ```
- ##Load data
- ```{r}
- # Get file paths
- axonNames <- list.files(paste("ProyeccionData/", sep=""), full.names=T)
- # Load matlab file
- axonFiles <- lapply(axonNames, function(x) readMat(x))
- names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4")
- # VAR NAMES
- # REAL_LENGTH, PROYECTION_LENGTH_XY, PROYECTION_LENGTH_XZ, PROYECTION_LENGTH_YZ
- # Extract data
- realLength <- lapply(axonFiles, function(x) x$REAL.LENGTH)
- planeXY <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XY)
- planeXZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XZ)
- planeYZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.YZ)
- # Get number of neurons per type
- numberTypes <- unlist(lapply(realLength, function(x) length(x)))
- # Store in data frames by plane
- xyData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeXY), Plane = rep("XY", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- xzData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeXZ), Plane = rep("XZ", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- yzData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeYZ), Plane = rep("YZ", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- # Merge into a single data frame and sort by id
- allData <- do.call("rbind", list(xyData, xzData, yzData))
- allData <- allData[order(allData$id),]
- # Add different number for every neuron-plane combination
- allData$NeurPlane <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]),
- rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
- allData$interact <-interaction(allData$neurType, allData$Plane, lex.order=T)
- # Create data frame w/o Type4
- dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)
- # Data in short format
- dataShort <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), XY = unlist(planeXY), XZ = unlist(planeXZ), YZ = unlist(planeYZ),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- ```
- ##Error distributions for median(beta)
- ```{r}
- # Estimate error per axon type and plane using median
- bootNeur <- readRDS("bootCoefs_perNeuron_OLS.rds")
- colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")
- medianAlpha <- apply(bootNeur, 2, median)
- dataError <- allData
- dataError$alpha <- dataError$LR/dataError$LP
- dataError$neurError <- NA
- pos <- 1
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
- # Subset and divide alpha
- alphaVec <- dataError[dataError$neurType == i & dataError$Plane == j, 8]
- dataError[dataError$neurType == i & dataError$Plane == j, 9] <- 100*(1 - medianAlpha[pos]/alphaVec)
- }
- pos <- pos + 1
- }
- ```
- ###Plot
- ####Fixed error
- ```{r}
- # Plot
- png(filename=paste("errorDistributions_5error_ols.png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- ggplot(dataError, aes(x = `neurError`, y = `interact`, fill = ifelse(..x.. >=-5 & ..x.. <=5, ">= -5", "< 5"))) +
- scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
- geom_density_ridges_gradient(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- xlab("% Error")
- dev.off()
- ```
- ####Fixed error ribbon
- ```{r, fig.height=10, fig.width=6}
- dataError$x <- dataError$neurError
- # Density plot -----------------------------------------------
- jj <- ggplot(dataError, aes(x = x, y = interact)) +
- stat_density_ridges(
- geom = "density_ridges_gradient") +
- theme_ridges() +
- theme(
- plot.margin = margin(t = 1, r = 1, b = 0.5, l = 0.5, "cm")
- )
- # Build ggplot and extract data
- d <- ggplot_build(jj)$data[[1]]
- # Add geom_ribbon for shaded area
- jj +
- geom_ribbon(
- data = transform(subset(d, x >= -5 & x <= 5), interact = group),
- aes(x, ymin = ymin, ymax = ymax, group = group),
- fill = "red",
- alpha = 0.5)
- ```
- ####Fixed quantiles
- ```{r, fig.height=10, fig.width=8}
- # https://stackoverflow.com/questions/49961582/how-shade-area-under-ggridges-curve/49971690#49971690
- png(filename=paste("errorDistributions_ols.png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- ggplot(dataError, aes(x = neurError, y = interact, fill = factor(stat(quantile)))) +
- stat_density_ridges(
- geom = "density_ridges_gradient",
- calc_ecdf = TRUE,
- quantiles = c(0.125, 0.875),
- quantile_lines = FALSE) +
- scale_fill_manual(
- # name = "Probability", values = c("#A0A0A0A0", "#FF0000A0", "#A0A0A0A0"),
- name = "Probability", values = c("gray70", "coral2", "gray70"),
- labels = c("(0, 0.125]", "(0.125, 0.875]", "(0.875, 1]")) +
- theme_ridges() +
- theme(legend.position = "none") +
- xlab("% Error") +
- ylab("Axon:Projection combination")
- dev.off()
- ```
- ###Error vs LP
- ```{r}
- gplots::hist2d(dataError[, c(3,9)], nbins=50)
- ```
- ##Error distributions for all betas
- ```{r}
- # Estimate error per axon type and plane using median
- bootNeur <- readRDS("ProyeccionAnalysis/bootCoefs_perNeuron_OLS.rds")
- colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")
- dataAllbetas <- allData
- dataAllbetas$alpha <- dataAllbetas$LR/dataAllbetas$LP
- axType <- 1
- listPos <- 1
- listContour <- list()
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- # Subset alpha
- alphaVec <- dataAllbetas[dataAllbetas$neurType == i & dataAllbetas$Plane == j, 8]
- # Create empty matrix
- matError <- matrix(ncol = 2000, nrow = numberTypes[axType])
- colCount <- 1
-
- for (h in bootNeur[, axType]) {
-
- # Estimate errors for every beta
- matError[, colCount] <- 100*(1 - h/alphaVec)
- colCount <- colCount + 1
-
- }
-
- colnames(matError) <- bootNeur[, axType]
- listContour[[listPos]] <- matError
- listPos <- listPos + 1
-
- }
-
- axType <- axType + 1
-
- }
- ```
- ###Reformat data
- ```{r}
- # Add names to list
- names(listContour) <- unique(dataAllbetas$interact)
- # Store as vectors
- fullVec <- lapply(listContour, function(x) as.vector(x))
- # Trasnform into data frames by neurType
- alphaType1 <- bootNeur[, 1]
- axType1 <- data.frame(errors = unlist(fullVec[c(1,2,3)]), Plane = rep(c("XY", "XZ", "YZ"), each = numberTypes[1]*2000),
- alphaBoot = rep(rep(alphaType1, each = numberTypes[1]), 3), axType = rep("Type1", 3*numberTypes[1]*2000))
- alphaType2 <- bootNeur[, 2]
- axType2 <- data.frame(errors = unlist(fullVec[c(4,5,6)]), Plane = rep(c("XY", "XZ", "YZ"), each = numberTypes[2]*2000),
- alphaBoot = rep(rep(alphaType2, each = numberTypes[2]), 3), axType = rep("Type2", 3*numberTypes[2]*2000))
- alphaType3 <- bootNeur[, 3]
- axType3 <- data.frame(errors = unlist(fullVec[c(7,8,9)]), Plane = rep(c("XY", "XZ", "YZ"), each = numberTypes[3]*2000),
- alphaBoot = rep(rep(alphaType3, each = numberTypes[3]), 3), axType = rep("Type3", 3*numberTypes[3]*2000))
- alphaType4 <- bootNeur[, 4]
- axType4 <- data.frame(errors = unlist(fullVec[c(10,11,12)]), Plane = rep(c("XY", "XZ", "YZ"), each = numberTypes[4]*2000),
- alphaBoot = rep(rep(alphaType4, each = numberTypes[4]), 3), axType = rep("Type4", 3*numberTypes[4]*2000))
- # Single data frame
- axData <- rbind(axType1, axType2, axType3, axType4)
- axData$axType <- factor(axData$axType)
- ```
- ###Plot
- ```{r, fig.width=15, fig.height=15}
- # https://www.r-graph-gallery.com/2d-density-plot-with-ggplot2.html#distr
- # Area + contour global
- g <- ggplot(axData, aes(x=alphaBoot, y=errors)) +
- stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
- theme_bw()
- png(filename=paste("errorDistributions_contour_ols.png", sep=""), type="cairo",
- units="in", width=10, height=12, pointsize=12,
- res=300)
- g + facet_grid(axType ~ Plane) + scale_fill_viridis_c()
- dev.off()
- # Area + contour by axType
- ax1 <- ggplot(axType1, aes(x=alphaBoot, y=errors)) +
- stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
- theme_bw()
- png(filename=paste("errorDistributions_contour_ax1_ols.png", sep=""), type="cairo",
- units="in", width=10, height=5, pointsize=12,
- res=300)
- ax1 + facet_grid( ~ Plane) + scale_fill_viridis_c()
- dev.off()
- ax2 <- ggplot(axType2, aes(x=alphaBoot, y=errors)) +
- stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
- theme_bw()
- png(filename=paste("errorDistributions_contour_ax2_ols.png", sep=""), type="cairo",
- units="in", width=10, height=5, pointsize=12,
- res=300)
- ax2 + facet_grid( ~ Plane) + scale_fill_viridis_c()
- dev.off()
- ax3 <- ggplot(axType3, aes(x=alphaBoot, y=errors)) +
- stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
- theme_bw()
- png(filename=paste("errorDistributions_contour_ax3_ols.png", sep=""), type="cairo",
- units="in", width=10, height=5, pointsize=12,
- res=300)
- ax3 + facet_grid( ~ Plane) + scale_fill_viridis_c()
- dev.off()
- ax4 <- ggplot(axType4, aes(x=alphaBoot, y=errors)) +
- stat_density_2d(aes(fill = after_stat(nlevel)), geom = "polygon") +
- theme_bw()
- png(filename=paste("errorDistributions_contour_ax4_ols.png", sep=""), type="cairo",
- units="in", width=10, height=5, pointsize=12,
- res=300)
- ax4 + facet_grid( ~ Plane) + scale_fill_viridis_c()
- dev.off()
- ```
- ##Bootstrap Pred Interval
- ###Example with mean LP
- ```{r}
- # OLS
- my.reg <- lm(LR ~ LP:neurType - 1, allData)
- summary(my.reg)
- # Mean LP per axonType
- meanLP <- aggregate(LP ~ neurType, allData, mean)
- meanLR <- aggregate(LR ~ neurType, allData, mean)
- # Predict LR for neurType1
- y.p <- coef(my.reg)[1]*meanLP[1,2]
- y.p
- # Create adjusted residuals
- leverage <- influence(my.reg)$hat
- my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
- my.s.resid <- my.s.resid - mean(my.s.resid)
- reg <- my.reg
- s <- my.s.resid
- # Do bootstrap with 10,000 replications
- set.seed(123456)
- ep.draws <- replicate(n=100, the.replication(reg=my.reg, s=my.s.resid, x_Np1 = meanLP[1,2]))
- # Create prediction interval
- y.p + quantile(ep.draws, probs=c(0.025,0.975))
- # prediction interval using normal assumption
- predict(my.reg,
- newdata=data.frame(neurType = "Type1",
- LP = meanLP[1,2]),interval="prediction", level=0.95)
- ```
- ###Example with all LPs
- ```{r}
- # OLS
- my.reg <- lm(LR ~ LP:neurType - 1, allData)
- # List to store axon types
- axList <- list()
- # List to store random selection
- axRand <- list()
- axCount <- 1
- # 1/3 random observations per plane, no replacement
- numExtract <- floor(1/3*numberTypes)
- # Original data frame copy
- dataToBoot <- allData
- # Ensure reproducibility
- set.seed(123456)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- # List to store planes
- planeList <- list()
- # List to store random selection
- planeRand <- list()
- planeCount <- 1
-
- for (j in c("XY", "XZ", "YZ")) {
-
- # Subset plane and axon type
- dataGroup <- dataToBoot[dataToBoot$neurType == i & dataToBoot$Plane == j, ]
-
- # Random sample of N=numExtract (variable per axon type)
- dataSub <- sample_n(dataGroup, numExtract[axCount], replace = FALSE)
- # Store selected for later
- planeRand[[planeCount]] <- dataSub
- # Remove from data so they can not be selected again
- # dataToBoot <- dataToBoot[! dataToBoot$id %in% dataSub$id, ]
-
- # List to store errors
- epList <- list()
- epCount <- 1
-
- for (k in dataSub[ , "LP"]) {
-
- # Predict LR for neurType axCount
- y.p <- coef(my.reg)[axCount]*k
-
- # Create adjusted residuals
- leverage <- influence(my.reg)$hat
- my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
- my.s.resid <- my.s.resid - mean(my.s.resid)
-
- # Do bootstrap with 100 replications
- ep.draws <- replicate(n=100, the.replication.gen(reg = my.reg, s = my.s.resid, axCount, x_Np1 = k))
-
- # Store in list
- epList[[epCount]] <- ep.draws
- epCount <- epCount + 1
-
- print(epCount)
-
- }
-
- # Store in list
- planeList[[planeCount]] <- epList
- planeCount <- planeCount + 1
-
- }
-
- names(planeList) <- c("XY", "XZ", "YZ")
- names(planeRand) <- c("XY", "XZ", "YZ")
-
- # Store in list
- axList[[axCount]] <- planeList
- axRand[[axCount]] <- planeRand
- axCount <- axCount + 1
-
- # Renew original data for next axon type
- dataToBoot <- allData
-
- }
- names(axList) <- c("Type1", "Type2", "Type3", "Type4")
- names(axRand) <- c("Type1", "Type2", "Type3", "Type4")
- saveRDS(list(axList, axRand), "bootPI_full_Replacement.rds")
- ```
- ####Check convergence
- ```{r}
- # Load object
- bootCheck <- readRDS("bootPI_full.rds")
- # First list is prediction error, second is random subset
- axList <- bootCheck[[1]]
- axRand <- bootCheck[[2]]
- # One example
- # OLS
- my.reg <- lm(LR ~ LP:neurType - 1, allData)
- # Estimate and store in list
- lrpList <- list()
- lrpCount <- 1
- axCount <- 1
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
-
- # Extract LP and LR values
- lp <- axRand[[i]][[j]]$LP
- lr <- axRand[[i]][[j]]$LR
-
- for (k in 1:length(lp)) {
-
- # Predicted LR
- lr.p <- coef(my.reg)[axCount]*lp[k]
-
- # Extract absolute errors
- ae <- axList[[i]][[j]][[k]]
-
- # Empty vectors for storage
- lrp.Low <- numeric()
- lrp.Up <- numeric()
-
- # Estimate PI95 for boot n=10...100
- for (l in 4:100) {
-
- lrp <- lr.p + quantile(ae[1:l], probs = c(0.025,0.975))
-
- # Store in vectors
- lrp.Low <- c(lrp.Low, lrp[1])
- lrp.Up <- c(lrp.Up, lrp[2])
-
- }
-
- # Store in data frame
- lrpFrame <- data.frame(nboot = 4:100, lowLim = lrp.Low, upLim = lrp.Up, LR = rep(lr[k], length(4:100)))
-
- # Store in list
- lrpList[[lrpCount]] <- lrpFrame
- lrpCount <- lrpCount + 1
- }
-
- }
-
- axCount <- axCount + 1
-
- }
- ```
- #####Reformat
- ```{r}
- # 1/3 random observations per plane, no replacement
- numExtract <- floor(1/3*numberTypes)
- checkFrame <- do.call(rbind, lrpList)
- checkFrame$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), times = numExtract*(length(4:100))*3)
- checkFrame$Plane <- c(rep(c("XY", "XZ", "YZ"), each = numExtract[1]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[2]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[3]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[4]*length(4:100)))
- lowMean <- aggregate(lowLim ~ neurType + Plane + nboot, checkFrame, mean)
- upMean <- aggregate(upLim ~ neurType + Plane + nboot, checkFrame, mean)
- LRmean <- aggregate(LR ~ neurType + Plane + nboot, checkFrame, mean)
- cumError <- with(lowMean[lowMean$neurType == "Type1" & lowMean$Plane == "XY", ],
- abs((lowLim[-1] - lowLim[-length(lowLim)])/lowLim[-length(lowLim)])*100)
- ```
- #####Plot
- ```{r, fig.height=20, fig.width=30}
- # Separate plots
- png(filename=paste("convergence_bootPI_separate.png", sep=""), type="cairo",
- units="in", width=30, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,6), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- plot(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2)
- plot(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
-
- }
-
- }
- dev.off()
- # Same plot
- png(filename=paste("convergence_bootPI_together.png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,3), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- minY <- min(lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"])
- maxY <- max(upMean[upMean$neurType == i & upMean$Plane == j, "upLim"])
- plot(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2, ylim = c(minY, maxY))
- lines(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
- # lines(LR ~ nboot, LRmean[LRmean$neurType == i & LRmean$Plane == j, ], type = "l", lwd = 2, col = "red")
-
- }
-
- }
- dev.off()
- # Diff plot
- png(filename=paste("convergence_bootPI_Diff.png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,3), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- minY <- min(lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"])
- maxY <- max(upMean[upMean$neurType == i & upMean$Plane == j, "upLim"])
- diflu <- upMean[upMean$neurType == i & upMean$Plane == j, "upLim"] - lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"]
- plot(4:100, diflu, type = "l", col = "red", lwd = 2)
-
- }
-
- }
- dev.off()
- # Same plot random intervals
- for (m in 1:4) {
-
- png(filename=paste("convergence_bootPI_together_example_rand", m, ".png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
-
- par(mfrow=c(4,3), cex = 1.1)
-
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- subData <- checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ]
- randLR <- sample(subData[, "LR"], 1)
- # randLR <- sample(subData[which(subData$lowLim > subData$LR), "LR"], 1)
- minY <- min(subData[subData$LR == randLR, "lowLim"])
- maxY <- max(subData[subData$LR == randLR, "upLim"])
- plot(lowLim ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2, ylim = c(minY, maxY))
- lines(upLim ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2)
- lines(LR ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2, col = "red")
-
- }
-
- }
-
- dev.off()
-
- }
- # Separate with lines
- png(filename=paste("convergence_bootPI_separate_FULL.png", sep=""), type="cairo",
- units="in", width=30, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,6), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- plot(lowLim ~ nboot, checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ], type = "l", col = "lightgray")
- lines(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2)
-
- plot(upLim ~ nboot, checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ], type = "l", col = "lightgray")
- lines(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
-
- }
-
- }
- dev.off()
- ```
- ####Estimate porcentual errors
- ```{r, fig.width=8, fig.height=8}
- # 1/3 random observations per plane, no replacement
- numExtract <- floor(1/3*numberTypes)
- # Load object
- bootPI <- readRDS("ProyeccionAnalysis/bootPI_full_Replacement_TRUE.rds")
- # First list is prediction error, second is random subset
- axList <- bootPI[[1]]
- axRand <- bootPI[[2]]
- # Estimate and store in list
- peList <- list()
- peCount <- 1
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
-
- # Extract LR values
- lr <- axRand[[i]][[j]]$LR
-
- for (k in 1:length(lr)) {
-
- # Divide absolute errors by LR to get porcentual errors
- pe <- axList[[i]][[j]][[k]] / lr[k]
-
- # Store in list
- peList[[peCount]] <- pe*100
- peCount <-peCount + 1
- }
-
- }
-
- }
- # Reformat as data frame
- # for bootPI
- # peFrame <- data.frame(pe = unlist(peList),
- # neurType = rep(c("Type1", "Type2", "Type3", "Type4"), each = 20*100*3),
- # Plane = rep(rep(c("XY", "XZ", "YZ"), each = 20*100), 4))
- # for bootPI_full
- peFrame <- data.frame(pe = unlist(peList),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), times = numExtract*100*3),
- Plane = c(rep(c("XY", "XZ", "YZ"), each = numExtract[1]*100),
- rep(c("XY", "XZ", "YZ"), each = numExtract[2]*100),
- rep(c("XY", "XZ", "YZ"), each = numExtract[3]*100),
- rep(c("XY", "XZ", "YZ"), each = numExtract[4]*100)))
- peFrame$interact <- interaction(peFrame$neurType, peFrame$Plane, lex.order=T)
- ```
- #####Plot density and histogram
- ```{r}
- # Plot
- # peGroup <- peFrame %>% group_by(interact)
- # peSample <- sample_n(peGroup, 100)
- png(filename=paste("prederrorDistributions_ols_FULLsample_Repeat_TRUE.png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- (p <- ggplot(peFrame, aes(x = `pe`, y = `interact`, fill = ifelse(..x.. >=-5 & ..x.. <=5, ">= -5", "< 5"))) +
- scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
- geom_density_ridges_gradient(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- xlab("% Error")) +
- xlim(c(-40,40))
- dev.off()
- png(filename=paste("prederrorHistogram_ols_FULLsample_Repeat_TRUE.png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- # Histogram
- # nint 300 bootPI
- # nint 500 bootPI_full
- # nint 400 bootPI_full_replace
- (h <- histogram( ~ pe | Plane + neurType, peFrame, nint = 400, type ="density", xlim = c(-50,50)))
- dev.off()
- ```
- #####Plot errors vs cumul probability
- ```{r, fig.height=5, fig.width=20}
- # Inverse quantile
- quantInv <- function(distr, value) ecdf(distr)(value)
- #ecdf plot example
- # plot(ecdf(peFrame[peFrame$interact == "Type1.XY", "pe"]))
- # lines(ecdf(peFrame[peFrame$interact == "Type1.XZ", "pe"]), col = "red")
- # lines(ecdf(peFrame[peFrame$interact == "Type1.YZ", "pe"]), col = "blue")
- probList <- list()
- binProb <- 0.5
- probSeq <- seq(binProb, 100, binProb)
- for (i in unique(peFrame$interact)) {
-
- dataProb <- peFrame[peFrame$interact == i, "pe"]
- probVec <- numeric()
-
- for (j in probSeq) {
-
- errProb <- quantInv(dataProb, j) - quantInv(dataProb, -j)
- probVec <- c(probVec, errProb)
-
- }
-
- probList[[i]] <- probVec
-
- }
- # Plot with limited axis
- setwd("ProyeccionAnalysis/")
- png(filename=paste("errorProbabilityCum_bin", binProb, ".png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,1), xlim = c(0.5, 40),
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- # Plot with free axis
- png(filename=paste("errorProbabilityCum_bin", binProb, "_freeAxis.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- ```
- #####Plot errors vs inverse cumul probability
- ```{r, fig.height=5, fig.width=20}
- # Inverse quantile
- quantInv <- function(distr, value) ecdf(distr)(value)
- #ecdf plot example
- # plot(ecdf(peFrame[peFrame$interact == "Type1.XY", "pe"]))
- # lines(ecdf(peFrame[peFrame$interact == "Type1.XZ", "pe"]), col = "red")
- # lines(ecdf(peFrame[peFrame$interact == "Type1.YZ", "pe"]), col = "blue")
- probList <- list()
- binProb <- 0.5
- probSeq <- seq(binProb, 100, binProb)
- for (i in unique(peFrame$interact)) {
-
- dataProb <- peFrame[peFrame$interact == i, "pe"]
- probVec <- numeric()
-
- for (j in probSeq) {
-
- errProb <- 1 - (quantInv(dataProb, j) - quantInv(dataProb, -j))
- probVec <- c(probVec, errProb)
-
- }
-
- probList[[i]] <- probVec
-
- }
- # Plot with limited axis
- setwd("ProyeccionAnalysis/")
- png(filename=paste("errorProbabilityCumInv_bin", binProb, ".png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,1), xlim = c(0.5, 40),
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.05, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- # Plot with free axis
- png(filename=paste("errorProbabilityCumInv_bin", binProb, "_freeAxis.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.05, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- ```
- #####Plot errors vs probability
- ```{r, fig.height=5, fig.width=20}
- # Inverse quantile
- quantInv <- function(distr, value) ecdf(distr)(value)
- #ecdf plot example
- # plot(ecdf(peFrame[peFrame$interact == "Type1.XY", "pe"]))
- # lines(ecdf(peFrame[peFrame$interact == "Type1.XZ", "pe"]), col = "red")
- # lines(ecdf(peFrame[peFrame$interact == "Type1.YZ", "pe"]), col = "blue")
- probList <- list()
- binProb <- 1
- probSeq <- seq(0, 100, binProb)
- intProb <- 0.1
- for (i in unique(peFrame$interact)) {
-
- dataProb <- peFrame[peFrame$interact == i, "pe"]
- probVec <- numeric()
-
- for (j in probSeq) {
-
- errProbPos <- quantInv(dataProb, j + intProb) - quantInv(dataProb, j - intProb)
- errProbNeg <- quantInv(dataProb, -j + intProb) - quantInv(dataProb, - j - intProb)
- probVec <- c(probVec, errProbPos + errProbNeg)
-
- }
-
- probList[[i]] <- probVec
-
- }
- # Plot with limited axis
- setwd("ProyeccionAnalysis/")
- png(filename=paste("errorProbability_bin", binProb, ".png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,0.07), xlim = c(0.5, 40),
- ylab = "Probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- # Plot with free axis
- png(filename=paste("errorProbability_bin", binProb, "_freeAxis.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
- ylab = "Probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- ```
|