- ---
- title: "Axonal Length"
- author: "Sergio Díez Hermano"
- date: '`r format(Sys.Date(), "%e %B, %Y")`'
- output:
- html_document:
- highlight: tango
- toc: yes
- toc_depth: 4
- toc_float:
- collapsed: no
- ---
- ```{r setup, include=FALSE}
- require(knitr)
- # include this code chunk as-is to set options
- opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE)
- Sys.setlocale("LC_TIME", "C")
- ```
- ## Load libraries and functions
- ```{r}
- library(R.matlab)
- library(lattice)
- library(dplyr)
- library(ggplot2)
- library(ggridges)
- library(hrbrthemes)
- library(viridis)
- library(ggpubr)
- library(mltools)
- library(data.table)
- library(caret)
- library(interactions)
- library(performance)
- library(MASS)
- library(geepack)
- library(pstools)
- library(MXM)
- library(rmcorr)
- library(multcomp)
- library(Hmisc)
- options(digits = 10)
- # https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html
- ################
- # Bootstrap PI #
- ################
- the.replication <- function(reg, s, x_Np1 = 0){
- # Make bootstrap residuals
- ep.star <- sample(s, size=length(reg$residuals),replace=TRUE)
- # Make bootstrap Y
- y.star <- fitted(reg) + ep.star
- # Do bootstrap regression
- lp <- model.frame(reg)[,2]
- nt <- model.frame(reg)[,3]
- bs.reg <- lm(y.star ~ lp:nt - 1)
- # Create bootstrapped adjusted residuals
- bs.lev <- influence(bs.reg)$hat
- bs.s <- residuals(bs.reg)/sqrt(1-bs.lev)
- bs.s <- bs.s - mean(bs.s)
- # Calculate draw on prediction error
- # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"]
- xb.xb <- (coef(reg)[1] - coef(bs.reg)[1])*x_Np1
- return(unname(xb.xb + sample(bs.s, size=1)))
-
- }
- ############################
- # Bootstrap PI generalized #
- ############################
- the.replication.gen <- function(reg, s, axType, x_Np1 = 0){
- # Make bootstrap residuals
- ep.star <- sample(s, size=length(reg$residuals), replace=TRUE)
- # Make bootstrap Y
- y.star <- fitted(reg) + ep.star
- # Do bootstrap regression
- lp <- model.frame(reg)[,2]
- nt <- model.frame(reg)[,3]
- bs.reg <- lm(y.star ~ lp:nt - 1)
- # Create bootstrapped adjusted residuals
- bs.lev <- influence(bs.reg)$hat
- bs.s <- residuals(bs.reg)/sqrt(1-bs.lev)
- bs.s <- bs.s - mean(bs.s)
- # Calculate draw on prediction error
- # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"]
- xb.xb <- (coef(reg)[axType] - coef(bs.reg)[axType])*x_Np1
- return(unname(xb.xb + sample(bs.s, size=1)))
-
- }
- ##############################
- # Bootstrap PI per axon type #
- ##############################
- the.replication.type <- function(reg, s, x_Np1 = 0){
- # Make bootstrap residuals
- ep.star <- sample(s, size=length(reg$residuals),replace=TRUE)
- # Make bootstrap Y
- y.star <- fitted(reg) + ep.star
- # Do bootstrap regression
- lp <- model.frame(reg)[,2]
- bs.reg <- lm(y.star ~ lp - 1)
- # Create bootstrapped adjusted residuals
- bs.lev <- influence(bs.reg)$hat
- bs.s <- residuals(bs.reg)/sqrt(1-bs.lev)
- bs.s <- bs.s - mean(bs.s)
- # Calculate draw on prediction error
- # xb.xb <- coef(my.reg)["(Intercept)"] - coef(bs.reg)["(Intercept)"]
- xb.xb <- (coef(reg) - coef(bs.reg))*x_Np1
- return(unname(xb.xb + sample(bs.s, size=1)))
-
- }
- ##############################
- # Stratified random sampling #
- ##############################
- stratified <- function(df, group, size, select = NULL,
- replace = FALSE, bothSets = FALSE) {
- if (is.null(select)) {
- df <- df
- } else {
- if (is.null(names(select))) stop("'select' must be a named list")
- if (!all(names(select) %in% names(df)))
- stop("Please verify your 'select' argument")
- temp <- sapply(names(select),
- function(x) df[[x]] %in% select[[x]])
- df <- df[rowSums(temp) == length(select), ]
- }
- df.interaction <- interaction(df[group], drop = TRUE)
- df.table <- table(df.interaction)
- df.split <- split(df, df.interaction)
- if (length(size) > 1) {
- if (length(size) != length(df.split))
- stop("Number of groups is ", length(df.split),
- " but number of sizes supplied is ", length(size))
- if (is.null(names(size))) {
- n <- setNames(size, names(df.split))
- message(sQuote("size"), " vector entered as:\n\nsize = structure(c(",
- paste(n, collapse = ", "), "),\n.Names = c(",
- paste(shQuote(names(n)), collapse = ", "), ")) \n\n")
- } else {
- ifelse(all(names(size) %in% names(df.split)),
- n <- size[names(df.split)],
- stop("Named vector supplied with names ",
- paste(names(size), collapse = ", "),
- "\n but the names for the group levels are ",
- paste(names(df.split), collapse = ", ")))
- }
- } else if (size < 1) {
- n <- round(df.table * size, digits = 0)
- } else if (size >= 1) {
- if (all(df.table >= size) || isTRUE(replace)) {
- n <- setNames(rep(size, length.out = length(df.split)),
- names(df.split))
- } else {
- message(
- "Some groups\n---",
- paste(names(df.table[df.table < size]), collapse = ", "),
- "---\ncontain fewer observations",
- " than desired number of samples.\n",
- "All observations have been returned from those groups.")
- n <- c(sapply(df.table[df.table >= size], function(x) x = size),
- df.table[df.table < size])
- }
- }
- temp <- lapply(
- names(df.split),
- function(x) df.split[[x]][sample(df.table[x],
- n[x], replace = replace), ])
- set1 <- do.call("rbind", temp)
-
- if (isTRUE(bothSets)) {
- set2 <- df[!rownames(df) %in% rownames(set1), ]
- list(SET1 = set1, SET2 = set2)
- } else {
- set1
- }
- }
- ```
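- The helpers above are used throughout the document. As a quick illustration, the sketch below applies `stratified()` to a small synthetic data frame; the toy data and the 70/30 split are illustrative assumptions only, and the chunk is not evaluated.
- ```{r, eval=FALSE}
- # Toy data frame: two planes x 10 rows each (illustrative only)
- toy <- data.frame(Plane = rep(c("XY", "XZ"), each = 10),
-                   LP = runif(20), LR = runif(20))
- # Take 70% of rows per Plane without replacement and also return the complement
- toySplit <- stratified(toy, group = "Plane", size = 0.7,
-                        bothSets = TRUE, replace = FALSE)
- nrow(toySplit$SET1)  # stratified 70% sample
- nrow(toySplit$SET2)  # remaining 30%, analogous to the test set used later
- ```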
- ## Load data
- ```{r}
- # Get file paths
- axonNames <- list.files(paste("ProyeccionData/", sep=""), full.names=T)
- # Load matlab file
- axonFiles <- lapply(axonNames, function(x) readMat(x))
- names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4")
- # VAR NAMES
- # REAL_LENGTH, PROYECTION_LENGTH_XY, PROYECTION_LENGTH_XZ, PROYECTION_LENGTH_YZ
- # Extract data
- realLength <- lapply(axonFiles, function(x) x$REAL.LENGTH)
- planeXY <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XY)
- planeXZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XZ)
- planeYZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.YZ)
- # Get number of neurons per type
- numberTypes <- unlist(lapply(realLength, function(x) length(x)))
- # Store in data frames by plane
- xyData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeXY), Plane = rep("XY", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- xzData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeXZ), Plane = rep("XZ", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- yzData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeYZ), Plane = rep("YZ", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- # Merge into a single data frame and sort by id
- allData <- do.call("rbind", list(xyData, xzData, yzData))
- allData <- allData[order(allData$id),]
- # Add different number for every neuron-plane combination
- allData$NeurPlane <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]),
- rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
- allData$interact <-interaction(allData$neurType, allData$Plane, lex.order=T)
- # Create data frame w/o Type4
- dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)
- # Data in short format
- dataShort <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), XY = unlist(planeXY), XZ = unlist(planeXZ), YZ = unlist(planeYZ),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- ```
- ## CV-Bootstrap Prediction Interval
- This code is meant to be run independently for each neurType and in parallel; run it on CREMOTO. A parallelization sketch follows the chunk below.
- ```{r}
- # Ensure reproducibility
- set.seed(123456)
- # Subset axon type
- dataCV <- allData[allData$neurType == "Type1", ]
- # List to store CV repetitions
- cvList <- list ()
- cvRand <- list()
- for (i in 1:10) {
-
- # Stratified random training/test sets
- stratDat <- stratified(dataCV, "Plane", 0.7, bothSets = TRUE, replace = FALSE)
- trainSet <- stratDat$SET1
- testSet <- stratDat$SET2
- # Store train and test set
- cvRand[[i]] <- list(trainSet, testSet)
-
- # OLS with training set
- my.reg <- lm(LR ~ LP - 1, trainSet)
- # Create adjusted residuals
- leverage <- influence(my.reg)$hat
- my.s.resid <- residuals(my.reg)/sqrt(1-leverage)
- my.s.resid <- my.s.resid - mean(my.s.resid)
-
- # List to store planes
- planeList <- list()
- planeRand <- list()
- # Assign test data to bootstrapping
- dataToBoot <- testSet
-
- # Bootstrapping on test set
- for (j in c("XY", "XZ", "YZ")) {
-
- # Subset plane
- dataSub <- dataToBoot[dataToBoot$Plane == j, ]
-
- # List to store errors
- epList <- list()
- epCount <- 1
-
- for (k in dataSub[ , "LP"]) {
-
- # Predict LR
- y.p <- coef(my.reg)*k
-
- # Do bootstrap with 100 replications
- ep.draws <- replicate(n=100, the.replication.type(reg = my.reg, s = my.s.resid, x_Np1 = k))
-
- # Store in list
- epList[[epCount]] <- ep.draws
- epCount <- epCount + 1
-
- print(epCount)
-
- }
-
- # Store in list
- planeList[[j]] <- epList
-
- print(paste("CV", i, "Plane", j, "finished"))
-
- }
-
- # Store in list
- cvList[[i]] <- planeList
- print(paste("CV", i, "finished"))
-
- }
-
- # names(axList) <- c("Type1", "Type2", "Type3", "Type4")
- # names(axRand) <- c("Type1", "Type2", "Type3", "Type4")
- #
- # saveRDS(list(distList, distRand), paste("stereo_bootPI_full_Type", axCount, "_boot20.rds", sep = ""))
- ```
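- A minimal sketch of how the per-type runs could be parallelized, assuming the base `parallel` package. `runCV()` is a hypothetical wrapper, not part of the original analysis; it would contain the full CV-bootstrap loop above for one axon type, but here it only returns the training fit to keep the example short.
- ```{r, eval=FALSE}
- library(parallel)
-
- # Hypothetical wrapper: subset one axon type and run the CV-bootstrap loop above.
- # Only the stratified split and the training OLS are shown here.
- runCV <- function(type, seed = 123456) {
-   set.seed(seed)
-   dataCV <- allData[allData$neurType == type, ]
-   stratDat <- stratified(dataCV, "Plane", 0.7, bothSets = TRUE, replace = FALSE)
-   lm(LR ~ LP - 1, stratDat$SET1)
- }
-
- # One worker per axon type; mclapply forks on Unix-alikes (use parLapply on Windows)
- cvByType <- mclapply(c("Type1", "Type2", "Type3", "Type4"), runCV, mc.cores = 4)
- names(cvByType) <- c("Type1", "Type2", "Type3", "Type4")
- ```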
- #### Check convergence
- ```{r}
- # Load object
- bootCheck <- readRDS("bootPI_full.rds")
- # First list is prediction error, second is random subset
- axList <- bootCheck[[1]]
- axRand <- bootCheck[[2]]
- # One example
- # OLS
- my.reg <- lm(LR ~ LP:neurType - 1, allData)
- # Estimate and store in list
- lrpList <- list()
- lrpCount <- 1
- axCount <- 1
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
-
- # Extract LP and LR values
- lp <- axRand[[i]][[j]]$LP
- lr <- axRand[[i]][[j]]$LR
-
- for (k in 1:length(lp)) {
-
- # Predicted LR
- lr.p <- coef(my.reg)[axCount]*lp[k]
-
- # Extract absolute errors
- ae <- axList[[i]][[j]][[k]]
-
- # Empty vectors for storage
- lrp.Low <- numeric()
- lrp.Up <- numeric()
-
- # Estimate PI95 for boot n=10...100
- for (l in 4:100) {
-
- lrp <- lr.p + quantile(ae[1:l], probs = c(0.025,0.975))
-
- # Store in vectors
- lrp.Low <- c(lrp.Low, lrp[1])
- lrp.Up <- c(lrp.Up, lrp[2])
-
- }
-
- # Store in data frame
- lrpFrame <- data.frame(nboot = 4:100, lowLim = lrp.Low, upLim = lrp.Up, LR = rep(lr[k], length(4:100)))
-
- # Store in list
- lrpList[[lrpCount]] <- lrpFrame
- lrpCount <- lrpCount + 1
- }
-
- }
-
- axCount <- axCount + 1
-
- }
- ```
- ##### Reformat
- ```{r}
- # 1/3 random observations per plane, no replacement
- numExtract <- floor(1/3*numberTypes)
- checkFrame <- do.call(rbind, lrpList)
- checkFrame$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), times = numExtract*(length(4:100))*3)
- checkFrame$Plane <- c(rep(c("XY", "XZ", "YZ"), each = numExtract[1]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[2]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[3]*length(4:100)),
- rep(c("XY", "XZ", "YZ"), each = numExtract[4]*length(4:100)))
- lowMean <- aggregate(lowLim ~ neurType + Plane + nboot, checkFrame, mean)
- upMean <- aggregate(upLim ~ neurType + Plane + nboot, checkFrame, mean)
- LRmean <- aggregate(LR ~ neurType + Plane + nboot, checkFrame, mean)
- cumError <- with(lowMean[lowMean$neurType == "Type1" & lowMean$Plane == "XY", ],
- abs((lowLim[-1] - lowLim[-length(lowLim)])/lowLim[-length(lowLim)])*100)
- ```
- ##### Plot
- ```{r, fig.height=20, fig.width=30}
- # Separate plots
- png(filename=paste("convergence_bootPI_separate.png", sep=""), type="cairo",
- units="in", width=30, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,6), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- plot(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2)
- plot(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
-
- }
-
- }
- dev.off()
- # Same plot
- png(filename=paste("convergence_bootPI_together.png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,3), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- minY <- min(lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"])
- maxY <- max(upMean[upMean$neurType == i & upMean$Plane == j, "upLim"])
- plot(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2, ylim = c(minY, maxY))
- lines(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
- # lines(LR ~ nboot, LRmean[LRmean$neurType == i & LRmean$Plane == j, ], type = "l", lwd = 2, col = "red")
-
- }
-
- }
- dev.off()
- # Diff plot
- png(filename=paste("convergence_bootPI_Diff.png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,3), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- minY <- min(lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"])
- maxY <- max(upMean[upMean$neurType == i & upMean$Plane == j, "upLim"])
- diflu <- upMean[upMean$neurType == i & upMean$Plane == j, "upLim"] - lowMean[lowMean$neurType == i & lowMean$Plane == j, "lowLim"]
- plot(4:100, diflu, type = "l", col = "red", lwd = 2)
-
- }
-
- }
- dev.off()
- # Same plot random intervals
- for (m in 1:4) {
-
- png(filename=paste("convergence_bootPI_together_example_rand", m, ".png", sep=""), type="cairo",
- units="in", width=15, height=20, pointsize=12,
- res=300)
-
- par(mfrow=c(4,3), cex = 1.1)
-
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- subData <- checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ]
- randLR <- sample(subData[, "LR"], 1)
- # randLR <- sample(subData[which(subData$lowLim > subData$LR), "LR"], 1)
- minY <- min(subData[subData$LR == randLR, "lowLim"])
- maxY <- max(subData[subData$LR == randLR, "upLim"])
- plot(lowLim ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2, ylim = c(minY, maxY))
- lines(upLim ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2)
- lines(LR ~ nboot, subData[subData$LR == randLR, ], type = "l", lwd = 2, col = "red")
-
- }
-
- }
-
- dev.off()
-
- }
- # Separate with lines
- png(filename=paste("convergence_bootPI_separate_FULL.png", sep=""), type="cairo",
- units="in", width=30, height=20, pointsize=12,
- res=300)
- par(mfrow=c(4,6), cex = 1.1)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- plot(lowLim ~ nboot, checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ], type = "l", col = "lightgray")
- lines(lowLim ~ nboot, lowMean[lowMean$neurType == i & lowMean$Plane == j, ], type = "l", lwd = 2)
-
- plot(upLim ~ nboot, checkFrame[checkFrame$neurType == i & checkFrame$Plane == j, ], type = "l", col = "lightgray")
- lines(upLim ~ nboot, upMean[upMean$neurType == i & upMean$Plane == j, ], type = "l", lwd = 2)
-
- }
-
- }
- dev.off()
- ```
- #### Estimate percentage errors
- ```{r, fig.width=8, fig.height=8}
- # Load objects
- cvPI <- list()
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- # Within each Type, element 1 contains the errors and element 2 the random samples (training and test sets)
- cvPI[[i]] <- readRDS(paste("ProyeccionAnalysis/proyec_boot10CVPI_", i, ".rds", sep = ""))
- }
- # Estimate and store in list
- peList <- list()
- peCount <- 1
- lrLength <- list()
- for (h in c("Type1", "Type2", "Type3", "Type4")) {
-
- for (i in 1:10) {
-
- for (j in c("XY", "XZ", "YZ")) {
-
- # Extract LR values
- lr <- cvPI[[h]][[2]][[i]][[2]]
- lr <- lr[lr$Plane == j, "LR"]
-
- for (k in 1:length(lr)) {
-
- # Divide absolute errors by LR to get percentage errors
- pe <- cvPI[[h]][[1]][[i]][[j]][[k]] / lr[k]
-
- # Store in list
- peList[[peCount]] <- pe*100
- peCount <- peCount + 1
- }
-
- }
-
- }
-
- lrLength[h] <- length(lr)
-
- }
- lrLength <- unlist(lrLength)
- peFrame <- data.frame(pe = unlist(peList),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), times = lrLength*10*3*100),
- Plane = c(rep(rep(c("XY", "XZ", "YZ"), each = lrLength[1]*100), 10),
- rep(rep(c("XY", "XZ", "YZ"), each = lrLength[2]*100), 10),
- rep(rep(c("XY", "XZ", "YZ"), each = lrLength[3]*100), 10),
- rep(rep(c("XY", "XZ", "YZ"), each = lrLength[4]*100), 10)))
- peFrame$interact <- interaction(peFrame$neurType, peFrame$Plane, lex.order=T)
- ```
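- As a quick consistency check (a sketch only), the number of rows in `peFrame` should equal the number of stored bootstrap errors: 100 replications per test observation, 10 CV repetitions and 3 planes per axon type. This assumes the test-set size per plane is constant across repetitions, which holds because `stratified()` rounds the 70% split deterministically.
- ```{r, eval=FALSE}
- # Bookkeeping check: the rep() lengths above must match the unlisted errors
- stopifnot(nrow(peFrame) == sum(lrLength * 10 * 3 * 100))
- # Rows per axon type and plane
- table(peFrame$neurType, peFrame$Plane)
- ```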
- ##### Plot density and histogram
- ```{r}
- # Plot
- # peGroup <- peFrame %>% group_by(interact)
- # peSample <- sample_n(peGroup, 100)
- setwd("ProyeccionAnalysis/")
- png(filename=paste("prederrorDistributions_ols_CV.png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- (p <- ggplot(peFrame, aes(x = `pe`, y = `interact`, fill = ifelse(..x.. >= -5 & ..x.. <= 5, ">= -5", "< 5"))) +
- scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
- geom_density_ridges_gradient(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- xlab("% Error") +
- xlim(c(-40, 40)))
- dev.off()
- png(filename=paste("prederrorHistogram_ols_CV.png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- # Histogram
- # nint 300 bootPI
- # nint 500 bootPI_full
- # nint 400 bootPI_full_replace
- (h <- histogram( ~ pe | Plane + neurType, peFrame, nint = 300, type ="density", xlim = c(-50,50)))
- dev.off()
- ```
- ##### Plot errors vs cumulative probability
- ```{r, fig.height=5, fig.width=20}
- # Inverse quantile
- quantInv <- function(distr, value) ecdf(distr)(value)
- #ecdf plot example
- # plot(ecdf(peFrame[peFrame$interact == "Type1.XY", "pe"]))
- # lines(ecdf(peFrame[peFrame$interact == "Type1.XZ", "pe"]), col = "red")
- # lines(ecdf(peFrame[peFrame$interact == "Type1.YZ", "pe"]), col = "blue")
- probList <- list()
- binProb <- 0.5
- probSeq <- seq(binProb, 100, binProb)
- for (i in unique(peFrame$interact)) {
-
- dataProb <- peFrame[peFrame$interact == i, "pe"]
- probVec <- numeric()
-
- for (j in probSeq) {
-
- errProb <- quantInv(dataProb, j) - quantInv(dataProb, -j)
- probVec <- c(probVec, errProb)
-
- }
-
- probList[[i]] <- probVec
-
- }
- # Plot with limited axis
- setwd("ProyeccionAnalysis/")
- png(filename=paste("errorProbabilityCum_bin", binProb, "_CV.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,1), xlim = c(0.5, 40),
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- # Plot with free axis
- png(filename=paste("errorProbabilityCum_bin", binProb, "_freeAxis_CV.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- ```
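- For reference, `quantInv()` is just the empirical CDF evaluated at a point, so each entry of `probVec` is the two-sided coverage `F(j) - F(-j)`, i.e. the fraction of percentage errors falling within +/- j%. A toy illustration (the example vector is made up):
- ```{r, eval=FALSE}
- toyErr <- c(-12, -6, -3, -1, 0, 2, 4, 7, 15)  # illustrative % errors
- quantInv(toyErr, 5) - quantInv(toyErr, -5)    # fraction of errors within +/- 5%
- mean(abs(toyErr) <= 5)                        # same value here (no error equals exactly -5)
- ```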
- ##### Random sample
- ```{r}
- size <- 0.05
- peRand <- peFrame %>% group_by(interact)
- peSample <- sample_frac(peRand, size)
- ```
- ###### Plot density and histogram
- ```{r}
- setwd("ProyeccionAnalysis/")
- png(filename=paste("prederrorDistributions_ols_CV_rand", size, ".png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- (p <- ggplot(peSample, aes(x = `pe`, y = `interact`, fill = ifelse(..x.. >= -5 & ..x.. <= 5, ">= -5", "< 5"))) +
- scale_fill_manual(values = c("gray70", "coral2"), name = NULL) +
- geom_density_ridges_gradient(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- xlab("% Error") +
- xlim(c(-40, 40)))
- dev.off()
- png(filename=paste("prederrorHistogram_ols_CV_rand", size, ".png", sep=""), type="cairo",
- units="in", width=8, height=10, pointsize=12,
- res=300)
- # Histogram
- # nint 300 bootPI
- # nint 500 bootPI_full
- # nint 400 bootPI_full_replace
- (h <- histogram( ~ pe | Plane + neurType, peSample, nint = 300, type ="density", xlim = c(-50,50)))
- dev.off()
- ```
- ###### Plot errors vs cumulative probability
- ```{r, fig.height=5, fig.width=20}
- # Inverse quantile
- quantInv <- function(distr, value) ecdf(distr)(value)
- peSample <- as.data.frame(peSample)
- probList <- list()
- binProb <- 0.5
- probSeq <- seq(binProb, 100, binProb)
- for (i in unique(peSample$interact)) {
-
- dataProb <- peSample[peSample$interact == i, "pe"]
- probVec <- numeric()
-
- for (j in probSeq) {
-
- errProb <- quantInv(dataProb, j) - quantInv(dataProb, -j)
- probVec <- c(probVec, errProb)
-
- }
-
- probList[[i]] <- probVec
-
- }
- # Plot with limited axis
- setwd("ProyeccionAnalysis/")
- png(filename=paste("errorProbabilityCum_bin", binProb, "_CV_rand", size, ".png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5, ylim = c(0,1), xlim = c(0.5, 40),
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- # Plot with free axis
- png(filename=paste("errorProbabilityCum_bin", binProb, "_freeAxis_CV_rand", size, ".png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- par(mfrow = c(1,4))
- axCount <- 1
- for (i in seq(1,10,3)) {
- plot(probSeq, probList[[i]], type = "l", col = "green", lwd = 1.5,
- ylab = "Cumulative probability", xlab = "% Error", main = paste("Axon Type", axCount))
- lines(probSeq, probList[[i + 1]], col = "red", lwd = 1.5)
- lines(probSeq, probList[[i + 2]], col = "blue", lwd = 1.5)
- abline(v = 5, lty = "dashed", col = "gray")
- abline(h = 0.95, lty = "dashed", col = "gray" )
- legend("bottomright", c("XY", "XZ", "YZ"), lwd = 1.5, col = c("green", "red", "blue"))
- axCount <- axCount + 1
- }
- dev.off()
- ```
- #### Estimate mean percentage errors
- UNFINISHED
- ```{r, fig.width=8, fig.height=8}
- # mm <- peFrame[peFrame$Plane == "XY", "pe"]
- # mm.mat <- matrix(mm, 100, 270)
- # meanMat <- apply(mm.mat, 1, mean)
- ```