--- title: "Axonal Length" author: "Sergio D?ez Hermano" date: '`r format(Sys.Date(),"%e de %B, %Y")`' output: html_document: highlight: tango toc: yes toc_depth: 4 toc_float: collapsed: no --- ```{r setup, include=FALSE} require(knitr) # include this code chunk as-is to set options opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE) Sys.setlocale("LC_TIME", "C") ``` ##Load libraries and functions ```{r} library(R.matlab) library(lattice) library(dplyr) library(ggplot2) library(ggridges) library(hrbrthemes) library(viridis) library(ggpubr) library(mltools) library(data.table) library(caret) library(interactions) library(performance) library(MASS) #rlm library(clickR) #rlm pvals library(geepack) library(pstools) library(MXM) library(rmcorr) library(multcomp) # https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html ########################### # Robust sanwich variance # ########################### source("summaryRobust.R") ######################################### # Deviation from empirical variance GEE # ######################################### var.crit <- function(mymodel) sum(abs(mymodel[["geese"]]$vbeta.naiv - mymodel[["geese"]]$vbeta)) ############################ # Distances between planes # ############################ distPlan <- function(mydata) { # Vector to store distances distPlanes <- vector() # Estimate distances between planes for each subject for (i in unique(mydata$id)) { LPvals <- mydata[mydata$id == i, 3] distPlanes <- c(distPlanes, (LPvals[1] - LPvals[2])/max(LPvals[1], LPvals[2]), (LPvals[1] - LPvals[3])/max(LPvals[1], LPvals[3]), (LPvals[2] - LPvals[3])/max(LPvals[2], LPvals[3])) } # Concatenate factor with substraction information numSubjects <- length(unique(mydata$id)) subsFactor <- rep(c("XY - XZ", "XY - YZ", "YZ - XZ"), rep = numSubjects) distFrame <- data.frame(id = mydata$id, type = mydata$neurType, distPlanes = distPlanes, substraction = subsFactor) return(distFrame) } ########################## # Bootstrap coefficients # ########################## bootCoef <- function(mydata, axontype, projection, corrMat, nreps) { # Select data modData <- mydata[mydata$neurType == axontype & mydata$Plane == projection, ] # Fit the model modGee <- geeglm(LR ~ LP, data = modData, id = id, corstr = corrMat) # Ensure reproducibility # set.seed(12345) # Set up a matrix to store the results (1 intercept + 1 predictor = 2) coefmat <- matrix(NA, nreps, 2) # We need the fitted values and the residuals from the model resids <- residuals(modGee) preds <- fitted(modGee) # Repeatedly generate bootstrapped responses for(i in 1:nreps) { booty <- preds + sample(resids, rep=TRUE) modGee2 <- update(modGee, booty ~ .) coefmat[i,] <- coef(modGee2) } # Create a dataframe to store predictors values colnames(coefmat) <- names(coef(modGee2)) coefmat <- data.frame(coefmat) return(coefmat) } ############################## # Bootstrap coefficients gEE # ############################## # For models with interactions bootCoefint <- function(mydata, modForm, corrMat, nreps) { # Fit model modGee <- geeglm(formula(modForm), data = mydata, id = id, corstr = corrMat) # Ensure reproducibility # set.seed(12345) # Set up a matrix to store the results (1 intercept + 1 predictor = 2) coefmat <- matrix(NA, nreps, length(coef(modGee))) # We need the fitted values and the residuals from the model resids <- residuals(modGee) preds <- fitted(modGee) # Repeatedly generate bootstrapped responses for(i in 1:nreps) { booty <- preds + sample(resids, rep=TRUE) modGee2 <- update(modGee, booty ~ .) coefmat[i,] <- coef(modGee2) print(i) } # Create a dataframe to store predictors values colnames(coefmat) <- names(coef(modGee2)) coefmat <- data.frame(coefmat) return(coefmat) } ############################# # Bootstrap coefficients LM # ############################# # For models with interactions bootCoefintLM <- function(mydata, modForm, nreps) { # Fit model modLM <- lm(formula(modForm), data = mydata) # Ensure reproducibility set.seed(12345) # Set up a matrix to store the results (1 intercept + 1 predictor = 2) coefmat <- matrix(NA, nreps, length(coef(modLM))) # We need the fitted values and the residuals from the model resids <- residuals(modLM) preds <- fitted(modLM) # Repeatedly generate bootstrapped responses for(i in 1:nreps) { booty <- preds + sample(resids, rep=TRUE) modLM2 <- update(modLM, booty ~ .) coefmat[i,] <- coef(modLM2) print(i) } # Create a dataframe to store predictors values colnames(coefmat) <- names(coef(modLM2)) coefmat <- data.frame(coefmat) return(coefmat) } ``` ##Load data ```{r} # Get file paths axonNames <- list.files(paste("ProyeccionData/", sep=""), full.names=T) # Load matlab file axonFiles <- lapply(axonNames, function(x) readMat(x)) names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4") # VAR NAMES # REAL_LENGTH, PROYECTION_LENGTH_XY, PROYECTION_LENGTH_XZ, PROYECTION_LENGTH_YZ # Extract data realLength <- lapply(axonFiles, function(x) x$REAL.LENGTH) planeXY <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XY) planeXZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XZ) planeYZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.YZ) # Get number of neurons per type numberTypes <- unlist(lapply(realLength, function(x) length(x))) # Store in data frames by plane xyData <- data.frame(id = 1:length(unlist(realLength)), LR = unlist(realLength), LP = unlist(planeXY), Plane = rep("XY", sum(numberTypes)), neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)) xzData <- data.frame(id = 1:length(unlist(realLength)), LR = unlist(realLength), LP = unlist(planeXZ), Plane = rep("XZ", sum(numberTypes)), neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)) yzData <- data.frame(id = 1:length(unlist(realLength)), LR = unlist(realLength), LP = unlist(planeYZ), Plane = rep("YZ", sum(numberTypes)), neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)) # Merge into a single data frame and sort by id allData <- do.call("rbind", list(xyData, xzData, yzData)) allData <- allData[order(allData$id),] # Add different number for every neuron-plane combination allData$NeurPlane <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]), rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4])) allData$interact <-interaction(allData$neurType, allData$Plane, lex.order=T) # Create data frame w/o Type4 dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType) # Data in short format dataShort <- data.frame(id = 1:length(unlist(realLength)), LR = unlist(realLength), XY = unlist(planeXY), XZ = unlist(planeXZ), YZ = unlist(planeYZ), neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)) ``` ##Descriptive ###Histograms ```{r, fig.width=8, fig.height=8} # Use dataShort for plotting histogram( ~ YZ + XZ + XY + LR | neurType, dataShort, type ="density") histogram( ~ LR | Plane, allData, type ="density") ``` ####GGplot2 ```{r, fig.width=8, fig.height=8} #https://www.r-graph-gallery.com/violin_grouped_ggplot2.html # New variables releveled for plotting reasons allData$NPFac <- factor(allData$NeurPlane) levels(allData$NPFac) <- paste(rep(levels(allData$neurType), each = 3), rep(levels(allData$Plane), 3), sep=".") # allData$NPFac <- factor(allData$NPFac, levels = rev(levels(allData$NPFac))) allData$typeFac <- factor(allData$neurType, levels = rev(levels(allData$neurType))) # Plot ggplot(allData, aes(x = `LP`, y = `NPFac`, fill = ..x..)) + geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) + scale_fill_viridis(name = "LP", option = "D") + labs(title = 'Length 2D Proyections') + theme_ipsum() + theme( legend.position="none", panel.spacing = unit(0.1, "lines"), strip.text.x = element_text(size = 8) ) # Plot ggplot(allData, aes(x = `LR`, y = `neurType`, fill = ..x..)) + geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) + scale_fill_viridis(name = "LP", option = "D") + labs(title = 'Length 2D Proyections') + theme_ipsum() + theme( legend.position="none", panel.spacing = unit(0.1, "lines"), strip.text.x = element_text(size = 8) ) # Plot ggplot(allData, aes(x = LP, y = NPFac, fill = neurType)) + geom_density_ridges() + theme_ridges() + theme(legend.position = "none") # All together gradient # dataShort$normLR <- dataShort$LR/dataShort$LR # dataShort$normXY <- dataShort$LR/dataShort$XY # dataShort$normXZ <- dataShort$LR/dataShort$XZ # dataShort$normYZ <- dataShort$LR/dataShort$YZ joinData <- melt(setDT(dataShort), id.vars = c("id","neurType"), variable.name = "length") joinData <- joinData[order(joinData$id), ] joinData$NeurPlane <-interaction(joinData$neurType, joinData$length, lex.order=T) png(filename=paste("gradientDistributions.png", sep=""), type="cairo", units="in", width=10, height=10, pointsize=12, res=300) ggplot(joinData, aes(x = `value`, y = `NeurPlane`, fill = ..x..)) + geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) + scale_fill_viridis(name = "LP", option = "D") + labs(title = 'Length 2D Proyections') + theme_ipsum() + theme( legend.position="none", panel.spacing = unit(0.1, "lines"), strip.text.x = element_text(size = 8) ) dev.off() # All together gradient (LOG) joinData <- melt(setDT(dataShort), id.vars = c("id","neurType"), variable.name = "length") joinData <- joinData[order(joinData$id), ] joinData$NeurPlane <- interaction(joinData$neurType, joinData$length, lex.order=T) joinData$logValue <- log(joinData$value) ggplot(joinData, aes(x = `logValue`, y = `NeurPlane`, fill = ..x..)) + geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) + scale_fill_viridis(name = "LP", option = "D") + labs(title = 'Length 2D Proyections') + theme_bw() + theme( legend.position="none", panel.spacing = unit(0.1, "lines"), strip.text.x = element_text(size = 8) ) # All together color by type ggplot(joinData, aes(x = `value`, y = `NeurPlane`, fill = neurType)) + geom_density_ridges(alpha = 0.8) + theme_ridges() + theme(legend.position = "none") ``` ####GGPlot2 Stacked ```{r, fig.width=20, fig.height=6} # All together overlapping # levels(joinData$length) <- rev(levels(joinData$length)) joinData$estim <- joinData$value*1.27 joinData[joinData$length == "LR", 6] <- joinData[joinData$length == "LR", 4] png(filename=paste("gradientStacked.png", sep=""), type="cairo", units="in", width=20, height=5, pointsize=12, res=300) ggplot(data=joinData, aes(x=value, fill=length)) + geom_density(adjust=1.5, alpha = .4) + theme_ipsum() + facet_wrap(~neurType, ncol = 4) + xlim(0, 2.5e05) + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) dev.off() png(filename=paste("gradientEstimation.png", sep=""), type="cairo", units="in", width=20, height=5, pointsize=12, res=300) ggplot(data=joinData, aes(x=estim, fill=length)) + geom_density(adjust=1.5, alpha = .4) + theme_ipsum() + facet_wrap(~neurType, ncol = 4) + xlim(0, 2.5e05) + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) dev.off() ``` ###Boxplot ```{r, fig.width=20, fig.height=5} # Use dataShort for plotting lrBx <- ggplot(dataShort, aes(x=neurType, y=LR, fill = neurType)) + geom_violin(trim=FALSE) + geom_boxplot(width=0.1, fill = "white") + theme_minimal() xyBx <- ggplot(dataShort, aes(x=neurType, y=XY, fill = neurType)) + geom_violin(trim=FALSE) + geom_boxplot(width=0.1, fill = "white") + theme_minimal() xzBx <- ggplot(dataShort, aes(x=neurType, y=XZ, fill = neurType)) + geom_violin(trim=FALSE) + geom_boxplot(width=0.1, fill = "white") + theme_minimal() yzBx <- ggplot(dataShort, aes(x=neurType, y=YZ, fill = neurType)) + geom_violin(trim=FALSE) + geom_boxplot(width=0.1, fill = "white") + theme_minimal() ggarrange(lrBx, xyBx, xzBx, yzBx, ncol = 4, nrow = 1) ``` ###Correlations ####LR vs LP by Neuron ```{r, fig.width=20} par(mfrow = c(1,4)) plot(LR ~ LP, data = allData[allData$neurType == "Type1", ], pch = 21, bg = "coral", main = "Type1", xlim = c(0,250000), ylim = c(0, 300000)) abline(h=1000, v=1000, col = "gray", lty = "dashed") plot(LR ~ LP, data = allData[allData$neurType == "Type2", ], pch = 21, bg = "lightblue", main = "Type2", xlim = c(0,250000), ylim = c(0, 300000)) abline(h=1000, v=1000, col = "gray", lty = "dashed") plot(LR ~ LP, data = allData[allData$neurType == "Type3", ], pch = 21, bg = "lightgreen", main = "Type3", xlim = c(0,250000), ylim = c(0, 300000)) abline(h=1000, v=1000, col = "gray", lty = "dashed") plot(LR ~ LP, data = allData[allData$neurType == "Type4", ], pch = 21, bg = "violet", main = "Type4", xlim = c(0,250000), ylim = c(0, 300000)) abline(h=1000, v=1000, col = "gray", lty = "dashed") ``` ####LR vs LP by Plane ```{r, fig.width=15} par(mfrow = c(1,3)) plot(LR ~ LP, data = allData[allData$Plane == "XY", ], pch = 21, bg = "red", main = "XY", xlim = c(0,250000), ylim = c(0, 300000)) abline(h=1000, v=1000, col = "gray", lty = "dashed") plot(LR ~ LP, data = allData[allData$Plane == "XZ", ], pch = 21, bg = "blue", main = "XZ", xlim = c(0,250000), ylim = c(0, 300000)) abline(h=1000, v=1000, col = "gray", lty = "dashed") plot(LR ~ LP, data = allData[allData$Plane == "YZ", ], pch = 21, bg = "green", main = "YZ", xlim = c(0,250000), ylim = c(0, 300000)) abline(h=1000, v=1000, col = "gray", lty = "dashed") ``` ####LR vs LP by Neuron*Plane ```{r} # Duplicate data to avoid confusion data3D <- allData # Add a number identifyng every neuron-plane combination data3D$plaNum <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]), rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4])) # Plot car::scatter3d(x = data3D$LP, y = data3D$LR, z = data3D$plaNum, group = data3D$neurType, surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE) # Plot dataXY.1v4 <- data3D[data3D$Plane == "XY" & (data3D$neurType == "Type1" | data3D$neurType == "Type4"), ] car::scatter3d(x = dataXY.1v4$LP, y = dataXY.1v4$LR, z = dataXY.1v4$plaNum, group = dataXY.1v4$neurType, surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE) ``` ####Planes distance ```{r, fig.width=20, fig.height=5} distWithin <- distPlan(allData) distWithin$Combin <- interaction(distWithin$type, distWithin$substraction, lex.order = TRUE) boxplot(distPlanes ~ substraction + type, distWithin) # Boxplot png(filename=paste("boxDistance.png", sep=""), type="cairo", units="in", width=5, height=5, pointsize=12, res=300) bx1 <- distWithin %>% ggplot( aes(x=Combin, y=distPlanes, fill=type)) + geom_boxplot() + scale_fill_viridis(discrete = TRUE, alpha=0.6) + theme_ipsum() + theme( legend.position="none", plot.title = element_text(size=11), axis.text.x = element_text(angle = 45, hjust = 1, size = 9) ) + ggtitle("Distances plane boxplot") + xlab("") bx1 dev.off() # Boxlot with jitter bx2 <- distWithin %>% ggplot( aes(x=Combin, y=distPlanes, fill=type)) + geom_boxplot() + scale_fill_viridis(discrete = TRUE, alpha=0.6) + geom_jitter(color = "black", size=0.1, alpha=0.9) + scale_color_viridis(discrete = TRUE, alpha=0.6) + theme_ipsum() + theme( legend.position="none", plot.title = element_text(size=11), axis.text.x = element_text(angle = 45, hjust = 1, size = 9) ) + ggtitle("A boxplot with jitter") + xlab("") # Vertical violin bx3 <- distWithin %>% ggplot(aes(fill=substraction, y=distPlanes, x=type)) + geom_violin(position="dodge", alpha=0.5, outlier.colour="transparent") + scale_fill_viridis(discrete=T, name="") + theme_ipsum() + xlab("") + ylab("Distance Within Types") # Horizontal violin bx4 <- distWithin %>% ggplot( aes(x=Combin, y=distPlanes, fill=type)) + geom_violin(width=2, size=0.1) + scale_fill_viridis(discrete=TRUE) + scale_color_viridis(discrete=TRUE) + theme_ipsum() + theme( legend.position="none" ) + coord_flip() + # This switch X and Y axis and allows to get the horizontal version xlab("") + ylab("Distance Within Types") ggarrange(bx1, bx2, bx3, bx4, ncol = 4, nrow = 1) ``` #####Distances ggplot ```{r, fig.width=15, fig.height=5} rest1 <- ggplot(distWithin[distWithin$substraction == "XY - XZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) + geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) + scale_fill_viridis(name = "LP", option = "D") + labs(title = 'XY - XZ') + theme_ipsum() + theme( legend.position="none", panel.spacing = unit(0.1, "lines"), strip.text.x = element_text(size = 8) ) rest2 <- ggplot(distWithin[distWithin$substraction == "XY - YZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) + geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) + scale_fill_viridis(name = "LP", option = "D") + labs(title = 'XY - YZ') + theme_ipsum() + theme( legend.position="none", panel.spacing = unit(0.1, "lines"), strip.text.x = element_text(size = 8) ) rest3 <- ggplot(distWithin[distWithin$substraction == "YZ - XZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) + geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) + scale_fill_viridis(name = "LP", option = "D") + labs(title = 'YZ - XZ') + theme_ipsum() + theme( legend.position="none", panel.spacing = unit(0.1, "lines"), strip.text.x = element_text(size = 8) ) ggarrange(rest1, rest2, rest3, ncol = 3, nrow = 1) car::scatter3d(x = distWithin[distWithin$substraction == "XY - XZ", 3], y = distWithin[distWithin$substraction == "XY - YZ", 3], z = distWithin[distWithin$substraction == "YZ - XZ", 3], group = factor(rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)), surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE) plot(x = distWithin[distWithin$substraction == "XY - XZ", 3], y = distWithin[distWithin$substraction == "XY - YZ", 3], pch = 21, bg = factor(rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))) ``` ##Empirical error ```{r, fig.width=15} # https://stackoverflow.com/questions/12394321/r-what-algorithm-does-geom-density-use-and-how-to-extract-points-equation-of # https://stackoverflow.com/questions/24985361/understanding-bandwidth-smoothing-in-ggplot2 dataEmpirical <- allData dataEmpirical$alpha <- dataEmpirical$LR/dataEmpirical$LP ####################################### # Estimate alphas and use grand alpha # ####################################### # Grand alpha dataEmpirical$error <- 100*(1 - mean(dataEmpirical$alpha)/dataEmpirical$alpha) # Plot ga <- ggplot(dataEmpirical, aes(x = `error`, y = `interact`, fill = neurType)) + geom_density_ridges(alpha = 0.8) + theme_ridges() + theme(legend.position = "none") + ggtitle("Grand Alpha") ####################################### # Estimate alphas and use local alpha # ####################################### # Find mean alpha per group meanAlpha <- aggregate(alpha ~ neurType + Plane, dataEmpirical, mean) dataEmpirical$localErr <- dataEmpirical$error # Divide by max LR per axon type and plane for (i in c("Type1", "Type2", "Type3", "Type4")) { for (j in c("XY", "XZ", "YZ")) { # Subset and divide alpha alphaVec <- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 8] localAlpha <- meanAlpha[meanAlpha$neurType == i & meanAlpha$Plane == j, 3] dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 10] <- 100*(1 - localAlpha/alphaVec) } } # Plot la <-ggplot(dataEmpirical, aes(x = `localErr`, y = `interact`, fill = neurType)) + geom_density_ridges(alpha = 0.8) + theme_ridges() + theme(legend.position = "none") + ggtitle("Local Alpha") ############### # Fixed alpha # ############### dataEmpirical$fixedError <- 100*(1 - 1.268/dataEmpirical$alpha) fa <- ggplot(dataEmpirical, aes(x = `fixedError`, y = `interact`, fill = neurType)) + geom_density_ridges(alpha = 0.8) + theme_ridges() + theme(legend.position = "none") + ggtitle("Fixed Alpha") ############## # Joint plot # ############## png(filename=paste("alphasComparison_LRbyLP.png", sep=""), type="cairo", units="in", width=15, height=6, pointsize=12, res=300) ggarrange(ga, la, fa, ncol=3, nrow=1) dev.off() ``` ###Equal Bandwidth ```{r, fig.width=15} # https://stackoverflow.com/questions/12394321/r-what-algorithm-does-geom-density-use-and-how-to-extract-points-equation-of # https://stackoverflow.com/questions/24985361/understanding-bandwidth-smoothing-in-ggplot2 dataEmpirical <- allData dataEmpirical$alpha <- dataEmpirical$LR/dataEmpirical$LP bw <- 1 ####################################### # Estimate alphas and use grand alpha # ####################################### # Grand alpha dataEmpirical$error <- 100*(1 - mean(dataEmpirical$alpha)/dataEmpirical$alpha) # Plot ga <- ggplot(dataEmpirical, aes(x = `error`, y = `interact`, fill = neurType)) + geom_density_ridges(alpha = 0.8, bandwidth = bw) + theme_ridges() + theme(legend.position = "none") + ggtitle("Grand Alpha") ####################################### # Estimate alphas and use local alpha # ####################################### # Find mean alpha per group meanAlpha <- aggregate(alpha ~ neurType + Plane, dataEmpirical, mean) dataEmpirical$localErr <- dataEmpirical$error # Divide by max LR per axon type and plane for (i in c("Type1", "Type2", "Type3", "Type4")) { for (j in c("XY", "XZ", "YZ")) { # Subset and divide alpha alphaVec <- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 8] localAlpha <- meanAlpha[meanAlpha$neurType == i & meanAlpha$Plane == j, 3] dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 10] <- 100*(1 - localAlpha/alphaVec) } } # Plot la <-ggplot(dataEmpirical, aes(x = `localErr`, y = `interact`, fill = neurType)) + geom_density_ridges(alpha = 0.8, bandwidth = bw) + theme_ridges() + theme(legend.position = "none") + ggtitle("Local Alpha") ############### # Fixed alpha # ############### dataEmpirical$fixedError <- 100*(1 - 1.268/dataEmpirical$alpha) fa <- ggplot(dataEmpirical, aes(x = `fixedError`, y = `interact`, fill = neurType)) + geom_density_ridges(alpha = 0.8, bandwidth = bw) + theme_ridges() + theme(legend.position = "none") + ggtitle("Fixed Alpha") ############## # Joint plot # ############## png(filename=paste("alphasComparison_LRbyLP_bw", bw, ".png", sep=""), type="cairo", units="in", width=15, height=6, pointsize=12, res=300) ggarrange(ga, la, fa, ncol=3, nrow=1) dev.off() ``` ###Histograms ```{r, fig.width=8, fig.height=8} # Use dataShort for plotting histogram( ~ error | Plane + neurType, dataEmpirical, nint = 200, type ="density") histogram( ~ localErr | Plane + neurType, dataEmpirical, nint = 200, type ="density") histogram( ~ fixedError | Plane + neurType, dataEmpirical, nint = 200, type ="density") ``` ###Error vs LP ```{r} ## Create cuts: x_c <- cut(dataEmpirical$LP, 50) y_c <- cut(dataEmpirical$fixedError, 50) ## Calculate joint counts at cut levels: z <- table(x_c, y_c) ## Plot as a 3D histogram: plot3D::hist3D(dataEmpirical$LP, dataEmpirical$fixedError, border="black") ## Plot as a 2D heatmap: plot3D::image2D(z=z, border="black") gplots::hist2d(dataEmpirical[, c(3,11)], nbins=50) ``` ##OLS ```{r, fig.width=15, fig.height=10} ############## # Fit models # ############## # mod3 <- lm(LR ~ LP*neurType*Plane, data = allData[allData$neurType != "Type4", ]) mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData[allData$neurType != "Type4", ]) mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData) mod2 <- lm(LR ~ LP*neurType + LP*Plane + neurType*Plane, data = allData) mod1 <- lm(LR ~ LP + neurType + Plane, data = allData) # Summmaries and anovas anova(mod3, mod2) # 3rd level interaction seems significant summary(mod3) car::Anova(mod3) # RLS2 robust variance summaryRobust(mod3, robust = TRUE) # Do some plotting to check lines interact_plot(model = mod3, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ")) interact_plot(model = mod3, pred = LP, modx = neurType, mod2 = Plane, plot.points = TRUE, interval = T, modx.labels = c("Type1", "Type2", "Type3", "Type4"), mod2.labels = c("XY", "XZ", "YZ")) # Pruebas mod3.XY <- lm(LR ~ LP*neurType - 1, data = allData[allData$Plane == "XY" & allData$neurType != "Type4", ]) car::Anova(mod3.XY) summary(mod3.XY) ####### # NAs # ####### # Test that NAs happen, when no intercept or main effects are present, for the dummy factor level that is not used as reference because one level is a linear combination of the others mod.Intercept <- lm(LR ~ LP + LP:neurType + LP:Plane + neurType:Plane + LP:neurType:Plane, data = allData) modNA.neur4 <- lm(LR ~ LP + LP:neurType + LP:Plane + neurType:Plane + LP:neurType:Plane - 1, data = allData) modNA.planYZ <- lm(LR ~ LP + LP:Plane + LP:neurType + neurType:Plane + LP:Plane:neurType - 1, data = allData) summary(modNA.neur4) # Do some plotting to check lines interact_plot(model = mod.Intercept, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ")) interact_plot(model = modNA.neur4, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ")) ``` ###Diagnosis ```{r, fig.width=15, fig.height=10} # Diagnosis par(mfrow = c(2, 3)) plot(mod3, 1:5) abline(h=c(-3,3), col="gray", lty="dashed") ############################ # Discard by Cook distance # ############################ # Augment model with diagnosis mod3.diag.metrics <- broom::augment(mod3) # Find observations that are highly influential cookDist <- mod3.diag.metrics[mod3.diag.metrics$.cooksd > 0.02, ] # Find rows and IDs of those observations to eliminate its participation in all planes remRows <- sapply(cookDist$.rownames, function(x) which(rownames(allData) == x)) remIDs <- sapply(cookDist$.rownames, function(x) allData[which(rownames(allData) == x), 1]) remRowsIDs <- as.vector(sapply(unique(remIDs), function(x) which(allData$id == x))) # Discard highly influential observations allData2 <- allData[-remRowsIDs, ] # Refit mod3.2 <- lm(LR ~ LP + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData2) summary(mod3.2) # Diagnosis par(mfrow = c(2, 3)) plot(mod3.2, 1:5) abline(h=c(-3,3), col="gray", lty="dashed") ############################ # Discard by Strd Residual # ############################ # Augment model with diagnosis mod3.diag.metrics <- broom::augment(mod3) # Find observations that are highly influential stdResid <- mod3.diag.metrics[mod3.diag.metrics$.std.resid > 3, ] # Find rows and IDs of those observations to eliminate its participation in all planes remRows <- sapply(stdResid$.rownames, function(x) which(rownames(allData) == x)) remIDs <- sapply(stdResid$.rownames, function(x) allData[which(rownames(allData) == x), 1]) remRowsIDs <- as.vector(sapply(unique(remIDs), function(x) which(allData$id == x))) # Discard highly influential observations allData3 <- allData[-remRowsIDs, ] # Refit mod3.3 <- lm(LR ~ LP*neurType*Plane, data = allData3) summary(mod3.3) # Diagnosis par(mfrow = c(2, 3)) plot(mod3.3, 1:5) abline(h=c(-3,3), col="gray", lty="dashed") #################### # Discard outliers # #################### # Use multivariate Mahalanobis distance and add to allData mahalDist <- by(allData, allData$neurType, function(x) mahalanobis(x[,2:3], colMeans(x[,2:3]), cov(x[,2:3]))) allData$mahalDist <- unlist(mahalDist) # Sort from higher from lower Mahalanobis distance per neuron type mahalSort <- by(allData, allData$neurType, function(x) x[order(x$mahalDist, decreasing = TRUE),]) # Store as data frame mahalSort <- do.call("rbind", mahalSort) # Discard observations with Mahalanobis distance >= 30 allData4 <- mahalSort[mahalSort$mahalDist < 20, ] # Refit mod3.4 <- lm(LR ~ LP*neurType*Plane, data = allData4) summary(mod3.4) # Diagnosis par(mfrow = c(2, 3)) plot(mod3.4, 1:5) abline(h=c(-3,3), col="gray", lty="dashed") # 3D PLOT Add a number identifyng every neuron-plane combination data3D$mahalDist <- unlist(mahalDist) data3Dmahal <- data3D[data3D$mahalDist < 20, ] rgl::open3d() car::scatter3d(x = data3Dmahal$LP, y = data3Dmahal$LR, z = data3Dmahal$plaNum, group = data3Dmahal$neurType, surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE) # Do some plotting to check lines interact_plot(model = mod3.4, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ")) ########################### # Discard outliers by iqr # ########################### # https://easystats.github.io/performance/reference/check_outliers.html iqr <- by(allData[,2:3], attributes(check_outliers(mod3, method = "mahalanobis"))) mod3.5 <- update(mod3, .~., data = allData[-as.numeric(which(iqr$data$Outlier == TRUE)), ]) summary(mod3.5) # Diagnosis par(mfrow = c(2, 3)) plot(mod3.5, 1:5) abline(h=c(-3,3), col="gray", lty="dashed") # 3D PLOT Add a number identifyng every neuron-plane combination data3Diqr <- data3D[-as.numeric(which(iqr$data$Outlier == TRUE)), ] rgl::open3d() car::scatter3d(x = data3Diqr$LP, y = data3Diqr$LR, z = data3Diqr$plaNum, group = data3Diqr$neurType, surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE) # Do some plotting to check lines interact_plot(model = mod3.5, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ")) ``` ###One hot coding ```{r, fig.width=8, fig.height=8} onehotData <- one_hot(as.data.table(allData)) # Fit models mod1 <- lm(LR ~ LP + neurType_Type2 + neurType_Type3 + neurType_Type4 + Plane_XZ + Plane_YZ + LP:neurType_Type2 + LP:neurType_Type3, data = onehotData) # Summmaries and anovas summary(mod1) # Diagnosis par(mfrow = c(2, 2)) plot(mod1) ``` ###Partial regression ```{r} # http://www.colbyimaging.com/wiki/statistics/multiple-regression lm.lr <- lm(LR ~ neurType*Plane, data = allData) lm.lp <- lm(LP ~ neurType*Plane, data = allData) lm.Res <- lm(lm.lr$res ~ lm.lp$res:as.factor(allData$NeurPlane) + 0) summary(lm.Res) ``` ###Fixed/variable intercept/slopes ```{r} ############################ # Allow variable intercept # ############################ modIncp.neur <- lm(LR ~ LP + neurType, data = allData) modIncp.plane <- lm(LR ~ LP + Plane, data = allData) modIncp <- lm(LR ~ LP + neurType + Plane, data = allData) summary(modIncp.neur) summary(modIncp.plane) summary(modIncp) aggregate(LR ~ neurType, allData, mean) aggregate(LP ~ neurType, allData, mean) aggregate(LR ~ Plane, allData, mean) ############################ # Allow variable slope # ############################ modSlope.neur <- lm(LR ~ LP + LP:neurType, data = allData) modSlope.plane <- lm(LR ~ LP + LP:Plane, data = allData) modSlope <- lm(LR ~ LP + LP:neurType + LP:Plane, data = allData) summary(modSlope.neur) summary(modSlope.plane) summary(modSlope) ``` ###Contr sum ```{r} dataSum <- allData contr.sum(5) contrasts(dataSum$neurType) <- contr.sum(4) contrasts(dataSum$Plane) <- contr.sum(3) mod.Sum <- lm(LR ~ LP + LP:neurType + LP:Plane + LP:neurType:Plane, data = dataSum) summary(mod.Sum) ``` ```{r} nT <- allData$neurType p <- allData$Plane l <- allData$LP lr <- allData$LR X0 <- model.matrix(~ nT + p, contrasts.arg = list(nT = contr.treatment(n = 4, contrasts = FALSE), p = contr.treatment(n = 3, contrasts = FALSE))) f <- sample(gl(3, 4, labels = letters[1:3])) unname( rowSums(X0[, c("nT1", "nT2", "nT3", "nT4")]) ) unname( rowSums(X0[, c("p1", "p2", "p3")]) ) qr(X0)$rank summary(lm(lr ~ l + X0 - 1)) ``` ##RLS1 ```{r, fig.width=15, fig.height=10} # Weight observations by residual size # https://stats.idre.ucla.edu/r/dae/robust-regression/ # mod3 <- lm(LR ~ LP*neurType*Plane, data = allData[allData$neurType != "Type4", ]) # mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData[allData$neurType != "Type4", ]) mod3R <- rlm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData) mod2R <- rlm(LR ~ LP*neurType + LP*Plane + neurType*Plane - 1, data = allData) mod1R <- rlm(LR ~ LP + neurType + Plane - 1, data = allData) # Summmaries and anovas (sumMod <- summary(mod3R)) # Get which coefficients are significative, although it may not be correct to use t distribution for H0 in RLS # https://stat.ethz.ch/pipermail/r-help/2006-July/108659.html attributes(sumMod$coefficients)$dimnames[[1]][which(rob.pvals(mod3R) < 0.05)] car::Anova(mod3R) # Do some plotting to check lines interact_plot(model = mod3R, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T, mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ")) interact_plot(model = mod3R, pred = LP, modx = neurType, mod2 = Plane, plot.points = TRUE, interval = T, modx.labels = c("Type1", "Type2", "Type3", "Type4"), mod2.labels = c("XY", "XZ", "YZ")) ``` ##RLS2 ```{r} # Robust variance and clustering mod3 <- estimatr::lm_robust(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData, clusters = id) summary(mod3) ``` ###Diagnosis ```{r} # Diagnosis par(mfrow = c(2, 3)) plot(mod3R, 1:5) abline(h=c(-3,3), col="gray", lty="dashed") ``` ##GEE ```{r, fig.width=15, fig.height=10} # Fit models strucor <- "independence" strucor <- "exchangeable" dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType) mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataNo4, id = id, corstr = strucor) # mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData, id = id, corstr = strucor) mod2Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane - 1, data = dataNo4, id = id) mod1Gee <- geeglm(LR ~ LP + neurType + Plane - 1, data = dataNo4, id = id) # Summmaries and anovas coef(mod3Gee) anova(mod3Gee, mod2Gee, test = "F") summary(mod3Gee) anova(mod3Gee, test = "Chi") # Varriable selection gee_stepper(mod1Gee, formula(mod3Gee)) # also needs strucor ``` ###Change ref level ```{r} # Change reference level dataNo4$neurType <- relevel(dataNo4$neurType, ref="Type2") dataNo4$Plane <- relevel(dataNo4$Plane, ref="YZ") mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataNo4, id = id, corstr = strucor) summary(mod3Gee) ``` ###Check correlation matrix We can also use the naive and robust variance estimates to select a correlation model. The model whose robust variance estimates most closely resembles its naive variance estimates is the better correlation model. To obtain a single summary statistic for this comparison I use the entire parameter covariance matrix and sum the absolute differences between the naive and robust covariance matrices. Link: https://sakai.unc.edu/access/content/group/2842013b-58f5-4453-aa8d-3e01bacbfc3d/public/Ecol562_Spring2012/docs/lectures/lecture23.htm#checking ```{r} # Select data datToMod <- allData # Check empirical correlation matrix mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod) xyRes <- mod3$residuals[datToMod$Plane == "XY"] xzRes <- mod3$residuals[datToMod$Plane == "XZ"] yzRes <- mod3$residuals[datToMod$Plane == "YZ"] Hmisc::rcorr(cbind(xyRes, xzRes, yzRes)) # Fit models with differente corr mat mod3Gee.In <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod, id = id, corstr = "independence") mod3Gee.Ex <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod, id = id, corstr = "exchangeable") mod3Gee.Ar <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod, id = id, corstr = "ar1") mod3Gee.Un <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod, id = id, corstr = "unstructured") # Deviation from naive variance sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit) # QIC criteria sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x)) # Check multiple comparisons K1 <- glht(mod3Gee.Ex, mcp(neurType = "Tukey"))$linfct K2 <- glht(mod3Gee.Ex, mcp(Plane = "Tukey"))$linfct summary(glht(mod3Gee.Ex, linfct = rbind(K1, K2))) mod3Gee.check <- geeglm(LR ~ LP + interact + LP:interact, data = datToMod, id = id, corstr = "exchangeable") summary(glht(mod3Gee.check, linfct = mcp(interact = "Tukey"))) # Calculate means meanCalc <- aggregate(LR ~ neurType + Plane, data = allData, mean) summary(geeglm(LR ~ LP + neurType + Plane, data = datToMod, id = id, corstr = "exchangeable")) ``` ###log transformation ```{r} # Select data datToMod <- dataNo4 datToMod$logLP <- log(datToMod$LP) datToMod$logLR <- log(datToMod$LR) # Fit models with differente corr mat mod3Gee.In <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1, data = datToMod, id = id, corstr = "independence") mod3Gee.Ex <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1, data = datToMod, id = id, corstr = "exchangeable") mod3Gee.Ar <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1, data = datToMod, id = id, corstr = "ar1") mod3Gee.Un <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1, data = datToMod, id = id, corstr = "unstructured") # Deviation from naive variance sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit) # QIC criteria sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x)) # Check the best option summary(mod3Gee.Ex) anova(mod3Gee.Ex) ``` ###Standarize ```{r} # List order is: Type1XY, Type2XY, Type3XY, Type1XZ... LPstand <- by(dataNo4$LP, list(dataNo4$neurType, dataNo4$Plane), scale) LRstand <- by(dataNo4$LR, list(dataNo4$neurType, dataNo4$Plane), scale) # Number of observations numObs <- unlist(lapply(LPstand[1:3], function(x) length(x))) # Create data frame dataStand <- data.frame(id = rep(1:sum(numObs), 3), LR = unlist(LRstand), LP = unlist(LPstand), Plane = rep(c("XY", "XZ", "YZ"), each = sum(numObs)), neurType = rep(rep(c("Type1", "Type2", "Type3"), times = numObs), 3)) dataStand <- dataStand[order(dataStand$id), ] dataStand$interact <- interaction(dataStand$neurType, dataStand$Plane, lex.order=T) # Plot ggplot(dataStand, aes(x = `LP`, y = `interact`, fill = neurType)) + geom_density_ridges(alpha = 0.8) + theme_ridges() + theme(legend.position = "none") # Fit Gee # dataStand$neurType <- relevel(dataNo4$neurType, ref="Type1") # dataStand$Plane <- relevel(dataNo4$Plane, ref="XY") standGee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataStand, id = id, corstr = "exchangeable") standMain <- geeglm(LR ~ LP + neurType + Plane - 1, data = dataStand, id = id, corstr = "exchangeable") summary(standGee) anova(standGee) anova(standGee, standMain) ``` ####Bootstrap ```{r, fig.width=10, fig.height=5} # Bootstrap for inference # Ensure reproducibility # set.seed(12345) # bootInter <- bootCoefint(dataStand, fullMod = T, "exchangeable", 500) # saveRDS(bootInter, "bootCoefsInteraction.rds") # Store as data frame options(digits=10) bootFrame <- data.frame(Type1XY = bootInter$LP, Type1XZ = bootInter$LP + bootInter$LP.PlaneXZ, Type1YZ = bootInter$LP + bootInter$LP.PlaneYZ, Type2XY = bootInter$LP + bootInter$LP.neurTypeType2, Type2XZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneXZ, Type2YZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneYZ, Type3XY = bootInter$LP + bootInter$LP.neurTypeType3, Type3XZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneXZ, Type3YZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneYZ) bootMelt <- reshape2::melt(bootFrame) bootMelt$neurType <- rep(c("Type1", "Type2", "Type3"), each = 1500) bootMelt$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = 500), 3) #Represent LP ggNeur <- ggplot(data=bootMelt, aes(x=value, fill=Plane)) + geom_density(adjust=1.5, alpha = .4) + theme_bw() + facet_wrap(~neurType, ncol = 3) + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) ggPlanes <- ggplot(data=bootMelt, aes(x=value, fill=neurType)) + geom_density(adjust=1.5, alpha = .4) + theme_bw() + facet_wrap(~Plane, ncol = 3) + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) ggarrange(ggNeur, ggPlanes, ncol = 2, nrow = 1) ``` #####CI95 plot ```{r} # Extract the quantiles we need quant <- apply(bootFrame, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7)) quantFrame <- reshape2::melt(quant) quantFrame$y <- rep(1:9, each=3) # Plot CI95 plot(0, type="n", xlim=c(0.9,1.1), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7) ytick <- seq(1, 9, by=1) axis(side=2, at=ytick, labels = FALSE) text(par("usr")[1], ytick, labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7) for (i in seq(1, 27,3)) { segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4]) points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red") } abline(v=c(1.268), lty = "dashed", col = "gray") text(x=1.2765, y=10.1, "1.268",col="gray", font=2, cex=0.6) ``` ###Divide max LR ```{r} # Find max LR maxLR <- aggregate(LR ~ neurType + Plane, dataNo4, max) dataMax <- dataNo4 # Divide by max LR per axon type and plane for (i in c("Type1", "Type2", "Type3")) { for (j in c("XY", "XZ", "YZ")) { # Subset and divide LR lr <- dataMax[dataMax$neurType == i & dataMax$Plane == j, 2] dataMax[dataMax$neurType == i & dataMax$Plane == j, 2] <- lr/maxLR[maxLR$neurType == i & maxLR$Plane == j, 3] # Subset and divide LP lp <- dataMax[dataMax$neurType == i & dataMax$Plane == j, 3] dataMax[dataMax$neurType == i & dataMax$Plane == j, 3] <- lp/maxLR[maxLR$neurType == i & maxLR$Plane == j, 3] } } # Plot ggplot(dataMax, aes(x = `LP`, y = `interact`, fill = neurType)) + geom_density_ridges(alpha = 0.8) + theme_ridges() + theme(legend.position = "none") # Fit GEE maxGee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataMax, id = id, corstr = "exchangeable") summary(maxGee) anova(maxGee) ``` ###neurType*Plane included ```{r} # Select data datToMod <- dataNo4 contrasts(datToMod$neurType) <- contr.treatment contrasts(datToMod$Plane) <- contr.treatment # Fit models with differente corr mat mod3Gee.In <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "independence") mod3Gee.Ex <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "exchangeable") mod3Gee.Ar <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "ar1") mod3Gee.Un <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "unstructured") # Deviation from naive variance sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit) # QIC criteria sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x)) # Exchangeable seems the best option mod3Gee.Ex.Int <- geeglm(LR ~ LP*interact, data = datToMod, id = id, corstr = "exchangeable") # Multiple comparisons FWER comparMult <- glht(mod3Gee.Ex.Int, linfct = mcp(interact = "Tukey")) summary(comparMult) # Multiple comparisons FDR summary(comparMult, adjust = "bonferroni") # Variable selection mod1Gee <- geeglm(LR ~ 1, data = datToMod, id = id, corstr = "exchangeable") gee_stepper(mod1Gee, formula(mod3Gee.Ex)) summary(mod3Gee.Ex) anova(mod3Gee.Ex) ``` ####log transformation ```{r} # Select data datToMod <- dataNo4 datToMod$interact <- factor(datToMod$interact) datToMod$logLP <- log(datToMod$LP) datToMod$logLR <- log(datToMod$LR) contrasts(datToMod$neurType) <- contr.treatment contrasts(datToMod$Plane) <- contr.treatment # Fit models with differente corr mat mod3Gee.In <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "independence") mod3Gee.Ex <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "exchangeable") mod3Gee.Ar <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "ar1") mod3Gee.Un <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "unstructured") # Deviation from naive variance sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit) # QIC criteria sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x)) # Check the best option summary(mod3Gee.Ex) ``` ####1/(y^2) ```{r} # Select data datToMod <- allData datToMod$interact <- factor(datToMod$interact) datToMod$wei <- 1/((datToMod$LR)^2) contrasts(datToMod$neurType) <- contr.treatment contrasts(datToMod$Plane) <- contr.treatment # Fit models with differente corr mat mod3Gee.In <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "independence") mod3Gee.Ex <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "exchangeable") mod3Gee.Ar <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "ar1") mod3Gee.Un <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "unstructured") # Deviation from naive variance sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit) # QIC criteria sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x)) # Check the best option summary(mod3Gee.Un) ``` ##Bootstrapped coefficients ```{r} # Store bootstrapped coefficients in an empty list listBoot <- list() count <- 1 reps <- 4000 # Iterate through all axon types and planes # Ensure reproducibility # set.seed(12345) for (i in c("Type1", "Type2", "Type3", "Type4")) { for (j in c("XY", "XZ", "YZ")) { listBoot[[count]] <- bootCoef(allData, i, j, "exchangeable", reps) count <- count + 1 } } # saveRDS(listBoot, "bootCoefs400reps.rds") # Store as data frame bootFrame <- do.call(rbind, listBoot) bootFrame$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), each = reps*3) bootFrame$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = reps), 4) bootFrame$NeurPlane <- interaction(bootFrame$neurType, bootFrame$Plane, lex.order=T) # Extract the quantiles we need listQuant <- list() count <- 1 quantToEst <- c(0.025,0.05,0.95,0.975) # Iterate through all axon types and planes for (i in c("Type1", "Type2", "Type3", "Type4")) { for (j in c("XY", "XZ", "YZ")) { listQuant[[count]] <- quantile(bootFrame[bootFrame$neurType == i & bootFrame$Plane == j, 2], c(0.025,0.05,0.95,0.975)) count <- count + 1 } } # Store quantiles in data frame quantMat <- as.data.frame(do.call(rbind, listQuant)) quantMat$NeurPlane <- factor(paste(rep(c("Type1", "Type2", "Type3", "Type4"), each = 3), rep(c("XY", "XZ", "YZ"), each = 4), sep=".")) quantFrame <- reshape2::melt(quantMat) ``` ```{r} #Represent the CI ggplot(data=bootFrame, aes(x=LP, fill=Plane)) + geom_density(adjust=1.5, alpha = .4) + theme_bw() + facet_wrap(~neurType, ncol = 4) + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) ``` ###Simulate experimental noise ####Check intra-cluster variance ```{r} intraVar <- aggregate(LP ~ id, data = allData, function(x) sd(x)/mean(x)) intraVar$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes) png(filename=paste("intraclusterCV.png", sep=""), type="cairo", units="in", width=5, height=5, pointsize=12, res=300) intraVar %>% ggplot( aes(x=neurType, y=LP, fill=neurType)) + geom_boxplot() + scale_fill_viridis(discrete = TRUE, alpha=0.6) + theme_bw() + theme( legend.position="none", plot.title = element_text(size=11), axis.text.x = element_text(angle = 45, hjust = 1, size = 9) ) + xlab("Axonal type") + ylab("Intracluster Variance") dev.off() ``` ####Add normal noise ```{r} set.seed(12345) # type1Noise <- rnorm() ``` ##Figure scripts OLD ####Intracluster variance Same chunk as "Check intra-cluster variance" within "Bootstrapped coefficients" ```{r} intraVar <- aggregate(LP ~ id, data = allData, function(x) sd(x)/mean(x)) intraVar$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes) #Ggplot png(filename=paste("intraclusterCV.png", sep=""), type="cairo", units="in", width=5, height=5, pointsize=12, res=300) intraVar %>% ggplot( aes(x=neurType, y=LP, fill=neurType)) + geom_boxplot() + scale_fill_viridis(discrete = TRUE, alpha=0.6) + theme_bw() + theme( legend.position="none", plot.title = element_text(size=11), axis.text.x = element_text(angle = 45, hjust = 1, size = 9) ) + xlab("Axonal type") + ylab("Intracluster Variance") dev.off() #Base R png(filename=paste("intraclusterCV_baseR.png", sep=""), type="cairo", units="in", width=5, height=5, pointsize=12, res=300) boxplot(LP ~ neurType, data = intraVar, col= c("violet", "lightblue", "lightgreen", "tan1"), outpch = 19, outbg = "black", outcol = "black", cex = .7, ylim = c(0,0.4)) dev.off() ``` ###Inference #####Original model bootstrap Same chunk as "Check correlation matrix" within "GEE" ```{r, fig.width=10, fig.height=5} # Bootstrap for inference # Ensure reproducibility # set.seed(12345) # bootInter <- bootCoefint(dataNo4, "LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1", "exchangeable", 2000) # saveRDS(bootInter, "bootCoefsInteraction.rds") bootInter <- readRDS("bootCoefsInteraction.rds") # Store as data frame options(digits=10) bootFrame <- data.frame(Type1XY = bootInter$LP, Type1XZ = bootInter$LP + bootInter$LP.PlaneXZ, Type1YZ = bootInter$LP + bootInter$LP.PlaneYZ, Type2XY = bootInter$LP + bootInter$LP.neurTypeType2, Type2XZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneXZ, Type2YZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneYZ, Type3XY = bootInter$LP + bootInter$LP.neurTypeType3, Type3XZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneXZ, Type3YZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneYZ) bootMelt <- reshape2::melt(bootFrame) bootMelt$neurType <- rep(c("Type1", "Type2", "Type3"), each = 6000) bootMelt$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = 2000), 3) #Represent LP ggNeur <- ggplot(data=bootMelt, aes(x=value, fill=Plane)) + geom_density(adjust=1.5, alpha = .4) + theme_bw() + facet_wrap(~neurType, ncol = 3) + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) ggPlanes <- ggplot(data=bootMelt, aes(x=value, fill=neurType)) + geom_density(adjust=1.5, alpha = .4) + theme_bw() + facet_wrap(~Plane, ncol = 3) + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) png(filename=paste("bootstrapLP_neurvsplane.png", sep=""), type="cairo", units="in", width=10, height=3, pointsize=12, res=300) ggarrange(ggNeur, ggPlanes, ncol = 2, nrow = 1) dev.off() ``` #####CI95 plot ```{r} # Extract the quantiles we need quant <- apply(bootFrame, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7)) quantFrame <- reshape2::melt(quant) quantFrame$y <- rep(1:9, each=3) # Plot CI95 png(filename=paste("ci95.png", sep=""), type="cairo", units="in", width=5, height=5, pointsize=12, res=300) plot(0, type="n", xlim=c(1.20,1.35), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7) ytick <- seq(1, 9, by=1) axis(side=2, at=ytick, labels = FALSE) text(par("usr")[1], ytick, labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7) for (i in seq(1, 27,3)) { segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4]) points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red") } abline(v=c(1.268), lty = "dashed", col = "gray") text(x=1.2765, y=10.1, "1.268",col="gray", font=2, cex=0.6) dev.off() # Zoom to check whether 2 and 7 overlap or not png(filename=paste("ci95zoom.png", sep=""), type="cairo", units="in", width=5, height=5, pointsize=12, res=300) plot(0,type="n", xlim=c(1.26,1.27), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7) ytick<-seq(1, 9, by=1) axis(side=2, at=ytick, labels = FALSE) text(par("usr")[1], ytick, labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7) for (i in seq(1, 27,3)) { segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4]) points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red") } abline(v=c(1.268), lty = "dashed", col = "gray") dev.off() ``` #####Global LP ```{r} # Grand LP by sum of bootstrapped coeffients globalLP <- data.frame(LP = bootInter$LP + bootInter$LP.PlaneXZ + bootInter$LP.PlaneYZ + bootInter$LP.neurTypeType2 + bootInter$LP.neurTypeType2.PlaneXZ + bootInter$LP.neurTypeType2.PlaneYZ + bootInter$LP.neurTypeType3 + bootInter$LP.neurTypeType3.PlaneXZ + bootInter$LP.neurTypeType3.PlaneYZ) globalQuant <- quantile(globalLP$LP, c(0.025, 0.5, 0.975), type=7) sumLP <- ggplot(data=globalLP, aes(x=LP)) + geom_density(adjust=1.5, alpha = .4) + geom_vline(xintercept = globalQuant, linetype="dotted", color = "blue") + theme_bw() + ggtitle("Sum LP") + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) # Grand LP by bootstrapping # Ensure reproducibility # set.seed(12345) # bootLP <- bootCoefint(dataNo4, "LR ~ LP - 1", "exchangeable", 2000) bootLPQuant <- quantile(bootLP$LP, c(0.025, 0.5, 0.975), type=7) bootLP <- ggplot(data=bootLP, aes(x=LP)) + geom_density(adjust=1.5, alpha = .4) + geom_vline(xintercept = bootLPQuant, linetype="dotted", color = "blue") + theme_bw() + ggtitle("Boot LP") + theme( legend.position="bottom", panel.spacing = unit(1, "lines"), axis.ticks.x=element_blank() ) png(filename=paste("LPestimations.png", sep=""), type="cairo", units="in", width=10, height=5, pointsize=12, res=300) ggarrange(sumLP, bootLP, ncol = 2, nrow = 1) dev.off() ``` ##Figure scripts NEW ###Estimate alpha per neuron type ```{r} # Alpha per neurType options(digits = 10) ####### # GEE # ####### # Check correlation matrix geeNeur.in <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "independence") geeNeur.ex <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "exchangeable") geeNeur.ar <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "ar1") geeNeur.un <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "unstructured") # Deviation from naive variance sapply(list(geeNeur.in, geeNeur.ex, geeNeur.ar, geeNeur.un), var.crit) # Bootstrap # Ensure reproducibility # set.seed(12345) # bootNeur <- bootCoefint(allData, "LR ~ LP:neurType - 1", "exchangeable", 2000) # saveRDS(bootNeur, "bootCoefs_perNeuron.rds") # bootNeur <- readRDS("bootCoefs_perNeuron.rds") # colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4") ####### # OLS # ####### # Bootstrap # Ensure reproducibility # set.seed(12345) # bootNeur <- bootCoefintLM(allData, "LR ~ LP:neurType - 1", 2000) # saveRDS(bootNeur, "bootCoefs_perNeuron_OLS.rds") bootNeur <- readRDS("ProyeccionAnalysis/bootCoefs_perNeuron_OLS.rds") colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4") # Get R2 value summary(lm(LR ~ LP:neurType - 1, allData)) ``` ####CI95 plot ```{r} # Extract the quantiles we need quant <- apply(bootNeur, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7)) quantFrame <- reshape2::melt(quant) quantFrame$y <- rep(1:4, each=3) # Plot CI95 png(filename=paste("ci95_perNeuron_OLS.png", sep=""), type="cairo", units="in", width=5, height=5, pointsize=12, res=300) plot(0, type="n", xlim=c(1.265, 1.293), ylim=c(0.5,4.5), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7) ytick <- seq(1, 4, by=1) axis(side=2, at=ytick, labels = FALSE) text(par("usr")[1], ytick, labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7) for (i in seq(1, 12,3)) { segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4]) points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red") } rect(quantFrame[quantFrame$Var1 == "2.5%" & quantFrame$Var2 == "Type4", 3], -1, quantFrame[quantFrame$Var1 == "97.5%" & quantFrame$Var2 == "Type3", 3], 6, col = rgb(0.5,0.5,0.5,1/4), lty = "dashed", border = "darkgray") dev.off() ``` ####Line with shades ```{r, fig.width=8, fig.height=8} png(filename=paste("lines_perNeuron_OLS.png", sep=""), type="cairo", units="in", width=5, height=5, pointsize=12, res=300) op <- par(mfrow = c(2,2), oma = c(0,0,0,0) + 0.1, mar = c(0,0,1,1) + 0.1) for (i in (1:4)) { # Get axis range xRange <- allData[allData$neurType == paste("Type", i, sep=""), 3] yRange <- allData[allData$neurType == paste("Type", i, sep=""), 2] # Get max, mean and min alphas alphaVec <- c(max(bootNeur[,i]), median(bootNeur[,i]), min(bootNeur[,1])) # Plot observations, mean line and shade with span of all booted lines plot(LR ~ LP, data = allData[allData$neurType == paste("Type", i, sep=""), ], pch = 21, cex = 0.5, bg = "gray", col = NULL, xlab = "Projected 2D Length", ylab = "Real 3D Length", xaxt="n", yaxt = "n") lines(xRange, alphaVec[2]*xRange, col = "red", lwd = 0.5) polygon(x = c(min(xRange), min(xRange), max(xRange), max(xRange)), y = c(min(alphaVec[3]*xRange), min(alphaVec[1]*xRange), max(alphaVec[3]*xRange), max(alphaVec[1]*xRange)), col = rgb(1,0,0, alpha = 0.3), border = NA) # plotrix::corner.label(x = -1, y = 1, paste("Type", i, sep="")) } par(op) dev.off() ```