1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804 |
- ---
- title: "Axonal Length"
- author: "Sergio D?ez Hermano"
- date: '`r format(Sys.Date(),"%e de %B, %Y")`'
- output:
- html_document:
- highlight: tango
- toc: yes
- toc_depth: 4
- toc_float:
- collapsed: no
- ---
- ```{r setup, include=FALSE}
- require(knitr)
- # include this code chunk as-is to set options
- opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE)
- Sys.setlocale("LC_TIME", "C")
- ```
- ##Load libraries and functions
- ```{r}
- library(R.matlab)
- library(lattice)
- library(dplyr)
- library(ggplot2)
- library(ggridges)
- library(hrbrthemes)
- library(viridis)
- library(ggpubr)
- library(mltools)
- library(data.table)
- library(caret)
- library(interactions)
- library(performance)
- library(MASS) #rlm
- library(clickR) #rlm pvals
- library(geepack)
- library(pstools)
- library(MXM)
- library(rmcorr)
- library(multcomp)
- # https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html
- ###########################
- # Robust sanwich variance #
- ###########################
- source("summaryRobust.R")
- #########################################
- # Deviation from empirical variance GEE #
- #########################################
- var.crit <- function(mymodel) sum(abs(mymodel[["geese"]]$vbeta.naiv - mymodel[["geese"]]$vbeta))
- ############################
- # Distances between planes #
- ############################
- distPlan <- function(mydata) {
-
- # Vector to store distances
- distPlanes <- vector()
-
- # Estimate distances between planes for each subject
- for (i in unique(mydata$id)) {
- LPvals <- mydata[mydata$id == i, 3]
- distPlanes <- c(distPlanes,
- (LPvals[1] - LPvals[2])/max(LPvals[1], LPvals[2]),
- (LPvals[1] - LPvals[3])/max(LPvals[1], LPvals[3]),
- (LPvals[2] - LPvals[3])/max(LPvals[2], LPvals[3]))
- }
-
-
- # Concatenate factor with substraction information
- numSubjects <- length(unique(mydata$id))
- subsFactor <- rep(c("XY - XZ", "XY - YZ", "YZ - XZ"), rep = numSubjects)
- distFrame <- data.frame(id = mydata$id,
- type = mydata$neurType,
- distPlanes = distPlanes,
- substraction = subsFactor)
-
- return(distFrame)
-
- }
- ##########################
- # Bootstrap coefficients #
- ##########################
- bootCoef <- function(mydata, axontype, projection, corrMat, nreps) {
- # Select data
- modData <- mydata[mydata$neurType == axontype & mydata$Plane == projection, ]
-
- # Fit the model
- modGee <- geeglm(LR ~ LP, data = modData, id = id, corstr = corrMat)
-
- # Ensure reproducibility
- # set.seed(12345)
-
- # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
- coefmat <- matrix(NA, nreps, 2)
-
- # We need the fitted values and the residuals from the model
- resids <- residuals(modGee)
- preds <- fitted(modGee)
-
- # Repeatedly generate bootstrapped responses
- for(i in 1:nreps) {
- booty <- preds + sample(resids, rep=TRUE)
- modGee2 <- update(modGee, booty ~ .)
- coefmat[i,] <- coef(modGee2)
- }
-
- # Create a dataframe to store predictors values
- colnames(coefmat) <- names(coef(modGee2))
- coefmat <- data.frame(coefmat)
-
- return(coefmat)
- }
- ##############################
- # Bootstrap coefficients gEE #
- ##############################
- # For models with interactions
- bootCoefint <- function(mydata, modForm, corrMat, nreps) {
-
- # Fit model
- modGee <- geeglm(formula(modForm), data = mydata, id = id, corstr = corrMat)
- # Ensure reproducibility
- # set.seed(12345)
-
- # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
- coefmat <- matrix(NA, nreps, length(coef(modGee)))
-
- # We need the fitted values and the residuals from the model
- resids <- residuals(modGee)
- preds <- fitted(modGee)
-
- # Repeatedly generate bootstrapped responses
- for(i in 1:nreps) {
- booty <- preds + sample(resids, rep=TRUE)
- modGee2 <- update(modGee, booty ~ .)
- coefmat[i,] <- coef(modGee2)
- print(i)
- }
-
- # Create a dataframe to store predictors values
- colnames(coefmat) <- names(coef(modGee2))
- coefmat <- data.frame(coefmat)
-
- return(coefmat)
- }
- #############################
- # Bootstrap coefficients LM #
- #############################
- # For models with interactions
- bootCoefintLM <- function(mydata, modForm, nreps) {
-
- # Fit model
- modLM <- lm(formula(modForm), data = mydata)
- # Ensure reproducibility
- set.seed(12345)
-
- # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
- coefmat <- matrix(NA, nreps, length(coef(modLM)))
-
- # We need the fitted values and the residuals from the model
- resids <- residuals(modLM)
- preds <- fitted(modLM)
-
- # Repeatedly generate bootstrapped responses
- for(i in 1:nreps) {
- booty <- preds + sample(resids, rep=TRUE)
- modLM2 <- update(modLM, booty ~ .)
- coefmat[i,] <- coef(modLM2)
- print(i)
- }
-
- # Create a dataframe to store predictors values
- colnames(coefmat) <- names(coef(modLM2))
- coefmat <- data.frame(coefmat)
-
- return(coefmat)
- }
- ```
- ##Load data
- ```{r}
- # Get file paths
- axonNames <- list.files(paste("ProyeccionData/", sep=""), full.names=T)
- # Load matlab file
- axonFiles <- lapply(axonNames, function(x) readMat(x))
- names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4")
- # VAR NAMES
- # REAL_LENGTH, PROYECTION_LENGTH_XY, PROYECTION_LENGTH_XZ, PROYECTION_LENGTH_YZ
- # Extract data
- realLength <- lapply(axonFiles, function(x) x$REAL.LENGTH)
- planeXY <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XY)
- planeXZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XZ)
- planeYZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.YZ)
- # Get number of neurons per type
- numberTypes <- unlist(lapply(realLength, function(x) length(x)))
- # Store in data frames by plane
- xyData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeXY), Plane = rep("XY", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- xzData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeXZ), Plane = rep("XZ", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- yzData <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), LP = unlist(planeYZ), Plane = rep("YZ", sum(numberTypes)),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- # Merge into a single data frame and sort by id
- allData <- do.call("rbind", list(xyData, xzData, yzData))
- allData <- allData[order(allData$id),]
- # Add different number for every neuron-plane combination
- allData$NeurPlane <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]),
- rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
- allData$interact <-interaction(allData$neurType, allData$Plane, lex.order=T)
- # Create data frame w/o Type4
- dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)
- # Data in short format
- dataShort <- data.frame(id = 1:length(unlist(realLength)),
- LR = unlist(realLength), XY = unlist(planeXY), XZ = unlist(planeXZ), YZ = unlist(planeYZ),
- neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
- ```
- ##Descriptive
- ###Histograms
- ```{r, fig.width=8, fig.height=8}
- # Use dataShort for plotting
- histogram( ~ YZ + XZ + XY + LR | neurType, dataShort, type ="density")
- histogram( ~ LR | Plane, allData, type ="density")
- ```
- ####GGplot2
- ```{r, fig.width=8, fig.height=8}
- #https://www.r-graph-gallery.com/violin_grouped_ggplot2.html
- # New variables releveled for plotting reasons
- allData$NPFac <- factor(allData$NeurPlane)
- levels(allData$NPFac) <- paste(rep(levels(allData$neurType), each = 3), rep(levels(allData$Plane), 3), sep=".")
- # allData$NPFac <- factor(allData$NPFac, levels = rev(levels(allData$NPFac)))
- allData$typeFac <- factor(allData$neurType, levels = rev(levels(allData$neurType)))
- # Plot
- ggplot(allData, aes(x = `LP`, y = `NPFac`, fill = ..x..)) +
- geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
- scale_fill_viridis(name = "LP", option = "D") +
- labs(title = 'Length 2D Proyections') +
- theme_ipsum() +
- theme(
- legend.position="none",
- panel.spacing = unit(0.1, "lines"),
- strip.text.x = element_text(size = 8)
- )
- # Plot
- ggplot(allData, aes(x = `LR`, y = `neurType`, fill = ..x..)) +
- geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
- scale_fill_viridis(name = "LP", option = "D") +
- labs(title = 'Length 2D Proyections') +
- theme_ipsum() +
- theme(
- legend.position="none",
- panel.spacing = unit(0.1, "lines"),
- strip.text.x = element_text(size = 8)
- )
- # Plot
- ggplot(allData, aes(x = LP, y = NPFac, fill = neurType)) +
- geom_density_ridges() +
- theme_ridges() +
- theme(legend.position = "none")
- # All together gradient
- # dataShort$normLR <- dataShort$LR/dataShort$LR
- # dataShort$normXY <- dataShort$LR/dataShort$XY
- # dataShort$normXZ <- dataShort$LR/dataShort$XZ
- # dataShort$normYZ <- dataShort$LR/dataShort$YZ
- joinData <- melt(setDT(dataShort), id.vars = c("id","neurType"), variable.name = "length")
- joinData <- joinData[order(joinData$id), ]
- joinData$NeurPlane <-interaction(joinData$neurType, joinData$length, lex.order=T)
- png(filename=paste("gradientDistributions.png", sep=""), type="cairo",
- units="in", width=10, height=10, pointsize=12,
- res=300)
- ggplot(joinData, aes(x = `value`, y = `NeurPlane`, fill = ..x..)) +
- geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
- scale_fill_viridis(name = "LP", option = "D") +
- labs(title = 'Length 2D Proyections') +
- theme_ipsum() +
- theme(
- legend.position="none",
- panel.spacing = unit(0.1, "lines"),
- strip.text.x = element_text(size = 8)
- )
- dev.off()
- # All together gradient (LOG)
- joinData <- melt(setDT(dataShort), id.vars = c("id","neurType"), variable.name = "length")
- joinData <- joinData[order(joinData$id), ]
- joinData$NeurPlane <- interaction(joinData$neurType, joinData$length, lex.order=T)
- joinData$logValue <- log(joinData$value)
- ggplot(joinData, aes(x = `logValue`, y = `NeurPlane`, fill = ..x..)) +
- geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
- scale_fill_viridis(name = "LP", option = "D") +
- labs(title = 'Length 2D Proyections') +
- theme_bw() +
- theme(
- legend.position="none",
- panel.spacing = unit(0.1, "lines"),
- strip.text.x = element_text(size = 8)
- )
- # All together color by type
- ggplot(joinData, aes(x = `value`, y = `NeurPlane`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none")
- ```
- ####GGPlot2 Stacked
- ```{r, fig.width=20, fig.height=6}
- # All together overlapping
- # levels(joinData$length) <- rev(levels(joinData$length))
- joinData$estim <- joinData$value*1.27
- joinData[joinData$length == "LR", 6] <- joinData[joinData$length == "LR", 4]
- png(filename=paste("gradientStacked.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- ggplot(data=joinData, aes(x=value, fill=length)) +
- geom_density(adjust=1.5, alpha = .4) +
- theme_ipsum() +
- facet_wrap(~neurType, ncol = 4) +
- xlim(0, 2.5e05) +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- dev.off()
- png(filename=paste("gradientEstimation.png", sep=""), type="cairo",
- units="in", width=20, height=5, pointsize=12,
- res=300)
- ggplot(data=joinData, aes(x=estim, fill=length)) +
- geom_density(adjust=1.5, alpha = .4) +
- theme_ipsum() +
- facet_wrap(~neurType, ncol = 4) +
- xlim(0, 2.5e05) +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- dev.off()
- ```
- ###Boxplot
- ```{r, fig.width=20, fig.height=5}
- # Use dataShort for plotting
- lrBx <- ggplot(dataShort, aes(x=neurType, y=LR, fill = neurType)) +
- geom_violin(trim=FALSE) +
- geom_boxplot(width=0.1, fill = "white") + theme_minimal()
- xyBx <- ggplot(dataShort, aes(x=neurType, y=XY, fill = neurType)) +
- geom_violin(trim=FALSE) +
- geom_boxplot(width=0.1, fill = "white") + theme_minimal()
- xzBx <- ggplot(dataShort, aes(x=neurType, y=XZ, fill = neurType)) +
- geom_violin(trim=FALSE) +
- geom_boxplot(width=0.1, fill = "white") + theme_minimal()
- yzBx <- ggplot(dataShort, aes(x=neurType, y=YZ, fill = neurType)) +
- geom_violin(trim=FALSE) +
- geom_boxplot(width=0.1, fill = "white") + theme_minimal()
- ggarrange(lrBx, xyBx, xzBx, yzBx, ncol = 4, nrow = 1)
- ```
- ###Correlations
- ####LR vs LP by Neuron
- ```{r, fig.width=20}
- par(mfrow = c(1,4))
- plot(LR ~ LP, data = allData[allData$neurType == "Type1", ], pch = 21, bg = "coral", main = "Type1", xlim = c(0,250000), ylim = c(0, 300000))
- abline(h=1000, v=1000, col = "gray", lty = "dashed")
- plot(LR ~ LP, data = allData[allData$neurType == "Type2", ], pch = 21, bg = "lightblue", main = "Type2", xlim = c(0,250000), ylim = c(0, 300000))
- abline(h=1000, v=1000, col = "gray", lty = "dashed")
- plot(LR ~ LP, data = allData[allData$neurType == "Type3", ], pch = 21, bg = "lightgreen", main = "Type3", xlim = c(0,250000), ylim = c(0, 300000))
- abline(h=1000, v=1000, col = "gray", lty = "dashed")
- plot(LR ~ LP, data = allData[allData$neurType == "Type4", ], pch = 21, bg = "violet", main = "Type4", xlim = c(0,250000), ylim = c(0, 300000))
- abline(h=1000, v=1000, col = "gray", lty = "dashed")
- ```
- ####LR vs LP by Plane
- ```{r, fig.width=15}
- par(mfrow = c(1,3))
- plot(LR ~ LP, data = allData[allData$Plane == "XY", ], pch = 21, bg = "red", main = "XY", xlim = c(0,250000), ylim = c(0, 300000))
- abline(h=1000, v=1000, col = "gray", lty = "dashed")
- plot(LR ~ LP, data = allData[allData$Plane == "XZ", ], pch = 21, bg = "blue", main = "XZ", xlim = c(0,250000), ylim = c(0, 300000))
- abline(h=1000, v=1000, col = "gray", lty = "dashed")
- plot(LR ~ LP, data = allData[allData$Plane == "YZ", ], pch = 21, bg = "green", main = "YZ", xlim = c(0,250000), ylim = c(0, 300000))
- abline(h=1000, v=1000, col = "gray", lty = "dashed")
- ```
- ####LR vs LP by Neuron*Plane
- ```{r}
- # Duplicate data to avoid confusion
- data3D <- allData
- # Add a number identifyng every neuron-plane combination
- data3D$plaNum <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]), rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
- # Plot
- car::scatter3d(x = data3D$LP, y = data3D$LR, z = data3D$plaNum, group = data3D$neurType,
- surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
- # Plot
- dataXY.1v4 <- data3D[data3D$Plane == "XY" & (data3D$neurType == "Type1" | data3D$neurType == "Type4"), ]
- car::scatter3d(x = dataXY.1v4$LP, y = dataXY.1v4$LR, z = dataXY.1v4$plaNum, group = dataXY.1v4$neurType,
- surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
- ```
- ####Planes distance
- ```{r, fig.width=20, fig.height=5}
- distWithin <- distPlan(allData)
- distWithin$Combin <- interaction(distWithin$type, distWithin$substraction, lex.order = TRUE)
- boxplot(distPlanes ~ substraction + type, distWithin)
- # Boxplot
- png(filename=paste("boxDistance.png", sep=""), type="cairo",
- units="in", width=5, height=5, pointsize=12,
- res=300)
- bx1 <- distWithin %>%
- ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
- geom_boxplot() +
- scale_fill_viridis(discrete = TRUE, alpha=0.6) +
- theme_ipsum() +
- theme(
- legend.position="none",
- plot.title = element_text(size=11),
- axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
- ) +
- ggtitle("Distances plane boxplot") +
- xlab("")
- bx1
- dev.off()
- # Boxlot with jitter
- bx2 <- distWithin %>%
- ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
- geom_boxplot() +
- scale_fill_viridis(discrete = TRUE, alpha=0.6) +
- geom_jitter(color = "black", size=0.1, alpha=0.9) +
- scale_color_viridis(discrete = TRUE, alpha=0.6) +
- theme_ipsum() +
- theme(
- legend.position="none",
- plot.title = element_text(size=11),
- axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
- ) +
- ggtitle("A boxplot with jitter") +
- xlab("")
- # Vertical violin
- bx3 <- distWithin %>%
- ggplot(aes(fill=substraction, y=distPlanes, x=type)) +
- geom_violin(position="dodge", alpha=0.5, outlier.colour="transparent") +
- scale_fill_viridis(discrete=T, name="") +
- theme_ipsum() +
- xlab("") +
- ylab("Distance Within Types")
- # Horizontal violin
- bx4 <- distWithin %>%
- ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
- geom_violin(width=2, size=0.1) +
- scale_fill_viridis(discrete=TRUE) +
- scale_color_viridis(discrete=TRUE) +
- theme_ipsum() +
- theme(
- legend.position="none"
- ) +
- coord_flip() + # This switch X and Y axis and allows to get the horizontal version
- xlab("") +
- ylab("Distance Within Types")
- ggarrange(bx1, bx2, bx3, bx4, ncol = 4, nrow = 1)
- ```
- #####Distances ggplot
- ```{r, fig.width=15, fig.height=5}
- rest1 <- ggplot(distWithin[distWithin$substraction == "XY - XZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
- geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
- scale_fill_viridis(name = "LP", option = "D") +
- labs(title = 'XY - XZ') +
- theme_ipsum() +
- theme(
- legend.position="none",
- panel.spacing = unit(0.1, "lines"),
- strip.text.x = element_text(size = 8)
- )
- rest2 <- ggplot(distWithin[distWithin$substraction == "XY - YZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
- geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
- scale_fill_viridis(name = "LP", option = "D") +
- labs(title = 'XY - YZ') +
- theme_ipsum() +
- theme(
- legend.position="none",
- panel.spacing = unit(0.1, "lines"),
- strip.text.x = element_text(size = 8)
- )
- rest3 <- ggplot(distWithin[distWithin$substraction == "YZ - XZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
- geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
- scale_fill_viridis(name = "LP", option = "D") +
- labs(title = 'YZ - XZ') +
- theme_ipsum() +
- theme(
- legend.position="none",
- panel.spacing = unit(0.1, "lines"),
- strip.text.x = element_text(size = 8)
- )
- ggarrange(rest1, rest2, rest3, ncol = 3, nrow = 1)
- car::scatter3d(x = distWithin[distWithin$substraction == "XY - XZ", 3],
- y = distWithin[distWithin$substraction == "XY - YZ", 3],
- z = distWithin[distWithin$substraction == "YZ - XZ", 3],
- group = factor(rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)),
- surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
- plot(x = distWithin[distWithin$substraction == "XY - XZ", 3],
- y = distWithin[distWithin$substraction == "XY - YZ", 3],
- pch = 21,
- bg = factor(rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)))
- ```
- ##Empirical error
- ```{r, fig.width=15}
- # https://stackoverflow.com/questions/12394321/r-what-algorithm-does-geom-density-use-and-how-to-extract-points-equation-of
- # https://stackoverflow.com/questions/24985361/understanding-bandwidth-smoothing-in-ggplot2
- dataEmpirical <- allData
- dataEmpirical$alpha <- dataEmpirical$LR/dataEmpirical$LP
- #######################################
- # Estimate alphas and use grand alpha #
- #######################################
- # Grand alpha
- dataEmpirical$error <- 100*(1 - mean(dataEmpirical$alpha)/dataEmpirical$alpha)
- # Plot
- ga <- ggplot(dataEmpirical, aes(x = `error`, y = `interact`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- ggtitle("Grand Alpha")
- #######################################
- # Estimate alphas and use local alpha #
- #######################################
- # Find mean alpha per group
- meanAlpha <- aggregate(alpha ~ neurType + Plane, dataEmpirical, mean)
- dataEmpirical$localErr <- dataEmpirical$error
- # Divide by max LR per axon type and plane
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
- # Subset and divide alpha
- alphaVec <- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 8]
- localAlpha <- meanAlpha[meanAlpha$neurType == i & meanAlpha$Plane == j, 3]
- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 10] <- 100*(1 - localAlpha/alphaVec)
- }
- }
- # Plot
- la <-ggplot(dataEmpirical, aes(x = `localErr`, y = `interact`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- ggtitle("Local Alpha")
- ###############
- # Fixed alpha #
- ###############
- dataEmpirical$fixedError <- 100*(1 - 1.268/dataEmpirical$alpha)
- fa <- ggplot(dataEmpirical, aes(x = `fixedError`, y = `interact`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none") +
- ggtitle("Fixed Alpha")
- ##############
- # Joint plot #
- ##############
- png(filename=paste("alphasComparison_LRbyLP.png", sep=""), type="cairo",
- units="in", width=15, height=6, pointsize=12,
- res=300)
- ggarrange(ga, la, fa, ncol=3, nrow=1)
- dev.off()
- ```
- ###Equal Bandwidth
- ```{r, fig.width=15}
- # https://stackoverflow.com/questions/12394321/r-what-algorithm-does-geom-density-use-and-how-to-extract-points-equation-of
- # https://stackoverflow.com/questions/24985361/understanding-bandwidth-smoothing-in-ggplot2
- dataEmpirical <- allData
- dataEmpirical$alpha <- dataEmpirical$LR/dataEmpirical$LP
- bw <- 1
- #######################################
- # Estimate alphas and use grand alpha #
- #######################################
- # Grand alpha
- dataEmpirical$error <- 100*(1 - mean(dataEmpirical$alpha)/dataEmpirical$alpha)
- # Plot
- ga <- ggplot(dataEmpirical, aes(x = `error`, y = `interact`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8, bandwidth = bw) +
- theme_ridges() +
- theme(legend.position = "none") +
- ggtitle("Grand Alpha")
- #######################################
- # Estimate alphas and use local alpha #
- #######################################
- # Find mean alpha per group
- meanAlpha <- aggregate(alpha ~ neurType + Plane, dataEmpirical, mean)
- dataEmpirical$localErr <- dataEmpirical$error
- # Divide by max LR per axon type and plane
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
- # Subset and divide alpha
- alphaVec <- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 8]
- localAlpha <- meanAlpha[meanAlpha$neurType == i & meanAlpha$Plane == j, 3]
- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 10] <- 100*(1 - localAlpha/alphaVec)
- }
- }
- # Plot
- la <-ggplot(dataEmpirical, aes(x = `localErr`, y = `interact`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8, bandwidth = bw) +
- theme_ridges() +
- theme(legend.position = "none") +
- ggtitle("Local Alpha")
- ###############
- # Fixed alpha #
- ###############
- dataEmpirical$fixedError <- 100*(1 - 1.268/dataEmpirical$alpha)
- fa <- ggplot(dataEmpirical, aes(x = `fixedError`, y = `interact`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8, bandwidth = bw) +
- theme_ridges() +
- theme(legend.position = "none") +
- ggtitle("Fixed Alpha")
- ##############
- # Joint plot #
- ##############
- png(filename=paste("alphasComparison_LRbyLP_bw", bw, ".png", sep=""), type="cairo",
- units="in", width=15, height=6, pointsize=12,
- res=300)
- ggarrange(ga, la, fa, ncol=3, nrow=1)
- dev.off()
- ```
- ###Histograms
- ```{r, fig.width=8, fig.height=8}
- # Use dataShort for plotting
- histogram( ~ error | Plane + neurType, dataEmpirical, nint = 200, type ="density")
- histogram( ~ localErr | Plane + neurType, dataEmpirical, nint = 200, type ="density")
- histogram( ~ fixedError | Plane + neurType, dataEmpirical, nint = 200, type ="density")
- ```
- ###Error vs LP
- ```{r}
- ## Create cuts:
- x_c <- cut(dataEmpirical$LP, 50)
- y_c <- cut(dataEmpirical$fixedError, 50)
- ## Calculate joint counts at cut levels:
- z <- table(x_c, y_c)
- ## Plot as a 3D histogram:
- plot3D::hist3D(dataEmpirical$LP, dataEmpirical$fixedError, border="black")
- ## Plot as a 2D heatmap:
- plot3D::image2D(z=z, border="black")
- gplots::hist2d(dataEmpirical[, c(3,11)], nbins=50)
- ```
- ##OLS
- ```{r, fig.width=15, fig.height=10}
- ##############
- # Fit models #
- ##############
- # mod3 <- lm(LR ~ LP*neurType*Plane, data = allData[allData$neurType != "Type4", ])
- mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData[allData$neurType != "Type4", ])
- mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData)
- mod2 <- lm(LR ~ LP*neurType + LP*Plane + neurType*Plane, data = allData)
- mod1 <- lm(LR ~ LP + neurType + Plane, data = allData)
- # Summmaries and anovas
- anova(mod3, mod2) # 3rd level interaction seems significant
- summary(mod3)
- car::Anova(mod3)
- # RLS2 robust variance
- summaryRobust(mod3, robust = TRUE)
- # Do some plotting to check lines
- interact_plot(model = mod3, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
- mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
- interact_plot(model = mod3, pred = LP, modx = neurType, mod2 = Plane, plot.points = TRUE, interval = T,
- modx.labels = c("Type1", "Type2", "Type3", "Type4"), mod2.labels = c("XY", "XZ", "YZ"))
- # Pruebas
- mod3.XY <- lm(LR ~ LP*neurType - 1, data = allData[allData$Plane == "XY" & allData$neurType != "Type4", ])
- car::Anova(mod3.XY)
- summary(mod3.XY)
- #######
- # NAs #
- #######
- # Test that NAs happen, when no intercept or main effects are present, for the dummy factor level that is not used as reference because one level is a linear combination of the others
- mod.Intercept <- lm(LR ~ LP + LP:neurType + LP:Plane + neurType:Plane + LP:neurType:Plane, data = allData)
- modNA.neur4 <- lm(LR ~ LP + LP:neurType + LP:Plane + neurType:Plane + LP:neurType:Plane - 1, data = allData)
- modNA.planYZ <- lm(LR ~ LP + LP:Plane + LP:neurType + neurType:Plane + LP:Plane:neurType - 1, data = allData)
- summary(modNA.neur4)
- # Do some plotting to check lines
- interact_plot(model = mod.Intercept, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
- mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
- interact_plot(model = modNA.neur4, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
- mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
- ```
- ###Diagnosis
- ```{r, fig.width=15, fig.height=10}
- # Diagnosis
- par(mfrow = c(2, 3))
- plot(mod3, 1:5)
- abline(h=c(-3,3), col="gray", lty="dashed")
- ############################
- # Discard by Cook distance #
- ############################
- # Augment model with diagnosis
- mod3.diag.metrics <- broom::augment(mod3)
- # Find observations that are highly influential
- cookDist <- mod3.diag.metrics[mod3.diag.metrics$.cooksd > 0.02, ]
- # Find rows and IDs of those observations to eliminate its participation in all planes
- remRows <- sapply(cookDist$.rownames, function(x) which(rownames(allData) == x))
- remIDs <- sapply(cookDist$.rownames, function(x) allData[which(rownames(allData) == x), 1])
- remRowsIDs <- as.vector(sapply(unique(remIDs), function(x) which(allData$id == x)))
- # Discard highly influential observations
- allData2 <- allData[-remRowsIDs, ]
- # Refit
- mod3.2 <- lm(LR ~ LP + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData2)
- summary(mod3.2)
- # Diagnosis
- par(mfrow = c(2, 3))
- plot(mod3.2, 1:5)
- abline(h=c(-3,3), col="gray", lty="dashed")
- ############################
- # Discard by Strd Residual #
- ############################
- # Augment model with diagnosis
- mod3.diag.metrics <- broom::augment(mod3)
- # Find observations that are highly influential
- stdResid <- mod3.diag.metrics[mod3.diag.metrics$.std.resid > 3, ]
- # Find rows and IDs of those observations to eliminate its participation in all planes
- remRows <- sapply(stdResid$.rownames, function(x) which(rownames(allData) == x))
- remIDs <- sapply(stdResid$.rownames, function(x) allData[which(rownames(allData) == x), 1])
- remRowsIDs <- as.vector(sapply(unique(remIDs), function(x) which(allData$id == x)))
- # Discard highly influential observations
- allData3 <- allData[-remRowsIDs, ]
- # Refit
- mod3.3 <- lm(LR ~ LP*neurType*Plane, data = allData3)
- summary(mod3.3)
- # Diagnosis
- par(mfrow = c(2, 3))
- plot(mod3.3, 1:5)
- abline(h=c(-3,3), col="gray", lty="dashed")
- ####################
- # Discard outliers #
- ####################
- # Use multivariate Mahalanobis distance and add to allData
- mahalDist <- by(allData, allData$neurType, function(x) mahalanobis(x[,2:3], colMeans(x[,2:3]), cov(x[,2:3])))
- allData$mahalDist <- unlist(mahalDist)
- # Sort from higher from lower Mahalanobis distance per neuron type
- mahalSort <- by(allData, allData$neurType, function(x) x[order(x$mahalDist, decreasing = TRUE),])
- # Store as data frame
- mahalSort <- do.call("rbind", mahalSort)
- # Discard observations with Mahalanobis distance >= 30
- allData4 <- mahalSort[mahalSort$mahalDist < 20, ]
- # Refit
- mod3.4 <- lm(LR ~ LP*neurType*Plane, data = allData4)
- summary(mod3.4)
- # Diagnosis
- par(mfrow = c(2, 3))
- plot(mod3.4, 1:5)
- abline(h=c(-3,3), col="gray", lty="dashed")
- # 3D PLOT Add a number identifyng every neuron-plane combination
- data3D$mahalDist <- unlist(mahalDist)
- data3Dmahal <- data3D[data3D$mahalDist < 20, ]
- rgl::open3d()
- car::scatter3d(x = data3Dmahal$LP, y = data3Dmahal$LR, z = data3Dmahal$plaNum, group = data3Dmahal$neurType,
- surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
- # Do some plotting to check lines
- interact_plot(model = mod3.4, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
- mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
- ###########################
- # Discard outliers by iqr #
- ###########################
- # https://easystats.github.io/performance/reference/check_outliers.html
- iqr <- by(allData[,2:3], attributes(check_outliers(mod3, method = "mahalanobis")))
- mod3.5 <- update(mod3, .~., data = allData[-as.numeric(which(iqr$data$Outlier == TRUE)), ])
- summary(mod3.5)
- # Diagnosis
- par(mfrow = c(2, 3))
- plot(mod3.5, 1:5)
- abline(h=c(-3,3), col="gray", lty="dashed")
- # 3D PLOT Add a number identifyng every neuron-plane combination
- data3Diqr <- data3D[-as.numeric(which(iqr$data$Outlier == TRUE)), ]
- rgl::open3d()
- car::scatter3d(x = data3Diqr$LP, y = data3Diqr$LR, z = data3Diqr$plaNum, group = data3Diqr$neurType,
- surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
- # Do some plotting to check lines
- interact_plot(model = mod3.5, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
- mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
- ```
- ###One hot coding
- ```{r, fig.width=8, fig.height=8}
- onehotData <- one_hot(as.data.table(allData))
-
- # Fit models
- mod1 <- lm(LR ~ LP + neurType_Type2 + neurType_Type3 + neurType_Type4 + Plane_XZ + Plane_YZ + LP:neurType_Type2 + LP:neurType_Type3, data = onehotData)
- # Summmaries and anovas
- summary(mod1)
- # Diagnosis
- par(mfrow = c(2, 2))
- plot(mod1)
- ```
- ###Partial regression
- ```{r}
- # http://www.colbyimaging.com/wiki/statistics/multiple-regression
- lm.lr <- lm(LR ~ neurType*Plane, data = allData)
- lm.lp <- lm(LP ~ neurType*Plane, data = allData)
- lm.Res <- lm(lm.lr$res ~ lm.lp$res:as.factor(allData$NeurPlane) + 0)
- summary(lm.Res)
- ```
- ###Fixed/variable intercept/slopes
- ```{r}
- ############################
- # Allow variable intercept #
- ############################
- modIncp.neur <- lm(LR ~ LP + neurType, data = allData)
- modIncp.plane <- lm(LR ~ LP + Plane, data = allData)
- modIncp <- lm(LR ~ LP + neurType + Plane, data = allData)
- summary(modIncp.neur)
- summary(modIncp.plane)
- summary(modIncp)
- aggregate(LR ~ neurType, allData, mean)
- aggregate(LP ~ neurType, allData, mean)
- aggregate(LR ~ Plane, allData, mean)
- ############################
- # Allow variable slope #
- ############################
- modSlope.neur <- lm(LR ~ LP + LP:neurType, data = allData)
- modSlope.plane <- lm(LR ~ LP + LP:Plane, data = allData)
- modSlope <- lm(LR ~ LP + LP:neurType + LP:Plane, data = allData)
- summary(modSlope.neur)
- summary(modSlope.plane)
- summary(modSlope)
- ```
- ###Contr sum
- ```{r}
- dataSum <- allData
- contr.sum(5)
- contrasts(dataSum$neurType) <- contr.sum(4)
- contrasts(dataSum$Plane) <- contr.sum(3)
- mod.Sum <- lm(LR ~ LP + LP:neurType + LP:Plane + LP:neurType:Plane, data = dataSum)
- summary(mod.Sum)
- ```
- ```{r}
- nT <- allData$neurType
- p <- allData$Plane
- l <- allData$LP
- lr <- allData$LR
- X0 <- model.matrix(~ nT + p,
- contrasts.arg = list(nT = contr.treatment(n = 4, contrasts = FALSE),
- p = contr.treatment(n = 3, contrasts = FALSE)))
- f <- sample(gl(3, 4, labels = letters[1:3]))
- unname( rowSums(X0[, c("nT1", "nT2", "nT3", "nT4")]) )
- unname( rowSums(X0[, c("p1", "p2", "p3")]) )
- qr(X0)$rank
- summary(lm(lr ~ l + X0 - 1))
- ```
- ##RLS1
- ```{r, fig.width=15, fig.height=10}
- # Weight observations by residual size
- # https://stats.idre.ucla.edu/r/dae/robust-regression/
- # mod3 <- lm(LR ~ LP*neurType*Plane, data = allData[allData$neurType != "Type4", ])
- # mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData[allData$neurType != "Type4", ])
- mod3R <- rlm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData)
- mod2R <- rlm(LR ~ LP*neurType + LP*Plane + neurType*Plane - 1, data = allData)
- mod1R <- rlm(LR ~ LP + neurType + Plane - 1, data = allData)
- # Summmaries and anovas
- (sumMod <- summary(mod3R))
- # Get which coefficients are significative, although it may not be correct to use t distribution for H0 in RLS
- # https://stat.ethz.ch/pipermail/r-help/2006-July/108659.html
- attributes(sumMod$coefficients)$dimnames[[1]][which(rob.pvals(mod3R) < 0.05)]
- car::Anova(mod3R)
- # Do some plotting to check lines
- interact_plot(model = mod3R, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
- mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
- interact_plot(model = mod3R, pred = LP, modx = neurType, mod2 = Plane, plot.points = TRUE, interval = T,
- modx.labels = c("Type1", "Type2", "Type3", "Type4"), mod2.labels = c("XY", "XZ", "YZ"))
- ```
- ##RLS2
- ```{r}
- # Robust variance and clustering
- mod3 <- estimatr::lm_robust(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData, clusters = id)
- summary(mod3)
- ```
- ###Diagnosis
- ```{r}
- # Diagnosis
- par(mfrow = c(2, 3))
- plot(mod3R, 1:5)
- abline(h=c(-3,3), col="gray", lty="dashed")
- ```
- ##GEE
- ```{r, fig.width=15, fig.height=10}
- # Fit models
- strucor <- "independence"
- strucor <- "exchangeable"
- dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)
- mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataNo4, id = id, corstr = strucor)
- # mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData, id = id, corstr = strucor)
- mod2Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane - 1, data = dataNo4, id = id)
- mod1Gee <- geeglm(LR ~ LP + neurType + Plane - 1, data = dataNo4, id = id)
- # Summmaries and anovas
- coef(mod3Gee)
- anova(mod3Gee, mod2Gee, test = "F")
- summary(mod3Gee)
- anova(mod3Gee, test = "Chi")
- # Varriable selection
- gee_stepper(mod1Gee, formula(mod3Gee)) # also needs strucor
- ```
- ###Change ref level
- ```{r}
- # Change reference level
- dataNo4$neurType <- relevel(dataNo4$neurType, ref="Type2")
- dataNo4$Plane <- relevel(dataNo4$Plane, ref="YZ")
- mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataNo4, id = id, corstr = strucor)
- summary(mod3Gee)
- ```
- ###Check correlation matrix
- We can also use the naive and robust variance estimates to select a correlation model. The model whose robust variance estimates most closely resembles its naive variance estimates is the better correlation model. To obtain a single summary statistic for this comparison I use the entire parameter covariance matrix and sum the absolute differences between the naive and robust covariance matrices.
- Link: https://sakai.unc.edu/access/content/group/2842013b-58f5-4453-aa8d-3e01bacbfc3d/public/Ecol562_Spring2012/docs/lectures/lecture23.htm#checking
- ```{r}
- # Select data
- datToMod <- allData
- # Check empirical correlation matrix
- mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod)
- xyRes <- mod3$residuals[datToMod$Plane == "XY"]
- xzRes <- mod3$residuals[datToMod$Plane == "XZ"]
- yzRes <- mod3$residuals[datToMod$Plane == "YZ"]
- Hmisc::rcorr(cbind(xyRes, xzRes, yzRes))
- # Fit models with differente corr mat
- mod3Gee.In <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod,
- id = id, corstr = "independence")
- mod3Gee.Ex <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod,
- id = id, corstr = "exchangeable")
- mod3Gee.Ar <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod,
- id = id, corstr = "ar1")
- mod3Gee.Un <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod,
- id = id, corstr = "unstructured")
- # Deviation from naive variance
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
- # QIC criteria
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
- # Check multiple comparisons
- K1 <- glht(mod3Gee.Ex, mcp(neurType = "Tukey"))$linfct
- K2 <- glht(mod3Gee.Ex, mcp(Plane = "Tukey"))$linfct
- summary(glht(mod3Gee.Ex, linfct = rbind(K1, K2)))
- mod3Gee.check <- geeglm(LR ~ LP + interact + LP:interact, data = datToMod,
- id = id, corstr = "exchangeable")
- summary(glht(mod3Gee.check, linfct = mcp(interact = "Tukey")))
- # Calculate means
- meanCalc <- aggregate(LR ~ neurType + Plane, data = allData, mean)
- summary(geeglm(LR ~ LP + neurType + Plane, data = datToMod,
- id = id, corstr = "exchangeable"))
- ```
- ###log transformation
- ```{r}
- # Select data
- datToMod <- dataNo4
- datToMod$logLP <- log(datToMod$LP)
- datToMod$logLR <- log(datToMod$LR)
- # Fit models with differente corr mat
- mod3Gee.In <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1,
- data = datToMod, id = id, corstr = "independence")
- mod3Gee.Ex <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1,
- data = datToMod, id = id, corstr = "exchangeable")
- mod3Gee.Ar <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1,
- data = datToMod, id = id, corstr = "ar1")
- mod3Gee.Un <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1,
- data = datToMod, id = id, corstr = "unstructured")
- # Deviation from naive variance
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
- # QIC criteria
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
- # Check the best option
- summary(mod3Gee.Ex)
- anova(mod3Gee.Ex)
- ```
- ###Standarize
- ```{r}
- # List order is: Type1XY, Type2XY, Type3XY, Type1XZ...
- LPstand <- by(dataNo4$LP, list(dataNo4$neurType, dataNo4$Plane), scale)
- LRstand <- by(dataNo4$LR, list(dataNo4$neurType, dataNo4$Plane), scale)
- # Number of observations
- numObs <- unlist(lapply(LPstand[1:3], function(x) length(x)))
- # Create data frame
- dataStand <- data.frame(id = rep(1:sum(numObs), 3),
- LR = unlist(LRstand),
- LP = unlist(LPstand),
- Plane = rep(c("XY", "XZ", "YZ"), each = sum(numObs)),
- neurType = rep(rep(c("Type1", "Type2", "Type3"), times = numObs), 3))
- dataStand <- dataStand[order(dataStand$id), ]
- dataStand$interact <- interaction(dataStand$neurType, dataStand$Plane, lex.order=T)
- # Plot
- ggplot(dataStand, aes(x = `LP`, y = `interact`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none")
- # Fit Gee
- # dataStand$neurType <- relevel(dataNo4$neurType, ref="Type1")
- # dataStand$Plane <- relevel(dataNo4$Plane, ref="XY")
- standGee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataStand, id = id, corstr = "exchangeable")
- standMain <- geeglm(LR ~ LP + neurType + Plane - 1, data = dataStand, id = id, corstr = "exchangeable")
- summary(standGee)
- anova(standGee)
- anova(standGee, standMain)
- ```
- ####Bootstrap
- ```{r, fig.width=10, fig.height=5}
- # Bootstrap for inference
- # Ensure reproducibility
- # set.seed(12345)
- # bootInter <- bootCoefint(dataStand, fullMod = T, "exchangeable", 500)
- # saveRDS(bootInter, "bootCoefsInteraction.rds")
- # Store as data frame
- options(digits=10)
- bootFrame <- data.frame(Type1XY = bootInter$LP,
- Type1XZ = bootInter$LP + bootInter$LP.PlaneXZ,
- Type1YZ = bootInter$LP + bootInter$LP.PlaneYZ,
- Type2XY = bootInter$LP + bootInter$LP.neurTypeType2,
- Type2XZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneXZ,
- Type2YZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneYZ,
- Type3XY = bootInter$LP + bootInter$LP.neurTypeType3,
- Type3XZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneXZ,
- Type3YZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneYZ)
- bootMelt <- reshape2::melt(bootFrame)
- bootMelt$neurType <- rep(c("Type1", "Type2", "Type3"), each = 1500)
- bootMelt$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = 500), 3)
- #Represent LP
- ggNeur <- ggplot(data=bootMelt, aes(x=value, fill=Plane)) +
- geom_density(adjust=1.5, alpha = .4) +
- theme_bw() +
- facet_wrap(~neurType, ncol = 3) +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- ggPlanes <- ggplot(data=bootMelt, aes(x=value, fill=neurType)) +
- geom_density(adjust=1.5, alpha = .4) +
- theme_bw() +
- facet_wrap(~Plane, ncol = 3) +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- ggarrange(ggNeur, ggPlanes, ncol = 2, nrow = 1)
- ```
- #####CI95 plot
- ```{r}
- # Extract the quantiles we need
- quant <- apply(bootFrame, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
- quantFrame <- reshape2::melt(quant)
- quantFrame$y <- rep(1:9, each=3)
- # Plot CI95
- plot(0, type="n", xlim=c(0.9,1.1), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
- ytick <- seq(1, 9, by=1)
- axis(side=2, at=ytick, labels = FALSE)
- text(par("usr")[1], ytick,
- labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)
- for (i in seq(1, 27,3)) {
- segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
- points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
- }
- abline(v=c(1.268), lty = "dashed", col = "gray")
- text(x=1.2765, y=10.1, "1.268",col="gray", font=2, cex=0.6)
- ```
- ###Divide max LR
- ```{r}
- # Find max LR
- maxLR <- aggregate(LR ~ neurType + Plane, dataNo4, max)
- dataMax <- dataNo4
- # Divide by max LR per axon type and plane
- for (i in c("Type1", "Type2", "Type3")) {
- for (j in c("XY", "XZ", "YZ")) {
- # Subset and divide LR
- lr <- dataMax[dataMax$neurType == i & dataMax$Plane == j, 2]
- dataMax[dataMax$neurType == i & dataMax$Plane == j, 2] <- lr/maxLR[maxLR$neurType == i & maxLR$Plane == j, 3]
-
- # Subset and divide LP
- lp <- dataMax[dataMax$neurType == i & dataMax$Plane == j, 3]
- dataMax[dataMax$neurType == i & dataMax$Plane == j, 3] <- lp/maxLR[maxLR$neurType == i & maxLR$Plane == j, 3]
- }
- }
- # Plot
- ggplot(dataMax, aes(x = `LP`, y = `interact`, fill = neurType)) +
- geom_density_ridges(alpha = 0.8) +
- theme_ridges() +
- theme(legend.position = "none")
- # Fit GEE
- maxGee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataMax, id = id, corstr = "exchangeable")
- summary(maxGee)
- anova(maxGee)
- ```
- ###neurType*Plane included
- ```{r}
- # Select data
- datToMod <- dataNo4
- contrasts(datToMod$neurType) <- contr.treatment
- contrasts(datToMod$Plane) <- contr.treatment
- # Fit models with differente corr mat
- mod3Gee.In <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "independence")
- mod3Gee.Ex <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "exchangeable")
- mod3Gee.Ar <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "ar1")
- mod3Gee.Un <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "unstructured")
- # Deviation from naive variance
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
- # QIC criteria
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
- # Exchangeable seems the best option
- mod3Gee.Ex.Int <- geeglm(LR ~ LP*interact, data = datToMod, id = id, corstr = "exchangeable")
- # Multiple comparisons FWER
- comparMult <- glht(mod3Gee.Ex.Int, linfct = mcp(interact = "Tukey"))
- summary(comparMult)
- # Multiple comparisons FDR
- summary(comparMult, adjust = "bonferroni")
- # Variable selection
- mod1Gee <- geeglm(LR ~ 1, data = datToMod, id = id, corstr = "exchangeable")
- gee_stepper(mod1Gee, formula(mod3Gee.Ex))
- summary(mod3Gee.Ex)
- anova(mod3Gee.Ex)
- ```
- ####log transformation
- ```{r}
- # Select data
- datToMod <- dataNo4
- datToMod$interact <- factor(datToMod$interact)
- datToMod$logLP <- log(datToMod$LP)
- datToMod$logLR <- log(datToMod$LR)
- contrasts(datToMod$neurType) <- contr.treatment
- contrasts(datToMod$Plane) <- contr.treatment
- # Fit models with differente corr mat
- mod3Gee.In <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "independence")
- mod3Gee.Ex <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "exchangeable")
- mod3Gee.Ar <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "ar1")
- mod3Gee.Un <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "unstructured")
- # Deviation from naive variance
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
- # QIC criteria
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
- # Check the best option
- summary(mod3Gee.Ex)
- ```
- ####1/(y^2)
- ```{r}
- # Select data
- datToMod <- allData
- datToMod$interact <- factor(datToMod$interact)
- datToMod$wei <- 1/((datToMod$LR)^2)
- contrasts(datToMod$neurType) <- contr.treatment
- contrasts(datToMod$Plane) <- contr.treatment
- # Fit models with differente corr mat
- mod3Gee.In <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "independence")
- mod3Gee.Ex <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "exchangeable")
- mod3Gee.Ar <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "ar1")
- mod3Gee.Un <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "unstructured")
- # Deviation from naive variance
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
- # QIC criteria
- sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
- # Check the best option
- summary(mod3Gee.Un)
- ```
- ##Bootstrapped coefficients
- ```{r}
- # Store bootstrapped coefficients in an empty list
- listBoot <- list()
- count <- 1
- reps <- 4000
- # Iterate through all axon types and planes
- # Ensure reproducibility
- # set.seed(12345)
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
- listBoot[[count]] <- bootCoef(allData, i, j, "exchangeable", reps)
- count <- count + 1
- }
- }
- # saveRDS(listBoot, "bootCoefs400reps.rds")
- # Store as data frame
- bootFrame <- do.call(rbind, listBoot)
- bootFrame$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), each = reps*3)
- bootFrame$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = reps), 4)
- bootFrame$NeurPlane <- interaction(bootFrame$neurType, bootFrame$Plane, lex.order=T)
- # Extract the quantiles we need
- listQuant <- list()
- count <- 1
- quantToEst <- c(0.025,0.05,0.95,0.975)
- # Iterate through all axon types and planes
- for (i in c("Type1", "Type2", "Type3", "Type4")) {
- for (j in c("XY", "XZ", "YZ")) {
- listQuant[[count]] <- quantile(bootFrame[bootFrame$neurType == i & bootFrame$Plane == j, 2], c(0.025,0.05,0.95,0.975))
- count <- count + 1
- }
- }
- # Store quantiles in data frame
- quantMat <- as.data.frame(do.call(rbind, listQuant))
- quantMat$NeurPlane <- factor(paste(rep(c("Type1", "Type2", "Type3", "Type4"), each = 3), rep(c("XY", "XZ", "YZ"), each = 4), sep="."))
- quantFrame <- reshape2::melt(quantMat)
- ```
- ```{r}
- #Represent the CI
- ggplot(data=bootFrame, aes(x=LP, fill=Plane)) +
- geom_density(adjust=1.5, alpha = .4) +
- theme_bw() +
- facet_wrap(~neurType, ncol = 4) +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- ```
- ###Simulate experimental noise
- ####Check intra-cluster variance
- ```{r}
- intraVar <- aggregate(LP ~ id, data = allData, function(x) sd(x)/mean(x))
- intraVar$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)
- png(filename=paste("intraclusterCV.png", sep=""), type="cairo",
- units="in", width=5, height=5, pointsize=12,
- res=300)
- intraVar %>%
- ggplot( aes(x=neurType, y=LP, fill=neurType)) +
- geom_boxplot() +
- scale_fill_viridis(discrete = TRUE, alpha=0.6) +
- theme_bw() +
- theme(
- legend.position="none",
- plot.title = element_text(size=11),
- axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
- ) +
- xlab("Axonal type") +
- ylab("Intracluster Variance")
- dev.off()
- ```
- ####Add normal noise
- ```{r}
- set.seed(12345)
- # type1Noise <- rnorm()
- ```
- ##Figure scripts OLD
- ####Intracluster variance
- Same chunk as "Check intra-cluster variance" within "Bootstrapped coefficients"
- ```{r}
- intraVar <- aggregate(LP ~ id, data = allData, function(x) sd(x)/mean(x))
- intraVar$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)
- #Ggplot
- png(filename=paste("intraclusterCV.png", sep=""), type="cairo",
- units="in", width=5, height=5, pointsize=12,
- res=300)
- intraVar %>%
- ggplot( aes(x=neurType, y=LP, fill=neurType)) +
- geom_boxplot() +
- scale_fill_viridis(discrete = TRUE, alpha=0.6) +
- theme_bw() +
- theme(
- legend.position="none",
- plot.title = element_text(size=11),
- axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
- ) +
- xlab("Axonal type") +
- ylab("Intracluster Variance")
- dev.off()
- #Base R
- png(filename=paste("intraclusterCV_baseR.png", sep=""), type="cairo",
- units="in", width=5, height=5, pointsize=12,
- res=300)
- boxplot(LP ~ neurType, data = intraVar, col= c("violet", "lightblue", "lightgreen", "tan1"),
- outpch = 19, outbg = "black", outcol = "black", cex = .7, ylim = c(0,0.4))
- dev.off()
- ```
- ###Inference
- #####Original model bootstrap
- Same chunk as "Check correlation matrix" within "GEE"
- ```{r, fig.width=10, fig.height=5}
- # Bootstrap for inference
- # Ensure reproducibility
- # set.seed(12345)
- # bootInter <- bootCoefint(dataNo4, "LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1", "exchangeable", 2000)
- # saveRDS(bootInter, "bootCoefsInteraction.rds")
- bootInter <- readRDS("bootCoefsInteraction.rds")
- # Store as data frame
- options(digits=10)
- bootFrame <- data.frame(Type1XY = bootInter$LP,
- Type1XZ = bootInter$LP + bootInter$LP.PlaneXZ,
- Type1YZ = bootInter$LP + bootInter$LP.PlaneYZ,
- Type2XY = bootInter$LP + bootInter$LP.neurTypeType2,
- Type2XZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneXZ,
- Type2YZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneYZ,
- Type3XY = bootInter$LP + bootInter$LP.neurTypeType3,
- Type3XZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneXZ,
- Type3YZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneYZ)
- bootMelt <- reshape2::melt(bootFrame)
- bootMelt$neurType <- rep(c("Type1", "Type2", "Type3"), each = 6000)
- bootMelt$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = 2000), 3)
- #Represent LP
- ggNeur <- ggplot(data=bootMelt, aes(x=value, fill=Plane)) +
- geom_density(adjust=1.5, alpha = .4) +
- theme_bw() +
- facet_wrap(~neurType, ncol = 3) +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- ggPlanes <- ggplot(data=bootMelt, aes(x=value, fill=neurType)) +
- geom_density(adjust=1.5, alpha = .4) +
- theme_bw() +
- facet_wrap(~Plane, ncol = 3) +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- png(filename=paste("bootstrapLP_neurvsplane.png", sep=""), type="cairo",
- units="in", width=10, height=3, pointsize=12,
- res=300)
- ggarrange(ggNeur, ggPlanes, ncol = 2, nrow = 1)
- dev.off()
- ```
- #####CI95 plot
- ```{r}
- # Extract the quantiles we need
- quant <- apply(bootFrame, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
- quantFrame <- reshape2::melt(quant)
- quantFrame$y <- rep(1:9, each=3)
- # Plot CI95
- png(filename=paste("ci95.png", sep=""), type="cairo",
- units="in", width=5, height=5, pointsize=12,
- res=300)
- plot(0, type="n", xlim=c(1.20,1.35), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
- ytick <- seq(1, 9, by=1)
- axis(side=2, at=ytick, labels = FALSE)
- text(par("usr")[1], ytick,
- labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)
- for (i in seq(1, 27,3)) {
- segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
- points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
- }
- abline(v=c(1.268), lty = "dashed", col = "gray")
- text(x=1.2765, y=10.1, "1.268",col="gray", font=2, cex=0.6)
- dev.off()
- # Zoom to check whether 2 and 7 overlap or not
- png(filename=paste("ci95zoom.png", sep=""), type="cairo",
- units="in", width=5, height=5, pointsize=12,
- res=300)
- plot(0,type="n", xlim=c(1.26,1.27), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
- ytick<-seq(1, 9, by=1)
- axis(side=2, at=ytick, labels = FALSE)
- text(par("usr")[1], ytick,
- labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)
- for (i in seq(1, 27,3)) {
- segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
- points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
- }
- abline(v=c(1.268), lty = "dashed", col = "gray")
- dev.off()
- ```
- #####Global LP
- ```{r}
- # Grand LP by sum of bootstrapped coeffients
- globalLP <- data.frame(LP = bootInter$LP + bootInter$LP.PlaneXZ + bootInter$LP.PlaneYZ + bootInter$LP.neurTypeType2 + bootInter$LP.neurTypeType2.PlaneXZ + bootInter$LP.neurTypeType2.PlaneYZ + bootInter$LP.neurTypeType3 + bootInter$LP.neurTypeType3.PlaneXZ + bootInter$LP.neurTypeType3.PlaneYZ)
- globalQuant <- quantile(globalLP$LP, c(0.025, 0.5, 0.975), type=7)
- sumLP <- ggplot(data=globalLP, aes(x=LP)) +
- geom_density(adjust=1.5, alpha = .4) +
- geom_vline(xintercept = globalQuant, linetype="dotted",
- color = "blue") +
- theme_bw() +
- ggtitle("Sum LP") +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- # Grand LP by bootstrapping
- # Ensure reproducibility
- # set.seed(12345)
- # bootLP <- bootCoefint(dataNo4, "LR ~ LP - 1", "exchangeable", 2000)
- bootLPQuant <- quantile(bootLP$LP, c(0.025, 0.5, 0.975), type=7)
- bootLP <- ggplot(data=bootLP, aes(x=LP)) +
- geom_density(adjust=1.5, alpha = .4) +
- geom_vline(xintercept = bootLPQuant, linetype="dotted",
- color = "blue") +
- theme_bw() +
- ggtitle("Boot LP") +
- theme(
- legend.position="bottom",
- panel.spacing = unit(1, "lines"),
- axis.ticks.x=element_blank()
- )
- png(filename=paste("LPestimations.png", sep=""), type="cairo",
- units="in", width=10, height=5, pointsize=12,
- res=300)
- ggarrange(sumLP, bootLP, ncol = 2, nrow = 1)
- dev.off()
- ```
- ##Figure scripts NEW
- ###Estimate alpha per neuron type
- ```{r}
- # Alpha per neurType
- options(digits = 10)
- #######
- # GEE #
- #######
- # Check correlation matrix
- geeNeur.in <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "independence")
- geeNeur.ex <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "exchangeable")
- geeNeur.ar <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "ar1")
- geeNeur.un <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "unstructured")
- # Deviation from naive variance
- sapply(list(geeNeur.in, geeNeur.ex, geeNeur.ar, geeNeur.un), var.crit)
- # Bootstrap
- # Ensure reproducibility
- # set.seed(12345)
- # bootNeur <- bootCoefint(allData, "LR ~ LP:neurType - 1", "exchangeable", 2000)
- # saveRDS(bootNeur, "bootCoefs_perNeuron.rds")
- # bootNeur <- readRDS("bootCoefs_perNeuron.rds")
- # colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")
- #######
- # OLS #
- #######
- # Bootstrap
- # Ensure reproducibility
- # set.seed(12345)
- # bootNeur <- bootCoefintLM(allData, "LR ~ LP:neurType - 1", 2000)
- # saveRDS(bootNeur, "bootCoefs_perNeuron_OLS.rds")
- bootNeur <- readRDS("ProyeccionAnalysis/bootCoefs_perNeuron_OLS.rds")
- colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")
- # Get R2 value
- summary(lm(LR ~ LP:neurType - 1, allData))
- ```
- ####CI95 plot
- ```{r}
- # Extract the quantiles we need
- quant <- apply(bootNeur, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
- quantFrame <- reshape2::melt(quant)
- quantFrame$y <- rep(1:4, each=3)
- # Plot CI95
- png(filename=paste("ci95_perNeuron_OLS.png", sep=""), type="cairo",
- units="in", width=5, height=5, pointsize=12,
- res=300)
- plot(0, type="n", xlim=c(1.265, 1.293), ylim=c(0.5,4.5), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
- ytick <- seq(1, 4, by=1)
- axis(side=2, at=ytick, labels = FALSE)
- text(par("usr")[1], ytick,
- labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)
- for (i in seq(1, 12,3)) {
- segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
- points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
- }
- rect(quantFrame[quantFrame$Var1 == "2.5%" & quantFrame$Var2 == "Type4", 3], -1,
- quantFrame[quantFrame$Var1 == "97.5%" & quantFrame$Var2 == "Type3", 3], 6,
- col = rgb(0.5,0.5,0.5,1/4), lty = "dashed", border = "darkgray")
- dev.off()
- ```
- ####Line with shades
- ```{r, fig.width=8, fig.height=8}
- png(filename=paste("lines_perNeuron_OLS.png", sep=""), type="cairo",
- units="in", width=5, height=5, pointsize=12,
- res=300)
- op <- par(mfrow = c(2,2),
- oma = c(0,0,0,0) + 0.1,
- mar = c(0,0,1,1) + 0.1)
- for (i in (1:4)) {
- # Get axis range
- xRange <- allData[allData$neurType == paste("Type", i, sep=""), 3]
- yRange <- allData[allData$neurType == paste("Type", i, sep=""), 2]
- # Get max, mean and min alphas
- alphaVec <- c(max(bootNeur[,i]), median(bootNeur[,i]), min(bootNeur[,1]))
- # Plot observations, mean line and shade with span of all booted lines
- plot(LR ~ LP, data = allData[allData$neurType == paste("Type", i, sep=""), ], pch = 21, cex = 0.5, bg = "gray", col = NULL,
- xlab = "Projected 2D Length", ylab = "Real 3D Length", xaxt="n", yaxt = "n")
- lines(xRange, alphaVec[2]*xRange, col = "red", lwd = 0.5)
- polygon(x = c(min(xRange), min(xRange), max(xRange), max(xRange)),
- y = c(min(alphaVec[3]*xRange), min(alphaVec[1]*xRange), max(alphaVec[3]*xRange), max(alphaVec[1]*xRange)),
- col = rgb(1,0,0, alpha = 0.3), border = NA)
- # plotrix::corner.label(x = -1, y = 1, paste("Type", i, sep=""))
-
- }
- par(op)
- dev.off()
- ```
|