modelBased_projections.Rmd 61 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804
  1. ---
  2. title: "Axonal Length"
  3. author: "Sergio D?ez Hermano"
  4. date: '`r format(Sys.Date(),"%e de %B, %Y")`'
  5. output:
  6. html_document:
  7. highlight: tango
  8. toc: yes
  9. toc_depth: 4
  10. toc_float:
  11. collapsed: no
  12. ---
  13. ```{r setup, include=FALSE}
  14. require(knitr)
  15. # include this code chunk as-is to set options
  16. opts_chunk$set(comment = NA, prompt = FALSE, fig.height=5, fig.width=5, dpi=300, fig.align = "center", echo = TRUE, message = FALSE, warning = FALSE, cache=FALSE)
  17. Sys.setlocale("LC_TIME", "C")
  18. ```
  19. ##Load libraries and functions
  20. ```{r}
  21. library(R.matlab)
  22. library(lattice)
  23. library(dplyr)
  24. library(ggplot2)
  25. library(ggridges)
  26. library(hrbrthemes)
  27. library(viridis)
  28. library(ggpubr)
  29. library(mltools)
  30. library(data.table)
  31. library(caret)
  32. library(interactions)
  33. library(performance)
  34. library(MASS) #rlm
  35. library(clickR) #rlm pvals
  36. library(geepack)
  37. library(pstools)
  38. library(MXM)
  39. library(rmcorr)
  40. library(multcomp)
  41. # https://rstudio-pubs-static.s3.amazonaws.com/297778_5fce298898d64c81a4127cf811a9d486.html
  42. ###########################
  43. # Robust sanwich variance #
  44. ###########################
  45. source("summaryRobust.R")
  46. #########################################
  47. # Deviation from empirical variance GEE #
  48. #########################################
  49. var.crit <- function(mymodel) sum(abs(mymodel[["geese"]]$vbeta.naiv - mymodel[["geese"]]$vbeta))
  50. ############################
  51. # Distances between planes #
  52. ############################
  53. distPlan <- function(mydata) {
  54. # Vector to store distances
  55. distPlanes <- vector()
  56. # Estimate distances between planes for each subject
  57. for (i in unique(mydata$id)) {
  58. LPvals <- mydata[mydata$id == i, 3]
  59. distPlanes <- c(distPlanes,
  60. (LPvals[1] - LPvals[2])/max(LPvals[1], LPvals[2]),
  61. (LPvals[1] - LPvals[3])/max(LPvals[1], LPvals[3]),
  62. (LPvals[2] - LPvals[3])/max(LPvals[2], LPvals[3]))
  63. }
  64. # Concatenate factor with substraction information
  65. numSubjects <- length(unique(mydata$id))
  66. subsFactor <- rep(c("XY - XZ", "XY - YZ", "YZ - XZ"), rep = numSubjects)
  67. distFrame <- data.frame(id = mydata$id,
  68. type = mydata$neurType,
  69. distPlanes = distPlanes,
  70. substraction = subsFactor)
  71. return(distFrame)
  72. }
  73. ##########################
  74. # Bootstrap coefficients #
  75. ##########################
  76. bootCoef <- function(mydata, axontype, projection, corrMat, nreps) {
  77. # Select data
  78. modData <- mydata[mydata$neurType == axontype & mydata$Plane == projection, ]
  79. # Fit the model
  80. modGee <- geeglm(LR ~ LP, data = modData, id = id, corstr = corrMat)
  81. # Ensure reproducibility
  82. # set.seed(12345)
  83. # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  84. coefmat <- matrix(NA, nreps, 2)
  85. # We need the fitted values and the residuals from the model
  86. resids <- residuals(modGee)
  87. preds <- fitted(modGee)
  88. # Repeatedly generate bootstrapped responses
  89. for(i in 1:nreps) {
  90. booty <- preds + sample(resids, rep=TRUE)
  91. modGee2 <- update(modGee, booty ~ .)
  92. coefmat[i,] <- coef(modGee2)
  93. }
  94. # Create a dataframe to store predictors values
  95. colnames(coefmat) <- names(coef(modGee2))
  96. coefmat <- data.frame(coefmat)
  97. return(coefmat)
  98. }
  99. ##############################
  100. # Bootstrap coefficients gEE #
  101. ##############################
  102. # For models with interactions
  103. bootCoefint <- function(mydata, modForm, corrMat, nreps) {
  104. # Fit model
  105. modGee <- geeglm(formula(modForm), data = mydata, id = id, corstr = corrMat)
  106. # Ensure reproducibility
  107. # set.seed(12345)
  108. # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  109. coefmat <- matrix(NA, nreps, length(coef(modGee)))
  110. # We need the fitted values and the residuals from the model
  111. resids <- residuals(modGee)
  112. preds <- fitted(modGee)
  113. # Repeatedly generate bootstrapped responses
  114. for(i in 1:nreps) {
  115. booty <- preds + sample(resids, rep=TRUE)
  116. modGee2 <- update(modGee, booty ~ .)
  117. coefmat[i,] <- coef(modGee2)
  118. print(i)
  119. }
  120. # Create a dataframe to store predictors values
  121. colnames(coefmat) <- names(coef(modGee2))
  122. coefmat <- data.frame(coefmat)
  123. return(coefmat)
  124. }
  125. #############################
  126. # Bootstrap coefficients LM #
  127. #############################
  128. # For models with interactions
  129. bootCoefintLM <- function(mydata, modForm, nreps) {
  130. # Fit model
  131. modLM <- lm(formula(modForm), data = mydata)
  132. # Ensure reproducibility
  133. set.seed(12345)
  134. # Set up a matrix to store the results (1 intercept + 1 predictor = 2)
  135. coefmat <- matrix(NA, nreps, length(coef(modLM)))
  136. # We need the fitted values and the residuals from the model
  137. resids <- residuals(modLM)
  138. preds <- fitted(modLM)
  139. # Repeatedly generate bootstrapped responses
  140. for(i in 1:nreps) {
  141. booty <- preds + sample(resids, rep=TRUE)
  142. modLM2 <- update(modLM, booty ~ .)
  143. coefmat[i,] <- coef(modLM2)
  144. print(i)
  145. }
  146. # Create a dataframe to store predictors values
  147. colnames(coefmat) <- names(coef(modLM2))
  148. coefmat <- data.frame(coefmat)
  149. return(coefmat)
  150. }
  151. ```
  152. ##Load data
  153. ```{r}
  154. # Get file paths
  155. axonNames <- list.files(paste("ProyeccionData/", sep=""), full.names=T)
  156. # Load matlab file
  157. axonFiles <- lapply(axonNames, function(x) readMat(x))
  158. names(axonFiles) <- c("Type1", "Type2", "Type3", "Type4")
  159. # VAR NAMES
  160. # REAL_LENGTH, PROYECTION_LENGTH_XY, PROYECTION_LENGTH_XZ, PROYECTION_LENGTH_YZ
  161. # Extract data
  162. realLength <- lapply(axonFiles, function(x) x$REAL.LENGTH)
  163. planeXY <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XY)
  164. planeXZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.XZ)
  165. planeYZ <- lapply(axonFiles, function(x) x$PROYECTION.LENGTH.YZ)
  166. # Get number of neurons per type
  167. numberTypes <- unlist(lapply(realLength, function(x) length(x)))
  168. # Store in data frames by plane
  169. xyData <- data.frame(id = 1:length(unlist(realLength)),
  170. LR = unlist(realLength), LP = unlist(planeXY), Plane = rep("XY", sum(numberTypes)),
  171. neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
  172. xzData <- data.frame(id = 1:length(unlist(realLength)),
  173. LR = unlist(realLength), LP = unlist(planeXZ), Plane = rep("XZ", sum(numberTypes)),
  174. neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
  175. yzData <- data.frame(id = 1:length(unlist(realLength)),
  176. LR = unlist(realLength), LP = unlist(planeYZ), Plane = rep("YZ", sum(numberTypes)),
  177. neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
  178. # Merge into a single data frame and sort by id
  179. allData <- do.call("rbind", list(xyData, xzData, yzData))
  180. allData <- allData[order(allData$id),]
  181. # Add different number for every neuron-plane combination
  182. allData$NeurPlane <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]),
  183. rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
  184. allData$interact <-interaction(allData$neurType, allData$Plane, lex.order=T)
  185. # Create data frame w/o Type4
  186. dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)
  187. # Data in short format
  188. dataShort <- data.frame(id = 1:length(unlist(realLength)),
  189. LR = unlist(realLength), XY = unlist(planeXY), XZ = unlist(planeXZ), YZ = unlist(planeYZ),
  190. neurType = rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes))
  191. ```
  192. ##Descriptive
  193. ###Histograms
  194. ```{r, fig.width=8, fig.height=8}
  195. # Use dataShort for plotting
  196. histogram( ~ YZ + XZ + XY + LR | neurType, dataShort, type ="density")
  197. histogram( ~ LR | Plane, allData, type ="density")
  198. ```
  199. ####GGplot2
  200. ```{r, fig.width=8, fig.height=8}
  201. #https://www.r-graph-gallery.com/violin_grouped_ggplot2.html
  202. # New variables releveled for plotting reasons
  203. allData$NPFac <- factor(allData$NeurPlane)
  204. levels(allData$NPFac) <- paste(rep(levels(allData$neurType), each = 3), rep(levels(allData$Plane), 3), sep=".")
  205. # allData$NPFac <- factor(allData$NPFac, levels = rev(levels(allData$NPFac)))
  206. allData$typeFac <- factor(allData$neurType, levels = rev(levels(allData$neurType)))
  207. # Plot
  208. ggplot(allData, aes(x = `LP`, y = `NPFac`, fill = ..x..)) +
  209. geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  210. scale_fill_viridis(name = "LP", option = "D") +
  211. labs(title = 'Length 2D Proyections') +
  212. theme_ipsum() +
  213. theme(
  214. legend.position="none",
  215. panel.spacing = unit(0.1, "lines"),
  216. strip.text.x = element_text(size = 8)
  217. )
  218. # Plot
  219. ggplot(allData, aes(x = `LR`, y = `neurType`, fill = ..x..)) +
  220. geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  221. scale_fill_viridis(name = "LP", option = "D") +
  222. labs(title = 'Length 2D Proyections') +
  223. theme_ipsum() +
  224. theme(
  225. legend.position="none",
  226. panel.spacing = unit(0.1, "lines"),
  227. strip.text.x = element_text(size = 8)
  228. )
  229. # Plot
  230. ggplot(allData, aes(x = LP, y = NPFac, fill = neurType)) +
  231. geom_density_ridges() +
  232. theme_ridges() +
  233. theme(legend.position = "none")
  234. # All together gradient
  235. # dataShort$normLR <- dataShort$LR/dataShort$LR
  236. # dataShort$normXY <- dataShort$LR/dataShort$XY
  237. # dataShort$normXZ <- dataShort$LR/dataShort$XZ
  238. # dataShort$normYZ <- dataShort$LR/dataShort$YZ
  239. joinData <- melt(setDT(dataShort), id.vars = c("id","neurType"), variable.name = "length")
  240. joinData <- joinData[order(joinData$id), ]
  241. joinData$NeurPlane <-interaction(joinData$neurType, joinData$length, lex.order=T)
  242. png(filename=paste("gradientDistributions.png", sep=""), type="cairo",
  243. units="in", width=10, height=10, pointsize=12,
  244. res=300)
  245. ggplot(joinData, aes(x = `value`, y = `NeurPlane`, fill = ..x..)) +
  246. geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  247. scale_fill_viridis(name = "LP", option = "D") +
  248. labs(title = 'Length 2D Proyections') +
  249. theme_ipsum() +
  250. theme(
  251. legend.position="none",
  252. panel.spacing = unit(0.1, "lines"),
  253. strip.text.x = element_text(size = 8)
  254. )
  255. dev.off()
  256. # All together gradient (LOG)
  257. joinData <- melt(setDT(dataShort), id.vars = c("id","neurType"), variable.name = "length")
  258. joinData <- joinData[order(joinData$id), ]
  259. joinData$NeurPlane <- interaction(joinData$neurType, joinData$length, lex.order=T)
  260. joinData$logValue <- log(joinData$value)
  261. ggplot(joinData, aes(x = `logValue`, y = `NeurPlane`, fill = ..x..)) +
  262. geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  263. scale_fill_viridis(name = "LP", option = "D") +
  264. labs(title = 'Length 2D Proyections') +
  265. theme_bw() +
  266. theme(
  267. legend.position="none",
  268. panel.spacing = unit(0.1, "lines"),
  269. strip.text.x = element_text(size = 8)
  270. )
  271. # All together color by type
  272. ggplot(joinData, aes(x = `value`, y = `NeurPlane`, fill = neurType)) +
  273. geom_density_ridges(alpha = 0.8) +
  274. theme_ridges() +
  275. theme(legend.position = "none")
  276. ```
  277. ####GGPlot2 Stacked
  278. ```{r, fig.width=20, fig.height=6}
  279. # All together overlapping
  280. # levels(joinData$length) <- rev(levels(joinData$length))
  281. joinData$estim <- joinData$value*1.27
  282. joinData[joinData$length == "LR", 6] <- joinData[joinData$length == "LR", 4]
  283. png(filename=paste("gradientStacked.png", sep=""), type="cairo",
  284. units="in", width=20, height=5, pointsize=12,
  285. res=300)
  286. ggplot(data=joinData, aes(x=value, fill=length)) +
  287. geom_density(adjust=1.5, alpha = .4) +
  288. theme_ipsum() +
  289. facet_wrap(~neurType, ncol = 4) +
  290. xlim(0, 2.5e05) +
  291. theme(
  292. legend.position="bottom",
  293. panel.spacing = unit(1, "lines"),
  294. axis.ticks.x=element_blank()
  295. )
  296. dev.off()
  297. png(filename=paste("gradientEstimation.png", sep=""), type="cairo",
  298. units="in", width=20, height=5, pointsize=12,
  299. res=300)
  300. ggplot(data=joinData, aes(x=estim, fill=length)) +
  301. geom_density(adjust=1.5, alpha = .4) +
  302. theme_ipsum() +
  303. facet_wrap(~neurType, ncol = 4) +
  304. xlim(0, 2.5e05) +
  305. theme(
  306. legend.position="bottom",
  307. panel.spacing = unit(1, "lines"),
  308. axis.ticks.x=element_blank()
  309. )
  310. dev.off()
  311. ```
  312. ###Boxplot
  313. ```{r, fig.width=20, fig.height=5}
  314. # Use dataShort for plotting
  315. lrBx <- ggplot(dataShort, aes(x=neurType, y=LR, fill = neurType)) +
  316. geom_violin(trim=FALSE) +
  317. geom_boxplot(width=0.1, fill = "white") + theme_minimal()
  318. xyBx <- ggplot(dataShort, aes(x=neurType, y=XY, fill = neurType)) +
  319. geom_violin(trim=FALSE) +
  320. geom_boxplot(width=0.1, fill = "white") + theme_minimal()
  321. xzBx <- ggplot(dataShort, aes(x=neurType, y=XZ, fill = neurType)) +
  322. geom_violin(trim=FALSE) +
  323. geom_boxplot(width=0.1, fill = "white") + theme_minimal()
  324. yzBx <- ggplot(dataShort, aes(x=neurType, y=YZ, fill = neurType)) +
  325. geom_violin(trim=FALSE) +
  326. geom_boxplot(width=0.1, fill = "white") + theme_minimal()
  327. ggarrange(lrBx, xyBx, xzBx, yzBx, ncol = 4, nrow = 1)
  328. ```
  329. ###Correlations
  330. ####LR vs LP by Neuron
  331. ```{r, fig.width=20}
  332. par(mfrow = c(1,4))
  333. plot(LR ~ LP, data = allData[allData$neurType == "Type1", ], pch = 21, bg = "coral", main = "Type1", xlim = c(0,250000), ylim = c(0, 300000))
  334. abline(h=1000, v=1000, col = "gray", lty = "dashed")
  335. plot(LR ~ LP, data = allData[allData$neurType == "Type2", ], pch = 21, bg = "lightblue", main = "Type2", xlim = c(0,250000), ylim = c(0, 300000))
  336. abline(h=1000, v=1000, col = "gray", lty = "dashed")
  337. plot(LR ~ LP, data = allData[allData$neurType == "Type3", ], pch = 21, bg = "lightgreen", main = "Type3", xlim = c(0,250000), ylim = c(0, 300000))
  338. abline(h=1000, v=1000, col = "gray", lty = "dashed")
  339. plot(LR ~ LP, data = allData[allData$neurType == "Type4", ], pch = 21, bg = "violet", main = "Type4", xlim = c(0,250000), ylim = c(0, 300000))
  340. abline(h=1000, v=1000, col = "gray", lty = "dashed")
  341. ```
  342. ####LR vs LP by Plane
  343. ```{r, fig.width=15}
  344. par(mfrow = c(1,3))
  345. plot(LR ~ LP, data = allData[allData$Plane == "XY", ], pch = 21, bg = "red", main = "XY", xlim = c(0,250000), ylim = c(0, 300000))
  346. abline(h=1000, v=1000, col = "gray", lty = "dashed")
  347. plot(LR ~ LP, data = allData[allData$Plane == "XZ", ], pch = 21, bg = "blue", main = "XZ", xlim = c(0,250000), ylim = c(0, 300000))
  348. abline(h=1000, v=1000, col = "gray", lty = "dashed")
  349. plot(LR ~ LP, data = allData[allData$Plane == "YZ", ], pch = 21, bg = "green", main = "YZ", xlim = c(0,250000), ylim = c(0, 300000))
  350. abline(h=1000, v=1000, col = "gray", lty = "dashed")
  351. ```
  352. ####LR vs LP by Neuron*Plane
  353. ```{r}
  354. # Duplicate data to avoid confusion
  355. data3D <- allData
  356. # Add a number identifyng every neuron-plane combination
  357. data3D$plaNum <- c(rep(c(1,2,3), numberTypes[1]), rep(c(4,5,6), numberTypes[2]), rep(c(7,8,9), numberTypes[3]), rep(c(10,11,12), numberTypes[4]))
  358. # Plot
  359. car::scatter3d(x = data3D$LP, y = data3D$LR, z = data3D$plaNum, group = data3D$neurType,
  360. surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
  361. # Plot
  362. dataXY.1v4 <- data3D[data3D$Plane == "XY" & (data3D$neurType == "Type1" | data3D$neurType == "Type4"), ]
  363. car::scatter3d(x = dataXY.1v4$LP, y = dataXY.1v4$LR, z = dataXY.1v4$plaNum, group = dataXY.1v4$neurType,
  364. surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
  365. ```
  366. ####Planes distance
  367. ```{r, fig.width=20, fig.height=5}
  368. distWithin <- distPlan(allData)
  369. distWithin$Combin <- interaction(distWithin$type, distWithin$substraction, lex.order = TRUE)
  370. boxplot(distPlanes ~ substraction + type, distWithin)
  371. # Boxplot
  372. png(filename=paste("boxDistance.png", sep=""), type="cairo",
  373. units="in", width=5, height=5, pointsize=12,
  374. res=300)
  375. bx1 <- distWithin %>%
  376. ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
  377. geom_boxplot() +
  378. scale_fill_viridis(discrete = TRUE, alpha=0.6) +
  379. theme_ipsum() +
  380. theme(
  381. legend.position="none",
  382. plot.title = element_text(size=11),
  383. axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
  384. ) +
  385. ggtitle("Distances plane boxplot") +
  386. xlab("")
  387. bx1
  388. dev.off()
  389. # Boxlot with jitter
  390. bx2 <- distWithin %>%
  391. ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
  392. geom_boxplot() +
  393. scale_fill_viridis(discrete = TRUE, alpha=0.6) +
  394. geom_jitter(color = "black", size=0.1, alpha=0.9) +
  395. scale_color_viridis(discrete = TRUE, alpha=0.6) +
  396. theme_ipsum() +
  397. theme(
  398. legend.position="none",
  399. plot.title = element_text(size=11),
  400. axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
  401. ) +
  402. ggtitle("A boxplot with jitter") +
  403. xlab("")
  404. # Vertical violin
  405. bx3 <- distWithin %>%
  406. ggplot(aes(fill=substraction, y=distPlanes, x=type)) +
  407. geom_violin(position="dodge", alpha=0.5, outlier.colour="transparent") +
  408. scale_fill_viridis(discrete=T, name="") +
  409. theme_ipsum() +
  410. xlab("") +
  411. ylab("Distance Within Types")
  412. # Horizontal violin
  413. bx4 <- distWithin %>%
  414. ggplot( aes(x=Combin, y=distPlanes, fill=type)) +
  415. geom_violin(width=2, size=0.1) +
  416. scale_fill_viridis(discrete=TRUE) +
  417. scale_color_viridis(discrete=TRUE) +
  418. theme_ipsum() +
  419. theme(
  420. legend.position="none"
  421. ) +
  422. coord_flip() + # This switch X and Y axis and allows to get the horizontal version
  423. xlab("") +
  424. ylab("Distance Within Types")
  425. ggarrange(bx1, bx2, bx3, bx4, ncol = 4, nrow = 1)
  426. ```
  427. #####Distances ggplot
  428. ```{r, fig.width=15, fig.height=5}
  429. rest1 <- ggplot(distWithin[distWithin$substraction == "XY - XZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
  430. geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
  431. scale_fill_viridis(name = "LP", option = "D") +
  432. labs(title = 'XY - XZ') +
  433. theme_ipsum() +
  434. theme(
  435. legend.position="none",
  436. panel.spacing = unit(0.1, "lines"),
  437. strip.text.x = element_text(size = 8)
  438. )
  439. rest2 <- ggplot(distWithin[distWithin$substraction == "XY - YZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
  440. geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
  441. scale_fill_viridis(name = "LP", option = "D") +
  442. labs(title = 'XY - YZ') +
  443. theme_ipsum() +
  444. theme(
  445. legend.position="none",
  446. panel.spacing = unit(0.1, "lines"),
  447. strip.text.x = element_text(size = 8)
  448. )
  449. rest3 <- ggplot(distWithin[distWithin$substraction == "YZ - XZ", ], aes(x = `distPlanes`, y = `type`, fill = ..x..)) +
  450. geom_density_ridges_gradient(scale = 2, rel_min_height = 0.01) +
  451. scale_fill_viridis(name = "LP", option = "D") +
  452. labs(title = 'YZ - XZ') +
  453. theme_ipsum() +
  454. theme(
  455. legend.position="none",
  456. panel.spacing = unit(0.1, "lines"),
  457. strip.text.x = element_text(size = 8)
  458. )
  459. ggarrange(rest1, rest2, rest3, ncol = 3, nrow = 1)
  460. car::scatter3d(x = distWithin[distWithin$substraction == "XY - XZ", 3],
  461. y = distWithin[distWithin$substraction == "XY - YZ", 3],
  462. z = distWithin[distWithin$substraction == "YZ - XZ", 3],
  463. group = factor(rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)),
  464. surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
  465. plot(x = distWithin[distWithin$substraction == "XY - XZ", 3],
  466. y = distWithin[distWithin$substraction == "XY - YZ", 3],
  467. pch = 21,
  468. bg = factor(rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)))
  469. ```
  470. ##Empirical error
  471. ```{r, fig.width=15}
  472. # https://stackoverflow.com/questions/12394321/r-what-algorithm-does-geom-density-use-and-how-to-extract-points-equation-of
  473. # https://stackoverflow.com/questions/24985361/understanding-bandwidth-smoothing-in-ggplot2
  474. dataEmpirical <- allData
  475. dataEmpirical$alpha <- dataEmpirical$LR/dataEmpirical$LP
  476. #######################################
  477. # Estimate alphas and use grand alpha #
  478. #######################################
  479. # Grand alpha
  480. dataEmpirical$error <- 100*(1 - mean(dataEmpirical$alpha)/dataEmpirical$alpha)
  481. # Plot
  482. ga <- ggplot(dataEmpirical, aes(x = `error`, y = `interact`, fill = neurType)) +
  483. geom_density_ridges(alpha = 0.8) +
  484. theme_ridges() +
  485. theme(legend.position = "none") +
  486. ggtitle("Grand Alpha")
  487. #######################################
  488. # Estimate alphas and use local alpha #
  489. #######################################
  490. # Find mean alpha per group
  491. meanAlpha <- aggregate(alpha ~ neurType + Plane, dataEmpirical, mean)
  492. dataEmpirical$localErr <- dataEmpirical$error
  493. # Divide by max LR per axon type and plane
  494. for (i in c("Type1", "Type2", "Type3", "Type4")) {
  495. for (j in c("XY", "XZ", "YZ")) {
  496. # Subset and divide alpha
  497. alphaVec <- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 8]
  498. localAlpha <- meanAlpha[meanAlpha$neurType == i & meanAlpha$Plane == j, 3]
  499. dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 10] <- 100*(1 - localAlpha/alphaVec)
  500. }
  501. }
  502. # Plot
  503. la <-ggplot(dataEmpirical, aes(x = `localErr`, y = `interact`, fill = neurType)) +
  504. geom_density_ridges(alpha = 0.8) +
  505. theme_ridges() +
  506. theme(legend.position = "none") +
  507. ggtitle("Local Alpha")
  508. ###############
  509. # Fixed alpha #
  510. ###############
  511. dataEmpirical$fixedError <- 100*(1 - 1.268/dataEmpirical$alpha)
  512. fa <- ggplot(dataEmpirical, aes(x = `fixedError`, y = `interact`, fill = neurType)) +
  513. geom_density_ridges(alpha = 0.8) +
  514. theme_ridges() +
  515. theme(legend.position = "none") +
  516. ggtitle("Fixed Alpha")
  517. ##############
  518. # Joint plot #
  519. ##############
  520. png(filename=paste("alphasComparison_LRbyLP.png", sep=""), type="cairo",
  521. units="in", width=15, height=6, pointsize=12,
  522. res=300)
  523. ggarrange(ga, la, fa, ncol=3, nrow=1)
  524. dev.off()
  525. ```
  526. ###Equal Bandwidth
  527. ```{r, fig.width=15}
  528. # https://stackoverflow.com/questions/12394321/r-what-algorithm-does-geom-density-use-and-how-to-extract-points-equation-of
  529. # https://stackoverflow.com/questions/24985361/understanding-bandwidth-smoothing-in-ggplot2
  530. dataEmpirical <- allData
  531. dataEmpirical$alpha <- dataEmpirical$LR/dataEmpirical$LP
  532. bw <- 1
  533. #######################################
  534. # Estimate alphas and use grand alpha #
  535. #######################################
  536. # Grand alpha
  537. dataEmpirical$error <- 100*(1 - mean(dataEmpirical$alpha)/dataEmpirical$alpha)
  538. # Plot
  539. ga <- ggplot(dataEmpirical, aes(x = `error`, y = `interact`, fill = neurType)) +
  540. geom_density_ridges(alpha = 0.8, bandwidth = bw) +
  541. theme_ridges() +
  542. theme(legend.position = "none") +
  543. ggtitle("Grand Alpha")
  544. #######################################
  545. # Estimate alphas and use local alpha #
  546. #######################################
  547. # Find mean alpha per group
  548. meanAlpha <- aggregate(alpha ~ neurType + Plane, dataEmpirical, mean)
  549. dataEmpirical$localErr <- dataEmpirical$error
  550. # Divide by max LR per axon type and plane
  551. for (i in c("Type1", "Type2", "Type3", "Type4")) {
  552. for (j in c("XY", "XZ", "YZ")) {
  553. # Subset and divide alpha
  554. alphaVec <- dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 8]
  555. localAlpha <- meanAlpha[meanAlpha$neurType == i & meanAlpha$Plane == j, 3]
  556. dataEmpirical[dataEmpirical$neurType == i & dataEmpirical$Plane == j, 10] <- 100*(1 - localAlpha/alphaVec)
  557. }
  558. }
  559. # Plot
  560. la <-ggplot(dataEmpirical, aes(x = `localErr`, y = `interact`, fill = neurType)) +
  561. geom_density_ridges(alpha = 0.8, bandwidth = bw) +
  562. theme_ridges() +
  563. theme(legend.position = "none") +
  564. ggtitle("Local Alpha")
  565. ###############
  566. # Fixed alpha #
  567. ###############
  568. dataEmpirical$fixedError <- 100*(1 - 1.268/dataEmpirical$alpha)
  569. fa <- ggplot(dataEmpirical, aes(x = `fixedError`, y = `interact`, fill = neurType)) +
  570. geom_density_ridges(alpha = 0.8, bandwidth = bw) +
  571. theme_ridges() +
  572. theme(legend.position = "none") +
  573. ggtitle("Fixed Alpha")
  574. ##############
  575. # Joint plot #
  576. ##############
  577. png(filename=paste("alphasComparison_LRbyLP_bw", bw, ".png", sep=""), type="cairo",
  578. units="in", width=15, height=6, pointsize=12,
  579. res=300)
  580. ggarrange(ga, la, fa, ncol=3, nrow=1)
  581. dev.off()
  582. ```
  583. ###Histograms
  584. ```{r, fig.width=8, fig.height=8}
  585. # Use dataShort for plotting
  586. histogram( ~ error | Plane + neurType, dataEmpirical, nint = 200, type ="density")
  587. histogram( ~ localErr | Plane + neurType, dataEmpirical, nint = 200, type ="density")
  588. histogram( ~ fixedError | Plane + neurType, dataEmpirical, nint = 200, type ="density")
  589. ```
  590. ###Error vs LP
  591. ```{r}
  592. ## Create cuts:
  593. x_c <- cut(dataEmpirical$LP, 50)
  594. y_c <- cut(dataEmpirical$fixedError, 50)
  595. ## Calculate joint counts at cut levels:
  596. z <- table(x_c, y_c)
  597. ## Plot as a 3D histogram:
  598. plot3D::hist3D(dataEmpirical$LP, dataEmpirical$fixedError, border="black")
  599. ## Plot as a 2D heatmap:
  600. plot3D::image2D(z=z, border="black")
  601. gplots::hist2d(dataEmpirical[, c(3,11)], nbins=50)
  602. ```
  603. ##OLS
  604. ```{r, fig.width=15, fig.height=10}
  605. ##############
  606. # Fit models #
  607. ##############
  608. # mod3 <- lm(LR ~ LP*neurType*Plane, data = allData[allData$neurType != "Type4", ])
  609. mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData[allData$neurType != "Type4", ])
  610. mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData)
  611. mod2 <- lm(LR ~ LP*neurType + LP*Plane + neurType*Plane, data = allData)
  612. mod1 <- lm(LR ~ LP + neurType + Plane, data = allData)
  613. # Summmaries and anovas
  614. anova(mod3, mod2) # 3rd level interaction seems significant
  615. summary(mod3)
  616. car::Anova(mod3)
  617. # RLS2 robust variance
  618. summaryRobust(mod3, robust = TRUE)
  619. # Do some plotting to check lines
  620. interact_plot(model = mod3, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
  621. mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
  622. interact_plot(model = mod3, pred = LP, modx = neurType, mod2 = Plane, plot.points = TRUE, interval = T,
  623. modx.labels = c("Type1", "Type2", "Type3", "Type4"), mod2.labels = c("XY", "XZ", "YZ"))
  624. # Pruebas
  625. mod3.XY <- lm(LR ~ LP*neurType - 1, data = allData[allData$Plane == "XY" & allData$neurType != "Type4", ])
  626. car::Anova(mod3.XY)
  627. summary(mod3.XY)
  628. #######
  629. # NAs #
  630. #######
  631. # Test that NAs happen, when no intercept or main effects are present, for the dummy factor level that is not used as reference because one level is a linear combination of the others
  632. mod.Intercept <- lm(LR ~ LP + LP:neurType + LP:Plane + neurType:Plane + LP:neurType:Plane, data = allData)
  633. modNA.neur4 <- lm(LR ~ LP + LP:neurType + LP:Plane + neurType:Plane + LP:neurType:Plane - 1, data = allData)
  634. modNA.planYZ <- lm(LR ~ LP + LP:Plane + LP:neurType + neurType:Plane + LP:Plane:neurType - 1, data = allData)
  635. summary(modNA.neur4)
  636. # Do some plotting to check lines
  637. interact_plot(model = mod.Intercept, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
  638. mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
  639. interact_plot(model = modNA.neur4, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
  640. mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
  641. ```
  642. ###Diagnosis
  643. ```{r, fig.width=15, fig.height=10}
  644. # Diagnosis
  645. par(mfrow = c(2, 3))
  646. plot(mod3, 1:5)
  647. abline(h=c(-3,3), col="gray", lty="dashed")
  648. ############################
  649. # Discard by Cook distance #
  650. ############################
  651. # Augment model with diagnosis
  652. mod3.diag.metrics <- broom::augment(mod3)
  653. # Find observations that are highly influential
  654. cookDist <- mod3.diag.metrics[mod3.diag.metrics$.cooksd > 0.02, ]
  655. # Find rows and IDs of those observations to eliminate its participation in all planes
  656. remRows <- sapply(cookDist$.rownames, function(x) which(rownames(allData) == x))
  657. remIDs <- sapply(cookDist$.rownames, function(x) allData[which(rownames(allData) == x), 1])
  658. remRowsIDs <- as.vector(sapply(unique(remIDs), function(x) which(allData$id == x)))
  659. # Discard highly influential observations
  660. allData2 <- allData[-remRowsIDs, ]
  661. # Refit
  662. mod3.2 <- lm(LR ~ LP + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData2)
  663. summary(mod3.2)
  664. # Diagnosis
  665. par(mfrow = c(2, 3))
  666. plot(mod3.2, 1:5)
  667. abline(h=c(-3,3), col="gray", lty="dashed")
  668. ############################
  669. # Discard by Strd Residual #
  670. ############################
  671. # Augment model with diagnosis
  672. mod3.diag.metrics <- broom::augment(mod3)
  673. # Find observations that are highly influential
  674. stdResid <- mod3.diag.metrics[mod3.diag.metrics$.std.resid > 3, ]
  675. # Find rows and IDs of those observations to eliminate its participation in all planes
  676. remRows <- sapply(stdResid$.rownames, function(x) which(rownames(allData) == x))
  677. remIDs <- sapply(stdResid$.rownames, function(x) allData[which(rownames(allData) == x), 1])
  678. remRowsIDs <- as.vector(sapply(unique(remIDs), function(x) which(allData$id == x)))
  679. # Discard highly influential observations
  680. allData3 <- allData[-remRowsIDs, ]
  681. # Refit
  682. mod3.3 <- lm(LR ~ LP*neurType*Plane, data = allData3)
  683. summary(mod3.3)
  684. # Diagnosis
  685. par(mfrow = c(2, 3))
  686. plot(mod3.3, 1:5)
  687. abline(h=c(-3,3), col="gray", lty="dashed")
  688. ####################
  689. # Discard outliers #
  690. ####################
  691. # Use multivariate Mahalanobis distance and add to allData
  692. mahalDist <- by(allData, allData$neurType, function(x) mahalanobis(x[,2:3], colMeans(x[,2:3]), cov(x[,2:3])))
  693. allData$mahalDist <- unlist(mahalDist)
  694. # Sort from higher from lower Mahalanobis distance per neuron type
  695. mahalSort <- by(allData, allData$neurType, function(x) x[order(x$mahalDist, decreasing = TRUE),])
  696. # Store as data frame
  697. mahalSort <- do.call("rbind", mahalSort)
  698. # Discard observations with Mahalanobis distance >= 30
  699. allData4 <- mahalSort[mahalSort$mahalDist < 20, ]
  700. # Refit
  701. mod3.4 <- lm(LR ~ LP*neurType*Plane, data = allData4)
  702. summary(mod3.4)
  703. # Diagnosis
  704. par(mfrow = c(2, 3))
  705. plot(mod3.4, 1:5)
  706. abline(h=c(-3,3), col="gray", lty="dashed")
  707. # 3D PLOT Add a number identifyng every neuron-plane combination
  708. data3D$mahalDist <- unlist(mahalDist)
  709. data3Dmahal <- data3D[data3D$mahalDist < 20, ]
  710. rgl::open3d()
  711. car::scatter3d(x = data3Dmahal$LP, y = data3Dmahal$LR, z = data3Dmahal$plaNum, group = data3Dmahal$neurType,
  712. surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
  713. # Do some plotting to check lines
  714. interact_plot(model = mod3.4, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
  715. mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
  716. ###########################
  717. # Discard outliers by iqr #
  718. ###########################
  719. # https://easystats.github.io/performance/reference/check_outliers.html
  720. iqr <- by(allData[,2:3], attributes(check_outliers(mod3, method = "mahalanobis")))
  721. mod3.5 <- update(mod3, .~., data = allData[-as.numeric(which(iqr$data$Outlier == TRUE)), ])
  722. summary(mod3.5)
  723. # Diagnosis
  724. par(mfrow = c(2, 3))
  725. plot(mod3.5, 1:5)
  726. abline(h=c(-3,3), col="gray", lty="dashed")
  727. # 3D PLOT Add a number identifyng every neuron-plane combination
  728. data3Diqr <- data3D[-as.numeric(which(iqr$data$Outlier == TRUE)), ]
  729. rgl::open3d()
  730. car::scatter3d(x = data3Diqr$LP, y = data3Diqr$LR, z = data3Diqr$plaNum, group = data3Diqr$neurType,
  731. surface.col = c("red", "blue", "green", "purple"), surface=FALSE, ellipsoid = FALSE)
  732. # Do some plotting to check lines
  733. interact_plot(model = mod3.5, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
  734. mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
  735. ```
  736. ###One hot coding
  737. ```{r, fig.width=8, fig.height=8}
  738. onehotData <- one_hot(as.data.table(allData))
  739. # Fit models
  740. mod1 <- lm(LR ~ LP + neurType_Type2 + neurType_Type3 + neurType_Type4 + Plane_XZ + Plane_YZ + LP:neurType_Type2 + LP:neurType_Type3, data = onehotData)
  741. # Summmaries and anovas
  742. summary(mod1)
  743. # Diagnosis
  744. par(mfrow = c(2, 2))
  745. plot(mod1)
  746. ```
  747. ###Partial regression
  748. ```{r}
  749. # http://www.colbyimaging.com/wiki/statistics/multiple-regression
  750. lm.lr <- lm(LR ~ neurType*Plane, data = allData)
  751. lm.lp <- lm(LP ~ neurType*Plane, data = allData)
  752. lm.Res <- lm(lm.lr$res ~ lm.lp$res:as.factor(allData$NeurPlane) + 0)
  753. summary(lm.Res)
  754. ```
  755. ###Fixed/variable intercept/slopes
  756. ```{r}
  757. ############################
  758. # Allow variable intercept #
  759. ############################
  760. modIncp.neur <- lm(LR ~ LP + neurType, data = allData)
  761. modIncp.plane <- lm(LR ~ LP + Plane, data = allData)
  762. modIncp <- lm(LR ~ LP + neurType + Plane, data = allData)
  763. summary(modIncp.neur)
  764. summary(modIncp.plane)
  765. summary(modIncp)
  766. aggregate(LR ~ neurType, allData, mean)
  767. aggregate(LP ~ neurType, allData, mean)
  768. aggregate(LR ~ Plane, allData, mean)
  769. ############################
  770. # Allow variable slope #
  771. ############################
  772. modSlope.neur <- lm(LR ~ LP + LP:neurType, data = allData)
  773. modSlope.plane <- lm(LR ~ LP + LP:Plane, data = allData)
  774. modSlope <- lm(LR ~ LP + LP:neurType + LP:Plane, data = allData)
  775. summary(modSlope.neur)
  776. summary(modSlope.plane)
  777. summary(modSlope)
  778. ```
  779. ###Contr sum
  780. ```{r}
  781. dataSum <- allData
  782. contr.sum(5)
  783. contrasts(dataSum$neurType) <- contr.sum(4)
  784. contrasts(dataSum$Plane) <- contr.sum(3)
  785. mod.Sum <- lm(LR ~ LP + LP:neurType + LP:Plane + LP:neurType:Plane, data = dataSum)
  786. summary(mod.Sum)
  787. ```
  788. ```{r}
  789. nT <- allData$neurType
  790. p <- allData$Plane
  791. l <- allData$LP
  792. lr <- allData$LR
  793. X0 <- model.matrix(~ nT + p,
  794. contrasts.arg = list(nT = contr.treatment(n = 4, contrasts = FALSE),
  795. p = contr.treatment(n = 3, contrasts = FALSE)))
  796. f <- sample(gl(3, 4, labels = letters[1:3]))
  797. unname( rowSums(X0[, c("nT1", "nT2", "nT3", "nT4")]) )
  798. unname( rowSums(X0[, c("p1", "p2", "p3")]) )
  799. qr(X0)$rank
  800. summary(lm(lr ~ l + X0 - 1))
  801. ```
  802. ##RLS1
  803. ```{r, fig.width=15, fig.height=10}
  804. # Weight observations by residual size
  805. # https://stats.idre.ucla.edu/r/dae/robust-regression/
  806. # mod3 <- lm(LR ~ LP*neurType*Plane, data = allData[allData$neurType != "Type4", ])
  807. # mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData[allData$neurType != "Type4", ])
  808. mod3R <- rlm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData)
  809. mod2R <- rlm(LR ~ LP*neurType + LP*Plane + neurType*Plane - 1, data = allData)
  810. mod1R <- rlm(LR ~ LP + neurType + Plane - 1, data = allData)
  811. # Summmaries and anovas
  812. (sumMod <- summary(mod3R))
  813. # Get which coefficients are significative, although it may not be correct to use t distribution for H0 in RLS
  814. # https://stat.ethz.ch/pipermail/r-help/2006-July/108659.html
  815. attributes(sumMod$coefficients)$dimnames[[1]][which(rob.pvals(mod3R) < 0.05)]
  816. car::Anova(mod3R)
  817. # Do some plotting to check lines
  818. interact_plot(model = mod3R, pred = LP, mod2 = neurType, modx = Plane, plot.points = TRUE, interval = T,
  819. mod2.labels = c("Type1", "Type2", "Type3", "Type4"), modx.labels = c("XY", "XZ", "YZ"))
  820. interact_plot(model = mod3R, pred = LP, modx = neurType, mod2 = Plane, plot.points = TRUE, interval = T,
  821. modx.labels = c("Type1", "Type2", "Type3", "Type4"), mod2.labels = c("XY", "XZ", "YZ"))
  822. ```
  823. ##RLS2
  824. ```{r}
  825. # Robust variance and clustering
  826. mod3 <- estimatr::lm_robust(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = allData, clusters = id)
  827. summary(mod3)
  828. ```
  829. ###Diagnosis
  830. ```{r}
  831. # Diagnosis
  832. par(mfrow = c(2, 3))
  833. plot(mod3R, 1:5)
  834. abline(h=c(-3,3), col="gray", lty="dashed")
  835. ```
  836. ##GEE
  837. ```{r, fig.width=15, fig.height=10}
  838. # Fit models
  839. strucor <- "independence"
  840. strucor <- "exchangeable"
  841. dataNo4 <- allData[allData$neurType != "Type4", ]; dataNo4$neurType <- factor(dataNo4$neurType)
  842. mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataNo4, id = id, corstr = strucor)
  843. # mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = allData, id = id, corstr = strucor)
  844. mod2Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane - 1, data = dataNo4, id = id)
  845. mod1Gee <- geeglm(LR ~ LP + neurType + Plane - 1, data = dataNo4, id = id)
  846. # Summmaries and anovas
  847. coef(mod3Gee)
  848. anova(mod3Gee, mod2Gee, test = "F")
  849. summary(mod3Gee)
  850. anova(mod3Gee, test = "Chi")
  851. # Varriable selection
  852. gee_stepper(mod1Gee, formula(mod3Gee)) # also needs strucor
  853. ```
  854. ###Change ref level
  855. ```{r}
  856. # Change reference level
  857. dataNo4$neurType <- relevel(dataNo4$neurType, ref="Type2")
  858. dataNo4$Plane <- relevel(dataNo4$Plane, ref="YZ")
  859. mod3Gee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataNo4, id = id, corstr = strucor)
  860. summary(mod3Gee)
  861. ```
  862. ###Check correlation matrix
  863. We can also use the naive and robust variance estimates to select a correlation model. The model whose robust variance estimates most closely resembles its naive variance estimates is the better correlation model. To obtain a single summary statistic for this comparison I use the entire parameter covariance matrix and sum the absolute differences between the naive and robust covariance matrices.
  864. Link: https://sakai.unc.edu/access/content/group/2842013b-58f5-4453-aa8d-3e01bacbfc3d/public/Ecol562_Spring2012/docs/lectures/lecture23.htm#checking
  865. ```{r}
  866. # Select data
  867. datToMod <- allData
  868. # Check empirical correlation matrix
  869. mod3 <- lm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod)
  870. xyRes <- mod3$residuals[datToMod$Plane == "XY"]
  871. xzRes <- mod3$residuals[datToMod$Plane == "XZ"]
  872. yzRes <- mod3$residuals[datToMod$Plane == "YZ"]
  873. Hmisc::rcorr(cbind(xyRes, xzRes, yzRes))
  874. # Fit models with differente corr mat
  875. mod3Gee.In <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod,
  876. id = id, corstr = "independence")
  877. mod3Gee.Ex <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod,
  878. id = id, corstr = "exchangeable")
  879. mod3Gee.Ar <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod,
  880. id = id, corstr = "ar1")
  881. mod3Gee.Un <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane, data = datToMod,
  882. id = id, corstr = "unstructured")
  883. # Deviation from naive variance
  884. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
  885. # QIC criteria
  886. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
  887. # Check multiple comparisons
  888. K1 <- glht(mod3Gee.Ex, mcp(neurType = "Tukey"))$linfct
  889. K2 <- glht(mod3Gee.Ex, mcp(Plane = "Tukey"))$linfct
  890. summary(glht(mod3Gee.Ex, linfct = rbind(K1, K2)))
  891. mod3Gee.check <- geeglm(LR ~ LP + interact + LP:interact, data = datToMod,
  892. id = id, corstr = "exchangeable")
  893. summary(glht(mod3Gee.check, linfct = mcp(interact = "Tukey")))
  894. # Calculate means
  895. meanCalc <- aggregate(LR ~ neurType + Plane, data = allData, mean)
  896. summary(geeglm(LR ~ LP + neurType + Plane, data = datToMod,
  897. id = id, corstr = "exchangeable"))
  898. ```
  899. ###log transformation
  900. ```{r}
  901. # Select data
  902. datToMod <- dataNo4
  903. datToMod$logLP <- log(datToMod$LP)
  904. datToMod$logLR <- log(datToMod$LR)
  905. # Fit models with differente corr mat
  906. mod3Gee.In <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1,
  907. data = datToMod, id = id, corstr = "independence")
  908. mod3Gee.Ex <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1,
  909. data = datToMod, id = id, corstr = "exchangeable")
  910. mod3Gee.Ar <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1,
  911. data = datToMod, id = id, corstr = "ar1")
  912. mod3Gee.Un <- geeglm(logLR ~ logLP + neurType + Plane + logLP:neurType + logLP:Plane + logLP:neurType:Plane - 1,
  913. data = datToMod, id = id, corstr = "unstructured")
  914. # Deviation from naive variance
  915. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
  916. # QIC criteria
  917. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
  918. # Check the best option
  919. summary(mod3Gee.Ex)
  920. anova(mod3Gee.Ex)
  921. ```
  922. ###Standarize
  923. ```{r}
  924. # List order is: Type1XY, Type2XY, Type3XY, Type1XZ...
  925. LPstand <- by(dataNo4$LP, list(dataNo4$neurType, dataNo4$Plane), scale)
  926. LRstand <- by(dataNo4$LR, list(dataNo4$neurType, dataNo4$Plane), scale)
  927. # Number of observations
  928. numObs <- unlist(lapply(LPstand[1:3], function(x) length(x)))
  929. # Create data frame
  930. dataStand <- data.frame(id = rep(1:sum(numObs), 3),
  931. LR = unlist(LRstand),
  932. LP = unlist(LPstand),
  933. Plane = rep(c("XY", "XZ", "YZ"), each = sum(numObs)),
  934. neurType = rep(rep(c("Type1", "Type2", "Type3"), times = numObs), 3))
  935. dataStand <- dataStand[order(dataStand$id), ]
  936. dataStand$interact <- interaction(dataStand$neurType, dataStand$Plane, lex.order=T)
  937. # Plot
  938. ggplot(dataStand, aes(x = `LP`, y = `interact`, fill = neurType)) +
  939. geom_density_ridges(alpha = 0.8) +
  940. theme_ridges() +
  941. theme(legend.position = "none")
  942. # Fit Gee
  943. # dataStand$neurType <- relevel(dataNo4$neurType, ref="Type1")
  944. # dataStand$Plane <- relevel(dataNo4$Plane, ref="XY")
  945. standGee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataStand, id = id, corstr = "exchangeable")
  946. standMain <- geeglm(LR ~ LP + neurType + Plane - 1, data = dataStand, id = id, corstr = "exchangeable")
  947. summary(standGee)
  948. anova(standGee)
  949. anova(standGee, standMain)
  950. ```
  951. ####Bootstrap
  952. ```{r, fig.width=10, fig.height=5}
  953. # Bootstrap for inference
  954. # Ensure reproducibility
  955. # set.seed(12345)
  956. # bootInter <- bootCoefint(dataStand, fullMod = T, "exchangeable", 500)
  957. # saveRDS(bootInter, "bootCoefsInteraction.rds")
  958. # Store as data frame
  959. options(digits=10)
  960. bootFrame <- data.frame(Type1XY = bootInter$LP,
  961. Type1XZ = bootInter$LP + bootInter$LP.PlaneXZ,
  962. Type1YZ = bootInter$LP + bootInter$LP.PlaneYZ,
  963. Type2XY = bootInter$LP + bootInter$LP.neurTypeType2,
  964. Type2XZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneXZ,
  965. Type2YZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneYZ,
  966. Type3XY = bootInter$LP + bootInter$LP.neurTypeType3,
  967. Type3XZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneXZ,
  968. Type3YZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneYZ)
  969. bootMelt <- reshape2::melt(bootFrame)
  970. bootMelt$neurType <- rep(c("Type1", "Type2", "Type3"), each = 1500)
  971. bootMelt$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = 500), 3)
  972. #Represent LP
  973. ggNeur <- ggplot(data=bootMelt, aes(x=value, fill=Plane)) +
  974. geom_density(adjust=1.5, alpha = .4) +
  975. theme_bw() +
  976. facet_wrap(~neurType, ncol = 3) +
  977. theme(
  978. legend.position="bottom",
  979. panel.spacing = unit(1, "lines"),
  980. axis.ticks.x=element_blank()
  981. )
  982. ggPlanes <- ggplot(data=bootMelt, aes(x=value, fill=neurType)) +
  983. geom_density(adjust=1.5, alpha = .4) +
  984. theme_bw() +
  985. facet_wrap(~Plane, ncol = 3) +
  986. theme(
  987. legend.position="bottom",
  988. panel.spacing = unit(1, "lines"),
  989. axis.ticks.x=element_blank()
  990. )
  991. ggarrange(ggNeur, ggPlanes, ncol = 2, nrow = 1)
  992. ```
  993. #####CI95 plot
  994. ```{r}
  995. # Extract the quantiles we need
  996. quant <- apply(bootFrame, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
  997. quantFrame <- reshape2::melt(quant)
  998. quantFrame$y <- rep(1:9, each=3)
  999. # Plot CI95
  1000. plot(0, type="n", xlim=c(0.9,1.1), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
  1001. ytick <- seq(1, 9, by=1)
  1002. axis(side=2, at=ytick, labels = FALSE)
  1003. text(par("usr")[1], ytick,
  1004. labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)
  1005. for (i in seq(1, 27,3)) {
  1006. segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
  1007. points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
  1008. }
  1009. abline(v=c(1.268), lty = "dashed", col = "gray")
  1010. text(x=1.2765, y=10.1, "1.268",col="gray", font=2, cex=0.6)
  1011. ```
  1012. ###Divide max LR
  1013. ```{r}
  1014. # Find max LR
  1015. maxLR <- aggregate(LR ~ neurType + Plane, dataNo4, max)
  1016. dataMax <- dataNo4
  1017. # Divide by max LR per axon type and plane
  1018. for (i in c("Type1", "Type2", "Type3")) {
  1019. for (j in c("XY", "XZ", "YZ")) {
  1020. # Subset and divide LR
  1021. lr <- dataMax[dataMax$neurType == i & dataMax$Plane == j, 2]
  1022. dataMax[dataMax$neurType == i & dataMax$Plane == j, 2] <- lr/maxLR[maxLR$neurType == i & maxLR$Plane == j, 3]
  1023. # Subset and divide LP
  1024. lp <- dataMax[dataMax$neurType == i & dataMax$Plane == j, 3]
  1025. dataMax[dataMax$neurType == i & dataMax$Plane == j, 3] <- lp/maxLR[maxLR$neurType == i & maxLR$Plane == j, 3]
  1026. }
  1027. }
  1028. # Plot
  1029. ggplot(dataMax, aes(x = `LP`, y = `interact`, fill = neurType)) +
  1030. geom_density_ridges(alpha = 0.8) +
  1031. theme_ridges() +
  1032. theme(legend.position = "none")
  1033. # Fit GEE
  1034. maxGee <- geeglm(LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1, data = dataMax, id = id, corstr = "exchangeable")
  1035. summary(maxGee)
  1036. anova(maxGee)
  1037. ```
  1038. ###neurType*Plane included
  1039. ```{r}
  1040. # Select data
  1041. datToMod <- dataNo4
  1042. contrasts(datToMod$neurType) <- contr.treatment
  1043. contrasts(datToMod$Plane) <- contr.treatment
  1044. # Fit models with differente corr mat
  1045. mod3Gee.In <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "independence")
  1046. mod3Gee.Ex <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "exchangeable")
  1047. mod3Gee.Ar <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "ar1")
  1048. mod3Gee.Un <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, corstr = "unstructured")
  1049. # Deviation from naive variance
  1050. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
  1051. # QIC criteria
  1052. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
  1053. # Exchangeable seems the best option
  1054. mod3Gee.Ex.Int <- geeglm(LR ~ LP*interact, data = datToMod, id = id, corstr = "exchangeable")
  1055. # Multiple comparisons FWER
  1056. comparMult <- glht(mod3Gee.Ex.Int, linfct = mcp(interact = "Tukey"))
  1057. summary(comparMult)
  1058. # Multiple comparisons FDR
  1059. summary(comparMult, adjust = "bonferroni")
  1060. # Variable selection
  1061. mod1Gee <- geeglm(LR ~ 1, data = datToMod, id = id, corstr = "exchangeable")
  1062. gee_stepper(mod1Gee, formula(mod3Gee.Ex))
  1063. summary(mod3Gee.Ex)
  1064. anova(mod3Gee.Ex)
  1065. ```
  1066. ####log transformation
  1067. ```{r}
  1068. # Select data
  1069. datToMod <- dataNo4
  1070. datToMod$interact <- factor(datToMod$interact)
  1071. datToMod$logLP <- log(datToMod$LP)
  1072. datToMod$logLR <- log(datToMod$LR)
  1073. contrasts(datToMod$neurType) <- contr.treatment
  1074. contrasts(datToMod$Plane) <- contr.treatment
  1075. # Fit models with differente corr mat
  1076. mod3Gee.In <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "independence")
  1077. mod3Gee.Ex <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "exchangeable")
  1078. mod3Gee.Ar <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "ar1")
  1079. mod3Gee.Un <- geeglm(logLR ~ logLP*neurType*Plane, data = datToMod, id = id, corstr = "unstructured")
  1080. # Deviation from naive variance
  1081. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
  1082. # QIC criteria
  1083. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
  1084. # Check the best option
  1085. summary(mod3Gee.Ex)
  1086. ```
  1087. ####1/(y^2)
  1088. ```{r}
  1089. # Select data
  1090. datToMod <- allData
  1091. datToMod$interact <- factor(datToMod$interact)
  1092. datToMod$wei <- 1/((datToMod$LR)^2)
  1093. contrasts(datToMod$neurType) <- contr.treatment
  1094. contrasts(datToMod$Plane) <- contr.treatment
  1095. # Fit models with differente corr mat
  1096. mod3Gee.In <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "independence")
  1097. mod3Gee.Ex <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "exchangeable")
  1098. mod3Gee.Ar <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "ar1")
  1099. mod3Gee.Un <- geeglm(LR ~ LP*neurType*Plane, data = datToMod, id = id, weights = wei, corstr = "unstructured")
  1100. # Deviation from naive variance
  1101. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), var.crit)
  1102. # QIC criteria
  1103. sapply(list(mod3Gee.In, mod3Gee.Ex, mod3Gee.Ar, mod3Gee.Un), function(x) QIC(x))
  1104. # Check the best option
  1105. summary(mod3Gee.Un)
  1106. ```
  1107. ##Bootstrapped coefficients
  1108. ```{r}
  1109. # Store bootstrapped coefficients in an empty list
  1110. listBoot <- list()
  1111. count <- 1
  1112. reps <- 4000
  1113. # Iterate through all axon types and planes
  1114. # Ensure reproducibility
  1115. # set.seed(12345)
  1116. for (i in c("Type1", "Type2", "Type3", "Type4")) {
  1117. for (j in c("XY", "XZ", "YZ")) {
  1118. listBoot[[count]] <- bootCoef(allData, i, j, "exchangeable", reps)
  1119. count <- count + 1
  1120. }
  1121. }
  1122. # saveRDS(listBoot, "bootCoefs400reps.rds")
  1123. # Store as data frame
  1124. bootFrame <- do.call(rbind, listBoot)
  1125. bootFrame$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), each = reps*3)
  1126. bootFrame$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = reps), 4)
  1127. bootFrame$NeurPlane <- interaction(bootFrame$neurType, bootFrame$Plane, lex.order=T)
  1128. # Extract the quantiles we need
  1129. listQuant <- list()
  1130. count <- 1
  1131. quantToEst <- c(0.025,0.05,0.95,0.975)
  1132. # Iterate through all axon types and planes
  1133. for (i in c("Type1", "Type2", "Type3", "Type4")) {
  1134. for (j in c("XY", "XZ", "YZ")) {
  1135. listQuant[[count]] <- quantile(bootFrame[bootFrame$neurType == i & bootFrame$Plane == j, 2], c(0.025,0.05,0.95,0.975))
  1136. count <- count + 1
  1137. }
  1138. }
  1139. # Store quantiles in data frame
  1140. quantMat <- as.data.frame(do.call(rbind, listQuant))
  1141. quantMat$NeurPlane <- factor(paste(rep(c("Type1", "Type2", "Type3", "Type4"), each = 3), rep(c("XY", "XZ", "YZ"), each = 4), sep="."))
  1142. quantFrame <- reshape2::melt(quantMat)
  1143. ```
  1144. ```{r}
  1145. #Represent the CI
  1146. ggplot(data=bootFrame, aes(x=LP, fill=Plane)) +
  1147. geom_density(adjust=1.5, alpha = .4) +
  1148. theme_bw() +
  1149. facet_wrap(~neurType, ncol = 4) +
  1150. theme(
  1151. legend.position="bottom",
  1152. panel.spacing = unit(1, "lines"),
  1153. axis.ticks.x=element_blank()
  1154. )
  1155. ```
  1156. ###Simulate experimental noise
  1157. ####Check intra-cluster variance
  1158. ```{r}
  1159. intraVar <- aggregate(LP ~ id, data = allData, function(x) sd(x)/mean(x))
  1160. intraVar$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)
  1161. png(filename=paste("intraclusterCV.png", sep=""), type="cairo",
  1162. units="in", width=5, height=5, pointsize=12,
  1163. res=300)
  1164. intraVar %>%
  1165. ggplot( aes(x=neurType, y=LP, fill=neurType)) +
  1166. geom_boxplot() +
  1167. scale_fill_viridis(discrete = TRUE, alpha=0.6) +
  1168. theme_bw() +
  1169. theme(
  1170. legend.position="none",
  1171. plot.title = element_text(size=11),
  1172. axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
  1173. ) +
  1174. xlab("Axonal type") +
  1175. ylab("Intracluster Variance")
  1176. dev.off()
  1177. ```
  1178. ####Add normal noise
  1179. ```{r}
  1180. set.seed(12345)
  1181. # type1Noise <- rnorm()
  1182. ```
  1183. ##Figure scripts OLD
  1184. ####Intracluster variance
  1185. Same chunk as "Check intra-cluster variance" within "Bootstrapped coefficients"
  1186. ```{r}
  1187. intraVar <- aggregate(LP ~ id, data = allData, function(x) sd(x)/mean(x))
  1188. intraVar$neurType <- rep(c("Type1", "Type2", "Type3", "Type4"), numberTypes)
  1189. #Ggplot
  1190. png(filename=paste("intraclusterCV.png", sep=""), type="cairo",
  1191. units="in", width=5, height=5, pointsize=12,
  1192. res=300)
  1193. intraVar %>%
  1194. ggplot( aes(x=neurType, y=LP, fill=neurType)) +
  1195. geom_boxplot() +
  1196. scale_fill_viridis(discrete = TRUE, alpha=0.6) +
  1197. theme_bw() +
  1198. theme(
  1199. legend.position="none",
  1200. plot.title = element_text(size=11),
  1201. axis.text.x = element_text(angle = 45, hjust = 1, size = 9)
  1202. ) +
  1203. xlab("Axonal type") +
  1204. ylab("Intracluster Variance")
  1205. dev.off()
  1206. #Base R
  1207. png(filename=paste("intraclusterCV_baseR.png", sep=""), type="cairo",
  1208. units="in", width=5, height=5, pointsize=12,
  1209. res=300)
  1210. boxplot(LP ~ neurType, data = intraVar, col= c("violet", "lightblue", "lightgreen", "tan1"),
  1211. outpch = 19, outbg = "black", outcol = "black", cex = .7, ylim = c(0,0.4))
  1212. dev.off()
  1213. ```
  1214. ###Inference
  1215. #####Original model bootstrap
  1216. Same chunk as "Check correlation matrix" within "GEE"
  1217. ```{r, fig.width=10, fig.height=5}
  1218. # Bootstrap for inference
  1219. # Ensure reproducibility
  1220. # set.seed(12345)
  1221. # bootInter <- bootCoefint(dataNo4, "LR ~ LP + neurType + Plane + LP:neurType + LP:Plane + LP:neurType:Plane - 1", "exchangeable", 2000)
  1222. # saveRDS(bootInter, "bootCoefsInteraction.rds")
  1223. bootInter <- readRDS("bootCoefsInteraction.rds")
  1224. # Store as data frame
  1225. options(digits=10)
  1226. bootFrame <- data.frame(Type1XY = bootInter$LP,
  1227. Type1XZ = bootInter$LP + bootInter$LP.PlaneXZ,
  1228. Type1YZ = bootInter$LP + bootInter$LP.PlaneYZ,
  1229. Type2XY = bootInter$LP + bootInter$LP.neurTypeType2,
  1230. Type2XZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneXZ,
  1231. Type2YZ = bootInter$LP + bootInter$LP.neurTypeType2.PlaneYZ,
  1232. Type3XY = bootInter$LP + bootInter$LP.neurTypeType3,
  1233. Type3XZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneXZ,
  1234. Type3YZ = bootInter$LP + bootInter$LP.neurTypeType3.PlaneYZ)
  1235. bootMelt <- reshape2::melt(bootFrame)
  1236. bootMelt$neurType <- rep(c("Type1", "Type2", "Type3"), each = 6000)
  1237. bootMelt$Plane <- rep(rep(c("XY", "XZ", "YZ"), each = 2000), 3)
  1238. #Represent LP
  1239. ggNeur <- ggplot(data=bootMelt, aes(x=value, fill=Plane)) +
  1240. geom_density(adjust=1.5, alpha = .4) +
  1241. theme_bw() +
  1242. facet_wrap(~neurType, ncol = 3) +
  1243. theme(
  1244. legend.position="bottom",
  1245. panel.spacing = unit(1, "lines"),
  1246. axis.ticks.x=element_blank()
  1247. )
  1248. ggPlanes <- ggplot(data=bootMelt, aes(x=value, fill=neurType)) +
  1249. geom_density(adjust=1.5, alpha = .4) +
  1250. theme_bw() +
  1251. facet_wrap(~Plane, ncol = 3) +
  1252. theme(
  1253. legend.position="bottom",
  1254. panel.spacing = unit(1, "lines"),
  1255. axis.ticks.x=element_blank()
  1256. )
  1257. png(filename=paste("bootstrapLP_neurvsplane.png", sep=""), type="cairo",
  1258. units="in", width=10, height=3, pointsize=12,
  1259. res=300)
  1260. ggarrange(ggNeur, ggPlanes, ncol = 2, nrow = 1)
  1261. dev.off()
  1262. ```
  1263. #####CI95 plot
  1264. ```{r}
  1265. # Extract the quantiles we need
  1266. quant <- apply(bootFrame, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
  1267. quantFrame <- reshape2::melt(quant)
  1268. quantFrame$y <- rep(1:9, each=3)
  1269. # Plot CI95
  1270. png(filename=paste("ci95.png", sep=""), type="cairo",
  1271. units="in", width=5, height=5, pointsize=12,
  1272. res=300)
  1273. plot(0, type="n", xlim=c(1.20,1.35), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
  1274. ytick <- seq(1, 9, by=1)
  1275. axis(side=2, at=ytick, labels = FALSE)
  1276. text(par("usr")[1], ytick,
  1277. labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)
  1278. for (i in seq(1, 27,3)) {
  1279. segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
  1280. points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
  1281. }
  1282. abline(v=c(1.268), lty = "dashed", col = "gray")
  1283. text(x=1.2765, y=10.1, "1.268",col="gray", font=2, cex=0.6)
  1284. dev.off()
  1285. # Zoom to check whether 2 and 7 overlap or not
  1286. png(filename=paste("ci95zoom.png", sep=""), type="cairo",
  1287. units="in", width=5, height=5, pointsize=12,
  1288. res=300)
  1289. plot(0,type="n", xlim=c(1.26,1.27), ylim=c(0,10), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
  1290. ytick<-seq(1, 9, by=1)
  1291. axis(side=2, at=ytick, labels = FALSE)
  1292. text(par("usr")[1], ytick,
  1293. labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)
  1294. for (i in seq(1, 27,3)) {
  1295. segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
  1296. points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
  1297. }
  1298. abline(v=c(1.268), lty = "dashed", col = "gray")
  1299. dev.off()
  1300. ```
  1301. #####Global LP
  1302. ```{r}
  1303. # Grand LP by sum of bootstrapped coeffients
  1304. globalLP <- data.frame(LP = bootInter$LP + bootInter$LP.PlaneXZ + bootInter$LP.PlaneYZ + bootInter$LP.neurTypeType2 + bootInter$LP.neurTypeType2.PlaneXZ + bootInter$LP.neurTypeType2.PlaneYZ + bootInter$LP.neurTypeType3 + bootInter$LP.neurTypeType3.PlaneXZ + bootInter$LP.neurTypeType3.PlaneYZ)
  1305. globalQuant <- quantile(globalLP$LP, c(0.025, 0.5, 0.975), type=7)
  1306. sumLP <- ggplot(data=globalLP, aes(x=LP)) +
  1307. geom_density(adjust=1.5, alpha = .4) +
  1308. geom_vline(xintercept = globalQuant, linetype="dotted",
  1309. color = "blue") +
  1310. theme_bw() +
  1311. ggtitle("Sum LP") +
  1312. theme(
  1313. legend.position="bottom",
  1314. panel.spacing = unit(1, "lines"),
  1315. axis.ticks.x=element_blank()
  1316. )
  1317. # Grand LP by bootstrapping
  1318. # Ensure reproducibility
  1319. # set.seed(12345)
  1320. # bootLP <- bootCoefint(dataNo4, "LR ~ LP - 1", "exchangeable", 2000)
  1321. bootLPQuant <- quantile(bootLP$LP, c(0.025, 0.5, 0.975), type=7)
  1322. bootLP <- ggplot(data=bootLP, aes(x=LP)) +
  1323. geom_density(adjust=1.5, alpha = .4) +
  1324. geom_vline(xintercept = bootLPQuant, linetype="dotted",
  1325. color = "blue") +
  1326. theme_bw() +
  1327. ggtitle("Boot LP") +
  1328. theme(
  1329. legend.position="bottom",
  1330. panel.spacing = unit(1, "lines"),
  1331. axis.ticks.x=element_blank()
  1332. )
  1333. png(filename=paste("LPestimations.png", sep=""), type="cairo",
  1334. units="in", width=10, height=5, pointsize=12,
  1335. res=300)
  1336. ggarrange(sumLP, bootLP, ncol = 2, nrow = 1)
  1337. dev.off()
  1338. ```
  1339. ##Figure scripts NEW
  1340. ###Estimate alpha per neuron type
  1341. ```{r}
  1342. # Alpha per neurType
  1343. options(digits = 10)
  1344. #######
  1345. # GEE #
  1346. #######
  1347. # Check correlation matrix
  1348. geeNeur.in <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "independence")
  1349. geeNeur.ex <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "exchangeable")
  1350. geeNeur.ar <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "ar1")
  1351. geeNeur.un <- geeglm(LR ~ LP:neurType - 1, data = allData, id = id, corstr = "unstructured")
  1352. # Deviation from naive variance
  1353. sapply(list(geeNeur.in, geeNeur.ex, geeNeur.ar, geeNeur.un), var.crit)
  1354. # Bootstrap
  1355. # Ensure reproducibility
  1356. # set.seed(12345)
  1357. # bootNeur <- bootCoefint(allData, "LR ~ LP:neurType - 1", "exchangeable", 2000)
  1358. # saveRDS(bootNeur, "bootCoefs_perNeuron.rds")
  1359. # bootNeur <- readRDS("bootCoefs_perNeuron.rds")
  1360. # colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")
  1361. #######
  1362. # OLS #
  1363. #######
  1364. # Bootstrap
  1365. # Ensure reproducibility
  1366. # set.seed(12345)
  1367. # bootNeur <- bootCoefintLM(allData, "LR ~ LP:neurType - 1", 2000)
  1368. # saveRDS(bootNeur, "bootCoefs_perNeuron_OLS.rds")
  1369. bootNeur <- readRDS("ProyeccionAnalysis/bootCoefs_perNeuron_OLS.rds")
  1370. colnames(bootNeur) <- c("Type1", "Type2", "Type3", "Type4")
  1371. # Get R2 value
  1372. summary(lm(LR ~ LP:neurType - 1, allData))
  1373. ```
  1374. ####CI95 plot
  1375. ```{r}
  1376. # Extract the quantiles we need
  1377. quant <- apply(bootNeur, 2, function(x) quantile(x, c(0.025, 0.5, 0.975), type=7))
  1378. quantFrame <- reshape2::melt(quant)
  1379. quantFrame$y <- rep(1:4, each=3)
  1380. # Plot CI95
  1381. png(filename=paste("ci95_perNeuron_OLS.png", sep=""), type="cairo",
  1382. units="in", width=5, height=5, pointsize=12,
  1383. res=300)
  1384. plot(0, type="n", xlim=c(1.265, 1.293), ylim=c(0.5,4.5), xlab="Alpha (CI95%)", yaxt="n", ylab = "", cex.axis = 0.7)
  1385. ytick <- seq(1, 4, by=1)
  1386. axis(side=2, at=ytick, labels = FALSE)
  1387. text(par("usr")[1], ytick,
  1388. labels = unique(quantFrame$Var2), pos = 2, xpd = TRUE, cex = 0.7)
  1389. for (i in seq(1, 12,3)) {
  1390. segments(quantFrame[i,3], quantFrame[i,4], quantFrame[i+2,3], quantFrame[i+2,4])
  1391. points(quantFrame[i+1,3], quantFrame[i+1,4], pch = 21, col = NA, bg="red")
  1392. }
  1393. rect(quantFrame[quantFrame$Var1 == "2.5%" & quantFrame$Var2 == "Type4", 3], -1,
  1394. quantFrame[quantFrame$Var1 == "97.5%" & quantFrame$Var2 == "Type3", 3], 6,
  1395. col = rgb(0.5,0.5,0.5,1/4), lty = "dashed", border = "darkgray")
  1396. dev.off()
  1397. ```
  1398. ####Line with shades
  1399. ```{r, fig.width=8, fig.height=8}
  1400. png(filename=paste("lines_perNeuron_OLS.png", sep=""), type="cairo",
  1401. units="in", width=5, height=5, pointsize=12,
  1402. res=300)
  1403. op <- par(mfrow = c(2,2),
  1404. oma = c(0,0,0,0) + 0.1,
  1405. mar = c(0,0,1,1) + 0.1)
  1406. for (i in (1:4)) {
  1407. # Get axis range
  1408. xRange <- allData[allData$neurType == paste("Type", i, sep=""), 3]
  1409. yRange <- allData[allData$neurType == paste("Type", i, sep=""), 2]
  1410. # Get max, mean and min alphas
  1411. alphaVec <- c(max(bootNeur[,i]), median(bootNeur[,i]), min(bootNeur[,1]))
  1412. # Plot observations, mean line and shade with span of all booted lines
  1413. plot(LR ~ LP, data = allData[allData$neurType == paste("Type", i, sep=""), ], pch = 21, cex = 0.5, bg = "gray", col = NULL,
  1414. xlab = "Projected 2D Length", ylab = "Real 3D Length", xaxt="n", yaxt = "n")
  1415. lines(xRange, alphaVec[2]*xRange, col = "red", lwd = 0.5)
  1416. polygon(x = c(min(xRange), min(xRange), max(xRange), max(xRange)),
  1417. y = c(min(alphaVec[3]*xRange), min(alphaVec[1]*xRange), max(alphaVec[3]*xRange), max(alphaVec[1]*xRange)),
  1418. col = rgb(1,0,0, alpha = 0.3), border = NA)
  1419. # plotrix::corner.label(x = -1, y = 1, paste("Type", i, sep=""))
  1420. }
  1421. par(op)
  1422. dev.off()
  1423. ```