2 years ago · 015f015d54
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,6 @@
 
				+.Rproj.user
			
 
				+.Rhistory
			
 
				+.RData
			
 
				+.Ruserdata
			
 
				+*html
			
 
				+*Rproj
			
--- a/README.md
+++ b/README.md
@@ -1,3 +1,174 @@
 
				-# geo-htseq-paper
			
 
				+Working with model objects
			
 
				+================
			
 
				 
			
 
				-A field-wide assessment of differential high throughput sequencing reveals widespread bias
			
 
				+## Install
			
 
				+
			
 
				+-   Download and install R <https://www.r-project.org> (and RStudio
			
 
				+    <https://www.rstudio.com/products/rstudio/>).
			
 
				+
			
 
				+-   Go to R console or open scripts/README.Rmd in RStudio.
			
 
				+
			
 
				+-   Install these packages (this needs to be done once).
			
 
				+
			
 
				+``` r
			
 
				+if(!require(brms)){
			
 
				+  install.packages(c("brms", "here"))
			
 
				+  library(brms)
			
 
				+  library(here)
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+## Run
			
 
				+
			
 
				+In R console OR in .Rmd file:
			
 
				+
			
 
				+-   Load packages to R environment.
			
 
				+
			
 
				+``` r
			
 
				+library(brms)
			
 
				+library(here)
			
 
				+```
			
 
				+
			
 
				+-   Load the model object and print model summary.
			
 
				+
			
 
				+``` r
			
 
				+m <- readRDS(here("models/anticons_detool.rds"))
			
 
				+print(m)
			
 
				+```
			
 
				+
			
 
				+    ##  Family: bernoulli 
			
 
				+    ##   Links: mu = logit 
			
 
				+    ## Formula: anticons ~ de_tool 
			
 
				+    ##    Data: data (Number of observations: 2109) 
			
 
				+    ## Samples: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
			
 
				+    ##          total post-warmup samples = 4000
			
 
				+    ## 
			
 
				+    ## Population-Level Effects: 
			
 
				+    ##                Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
			
 
				+    ## Intercept         -3.51      0.23    -4.00    -3.09 1.00      942     1128
			
 
				+    ## de_tooldeseq       2.66      0.25     2.19     3.18 1.00     1080     1153
			
 
				+    ## de_tooledger       3.57      0.26     3.07     4.10 1.00     1051     1435
			
 
				+    ## de_toollimma       3.58      0.42     2.77     4.42 1.00     1693     2093
			
 
				+    ## de_toolunknown     2.46      0.25     1.99     2.98 1.00     1052     1153
			
 
				+    ## 
			
 
				+    ## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
			
 
				+    ## and Tail_ESS are effective sample size measures, and Rhat is the potential
			
 
				+    ## scale reduction factor on split chains (at convergence, Rhat = 1).
			
 
				+
			
 
				+-   Get fitted coefficients with 95% credible intervals.
			
 
				+
			
 
				+``` r
			
 
				+posterior_summary(m)
			
 
				+```
			
 
				+
			
 
				+    ##                     Estimate Est.Error        Q2.5       Q97.5
			
 
				+    ## b_Intercept        -3.514582 0.2300317   -4.001753   -3.088126
			
 
				+    ## b_de_tooldeseq      2.656732 0.2492621    2.188271    3.183202
			
 
				+    ## b_de_tooledger      3.571890 0.2642955    3.074863    4.103799
			
 
				+    ## b_de_toollimma      3.579595 0.4163380    2.773039    4.424586
			
 
				+    ## b_de_toolunknown    2.461885 0.2476724    1.990004    2.975438
			
 
				+    ## lp__             -969.443536 1.6153116 -973.461001 -967.323674
			
 
				+
			
 
				+-   Get the full posterior.
			
 
				+
			
 
				+``` r
			
 
				+post <- posterior_samples(m)
			
 
				+head(post)
			
 
				+```
			
 
				+
			
 
				+    ##   b_Intercept b_de_tooldeseq b_de_tooledger b_de_toollimma b_de_toolunknown      lp__
			
 
				+    ## 1   -3.300792       2.355140       3.418613       3.433826         2.191998 -968.0241
			
 
				+    ## 2   -3.485360       2.606697       3.559653       3.530865         2.329601 -967.5360
			
 
				+    ## 3   -3.628822       2.705505       3.763745       3.932852         2.560540 -967.7857
			
 
				+    ## 4   -3.621123       2.917484       3.643992       3.647139         2.545874 -968.5439
			
 
				+    ## 5   -3.795040       2.929855       3.797475       3.520305         2.633857 -968.9733
			
 
				+    ## 6   -3.248306       2.532679       3.443725       3.528865         2.130235 -969.7214
			
 
				+
			
 
				+-   What is the estimated difference in the proportion of
			
 
				+    anti-conservative p value histograms between DESeq2 and EdgeR?
			
 
				+
			
 
				+``` r
			
 
				+posterior_deseq_edger <- inv_logit_scaled(post$b_de_tooldeseq - post$b_de_tooledger)
			
 
				+hist(posterior_deseq_edger, breaks = 40)
			
 
				+```
			
 
				+
			
 
				+![](plots/posterior.png)
			
 
				+
			
 
				+-   The posterior summary for the effect size.
			
 
				+
			
 
				+``` r
			
 
				+posterior_summary(posterior_deseq_edger) 
			
 
				+```
			
 
				+
			
 
				+    ##       Estimate  Est.Error      Q2.5     Q97.5
			
 
				+    ## [1,] 0.2870903 0.03315653 0.2267546 0.3541821
			
 
				+
			
 
				+The estimated effect size is somewhere between 23% and 35%.
			
 
				+
			
 
				+-   Get data from model object.
			
 
				+
			
 
				+``` r
			
 
				+data <- m$data
			
 
				+head(data)
			
 
				+```
			
 
				+
			
 
				+    ##   anticons  de_tool
			
 
				+    ## 1        1  unknown
			
 
				+    ## 2        0  unknown
			
 
				+    ## 3        1    edger
			
 
				+    ## 4        0 cuffdiff
			
 
				+    ## 5        0    limma
			
 
				+    ## 6        0  unknown
			
 
				+
			
 
				+-   Extract stan code from model object. This is the fullest model
			
 
				+    description.
			
 
				+
			
 
				+``` r
			
 
				+stancode(m)
			
 
				+```
			
 
				+
			
 
				+    ## // generated with brms 2.15.0
			
 
				+    ## functions {
			
 
				+    ## }
			
 
				+    ## data {
			
 
				+    ##   int<lower=1> N;  // total number of observations
			
 
				+    ##   int Y[N];  // response variable
			
 
				+    ##   int<lower=1> K;  // number of population-level effects
			
 
				+    ##   matrix[N, K] X;  // population-level design matrix
			
 
				+    ##   int prior_only;  // should the likelihood be ignored?
			
 
				+    ## }
			
 
				+    ## transformed data {
			
 
				+    ##   int Kc = K - 1;
			
 
				+    ##   matrix[N, Kc] Xc;  // centered version of X without an intercept
			
 
				+    ##   vector[Kc] means_X;  // column means of X before centering
			
 
				+    ##   for (i in 2:K) {
			
 
				+    ##     means_X[i - 1] = mean(X[, i]);
			
 
				+    ##     Xc[, i - 1] = X[, i] - means_X[i - 1];
			
 
				+    ##   }
			
 
				+    ## }
			
 
				+    ## parameters {
			
 
				+    ##   vector[Kc] b;  // population-level effects
			
 
				+    ##   real Intercept;  // temporary intercept for centered predictors
			
 
				+    ## }
			
 
				+    ## transformed parameters {
			
 
				+    ## }
			
 
				+    ## model {
			
 
				+    ##   // likelihood including constants
			
 
				+    ##   if (!prior_only) {
			
 
				+    ##     target += bernoulli_logit_glm_lpmf(Y | Xc, Intercept, b);
			
 
				+    ##   }
			
 
				+    ##   // priors including constants
			
 
				+    ##   target += student_t_lpdf(Intercept | 3, 0, 2.5);
			
 
				+    ## }
			
 
				+    ## generated quantities {
			
 
				+    ##   // actual population-level intercept
			
 
				+    ##   real b_Intercept = Intercept - dot_product(means_X, b);
			
 
				+    ## }
			
 
				+
			
 
				+## This document
			
 
				+
			
 
				+This README.md was generated by running:
			
 
				+
			
 
				+``` r
			
 
				+rmarkdown::render("scripts/README.Rmd", output_file = here::here("README.md"))
			
 
				+```
			
--- a/plots/posterior.png
+++ b/plots/posterior.png
--- a/scripts/README.Rmd
+++ b/scripts/README.Rmd
@@ -0,0 +1,104 @@
 
				+---
			
 
				+title: "Working with model objects"
			
 
				+output: github_document
			
 
				+---
			
 
				+
			
 
				+## Install
			
 
				+
			
 
				+- Download and install R [https://www.r-project.org](https://www.r-project.org) (and RStudio [https://www.rstudio.com/products/rstudio/](https://www.rstudio.com/products/rstudio/)).
			
 
				+
			
 
				+- Go to R console or open scripts/README.Rmd in RStudio.
			
 
				+
			
 
				+- Install these packages (this needs to be done once).
			
 
				+
			
 
				+```{r}
			
 
				+if(!require(brms)){
			
 
				+  install.packages(c("brms", "here"))
			
 
				+  library(brms)
			
 
				+  library(here)
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+## Run
			
 
				+
			
 
				+In R console OR in .Rmd file:
			
 
				+
			
 
				+- Load packages to R environment.
			
 
				+
			
 
				+```{r, message=FALSE}
			
 
				+library(brms)
			
 
				+library(here)
			
 
				+```
			
 
				+
			
 
				+- Load the model object and print model summary.
			
 
				+
			
 
				+```{r}
			
 
				+m <- readRDS(here("models/anticons_detool.rds"))
			
 
				+print(m)
			
 
				+```
			
 
				+
			
 
				+- Get fitted coefficients with 95% credible intervals.
			
 
				+
			
 
				+```{r}
			
 
				+posterior_summary(m)
			
 
				+```
			
 
				+
			
 
				+- Get the full posterior.
			
 
				+
			
 
				+```{r}
			
 
				+post <- posterior_samples(m)
			
 
				+head(post)
			
 
				+```
			
 
				+
			
 
				+- What is the estimated difference in the proportion of anti-conservative p value histograms between DESeq2 and EdgeR?
			
 
				+
			
 
				+```{r, eval=FALSE}
			
 
				+posterior_deseq_edger <- inv_logit_scaled(post$b_de_tooldeseq - post$b_de_tooledger)
			
 
				+hist(posterior_deseq_edger, breaks = 40)
			
 
				+```
			
 
				+
			
 
				+
			
 
				+```{r, echo=FALSE, message=FALSE}
			
 
				+posterior_deseq_edger <- inv_logit_scaled(post$b_de_tooldeseq - post$b_de_tooledger)
			
 
				+png(here("plots/posterior.png"))
			
 
				+hist(posterior_deseq_edger, breaks = 40)
			
 
				+invisible(dev.off())
			
 
				+```
			
 
				+
			
 
				+![](plots/posterior.png)
			
 
				+
			
 
				+- The posterior summary for the effect size.
			
 
				+
			
 
				+```{r}
			
 
				+posterior_summary(posterior_deseq_edger) 
			
 
				+```
			
 
				+
			
 
				+```{r, echo=FALSE}
			
 
				+ps <- posterior_summary(posterior_deseq_edger)
			
 
				+```
			
 
				+
			
 
				+The estimated effect size is somewhere between `r paste(scales::percent(ps[1,3:4]), collapse = " and ")`.
			
 
				+
			
 
				+- Get data from model object.
			
 
				+
			
 
				+```{r}
			
 
				+data <- m$data
			
 
				+head(data)
			
 
				+```
			
 
				+
			
 
				+- Extract stan code from model object. This is the fullest model description.
			
 
				+
			
 
				+```{r}
			
 
				+stancode(m)
			
 
				+```
			
 
				+
			
 
				+
			
 
				+## This document
			
 
				+
			
 
				+This README.md was generated by running:
			
 
				+
			
 
				+```{r, eval=FALSE}
			
 
				+rmarkdown::render("scripts/README.Rmd", output_file = here::here("README.md"))
			
 
				+```
			
 
				+
			
 
				+