pipeline.json 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. {
  2. "id": "wiedemann_atac_paired_end",
  3. "name": "Wiedemann ATAC paired end pipeline",
  4. "description": "<p>This pipeline processes paired end ATAC data.</p><p><b>Input specifications</b><br />The input is expected as gzipped FASTQ files (<var>.fq.gz</var> / <var>.fastq.gz</var>) and might be organised in sub-folders. Paired reads must always reside in the same folder and end with <var>_1</var> or <var>_2</var> respectively.</p><p><b>System requirements</b><br />64 GB of RAM are recommended for pipeline execution.</p>",
  5. "steps": [
  6. {
  7. "id": "qc_initial",
  8. "name": "Initial QC",
  9. "description": "<p>Performs an initial quality control of sequenced reads.</p><p><b>References</b><ol><li>Andrews S. (2010). FastQC: a quality control tool for high throughput sequence data. Available online at: <a href=\"http://www.bioinformatics.babraham.ac.uk/projects/fastqc\">Babraham Bioinformatics</a></li></ol></p>",
  10. "container": "fastqc_initial",
  11. "dependencies": [],
  12. "variables": [
  13. {
  14. "id": "ADAPTERS",
  15. "name": "Adapter sequences",
  16. "description": "A custom list of sequencing adapters. The global data repository must contain a file called <var>qc_adapters.txt</var> at its root.",
  17. "category": {
  18. "tag": "Global"
  19. }
  20. },
  21. {
  22. "id": "KMERS",
  23. "name": "Kmer length",
  24. "description": "An integer that specifies the Kmer length to be used for Kmer content detection. Must be between 2 and 10.",
  25. "category": {
  26. "tag": "Number"
  27. }
  28. },
  29. {
  30. "id": "SVG",
  31. "name": "SVG images",
  32. "description": "Saves all images as vector files instead of bitmaps.",
  33. "category": {
  34. "tag": "Boolean"
  35. }
  36. }
  37. ]
  38. },
  39. {
  40. "id": "trimming",
  41. "name": "Trimming",
  42. "description": "<p>Performs trimming of adapters and low quality bases.</p><p><b>References</b><ol><li>Bolger, A. M., Lohse, M., & Usadel, B. (2014). Trimmomatic: A flexible trimmer for Illumina Sequence Data. Bioinformatics, btu170.</li></ol></p>",
  43. "container": "trimmomatic",
  44. "dependencies": [],
  45. "variables": [
  46. {
  47. "id": "ADAPTERS_CUSTOM",
  48. "name": "Custom adapter sequences",
  49. "description": "A custom list of sequencing adapters. The global data repository must contain a file called <var>trimming_adapters.fa</var> at its root. This will overwrite any of the predefined adapter sequences.",
  50. "category": {
  51. "tag": "Global"
  52. }
  53. },
  54. {
  55. "id": "ADAPTERS_FIXED",
  56. "name": "Predefined adapter sequences",
  57. "description": "Specify which of the predefined adapter sequences to use for trimming.",
  58. "required": false,
  59. "category": {
  60. "tag": "Option",
  61. "content": [
  62. {
  63. "name": "Nextera-PE",
  64. "value": "NexteraPE-PE.fa"
  65. },
  66. {
  67. "name": "TruSeq2-PE",
  68. "value": "TrueSeq2-PE.fa"
  69. },
  70. {
  71. "name": "TruSeq3-PE",
  72. "value": "TrueSeq3-PE.fa"
  73. },
  74. {
  75. "name": "TruSeq3-PE-2",
  76. "value": "TrueSeq3-PE-2.fa"
  77. }
  78. ]
  79. }
  80. },
  81. {
  82. "id": "PHRED",
  83. "name": "PHRED score",
  84. "description": "Specify which PHRED score encoding to use.",
  85. "required": false,
  86. "category": {
  87. "tag": "Option",
  88. "content": [
  89. {
  90. "name": "PHRED33",
  91. "value": "PHRED33"
  92. },
  93. {
  94. "name": "PHRED64",
  95. "value": "PHRED64"
  96. }
  97. ]
  98. }
  99. }
  100. ]
  101. },
  102. {
  103. "id": "qc_trimming",
  104. "name": "Trimming QC",
  105. "description": "<p>Performs a quality control of the trimmed reads.</p><p><b>References</b><ol><li>Andrews S. (2010). FastQC: a quality control tool for high throughput sequence data. Available online at: <a href=\"http://www.bioinformatics.babraham.ac.uk/projects/fastqc\">Babraham Bioinformatics</a></li></ol></p>",
  106. "container": "fastqc_trimming",
  107. "dependencies": ["trimming"],
  108. "variables": [
  109. {
  110. "id": "ADAPTERS",
  111. "name": "Adapter sequences",
  112. "description": "A custom list of sequencing adapters. The global data repository must contain a file called <var>qc_adapters.txt</var> at its root.",
  113. "category": {
  114. "tag": "Global"
  115. }
  116. },
  117. {
  118. "id": "KMERS",
  119. "name": "Kmer length",
  120. "description": "An integer that specifies the Kmer length to be used for Kmer content detection. Must be between 2 and 10.",
  121. "category": {
  122. "tag": "Number"
  123. }
  124. },
  125. {
  126. "id": "SVG",
  127. "name": "SVG images",
  128. "description": "Saves all images as vector files instead of bitmaps.",
  129. "category": {
  130. "tag": "Boolean"
  131. }
  132. }
  133. ]
  134. },
  135. {
  136. "id": "alignment",
  137. "name": "Alignment",
  138. "description": "<p>Aligns trimmed reads against a reference genome and sorts the output.</p><p><b>References</b><ol><li>Langmead B, Salzberg S. Fast gapped-read alignment with Bowtie 2. Nature Methods. 2012, 9:357-359.</li><li>HTSlib: C library for reading/writing high-throughput sequencing data. James K Bonfield, John Marshall, Petr Danecek, Heng Li, Valeriu Ohan, Andrew Whitwham, Thomas Keane, Robert M Davies. GigaScience, Volume 10, Issue 2, February 2021, giab007, <a href=\"https://doi.org/10.1093/gigascience/giab007\">https://doi.org/10.1093/gigascience/giab007</a></li><li>Twelve years of SAMtools and BCFtools. Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li. GigaScience, Volume 10, Issue 2, February 2021, giab008, <a href=\"https://doi.org/10.1093/gigascience/giab008\">https://doi.org/10.1093/gigascience/giab008</a></li></ol></p>",
  139. "container": "bowtie",
  140. "dependencies": ["trimming"],
  141. "variables": [
  142. {
  143. "id": "GENOME",
  144. "name": "Reference genome",
  145. "description": "The reference genome to align the reads against. The global data repository must contain a file called <var>genome.fa</var> and the corresponding indices at its root. Indices can be generated by the reference genome preprocessing pipeline.",
  146. "category": {
  147. "tag": "Global"
  148. },
  149. "required": true
  150. }
  151. ]
  152. },
  153. {
  154. "id": "alignment_filtering",
  155. "name": "Post alignment filtering",
  156. "description": "<p>Filters low quality and misaligned reads.</p><p><b>References</b><ol><li>HTSlib: C library for reading/writing high-throughput sequencing data. James K Bonfield, John Marshall, Petr Danecek, Heng Li, Valeriu Ohan, Andrew Whitwham, Thomas Keane, Robert M Davies. GigaScience, Volume 10, Issue 2, February 2021, giab007, <a href=\"https://doi.org/10.1093/gigascience/giab007\">https://doi.org/10.1093/gigascience/giab007</a></li><li>Twelve years of SAMtools and BCFtools. Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li. GigaScience, Volume 10, Issue 2, February 2021, giab008, <a href=\"https://doi.org/10.1093/gigascience/giab008\">https://doi.org/10.1093/gigascience/giab008</a></li></ol></p>",
  157. "container": "alignment_filtering",
  158. "dependencies": ["alignment"],
  159. "variables": [
  160. {
  161. "id": "REMOVE_M",
  162. "name": "Remove mitochondrial reads",
  163. "description": "Remove all reads mapping to the mitochondrial genome.",
  164. "category": {
  165. "tag": "Boolean"
  166. }
  167. },
  168. {
  169. "id": "REMOVE_INVALID",
  170. "name": "Remove misaligned reads",
  171. "description": "Remove all read pairs that are not properly aligned.",
  172. "category": {
  173. "tag": "Boolean"
  174. }
  175. },
  176. {
  177. "id": "QUALITY_FILTER",
  178. "name": "Quality filtering",
  179. "description": "Filters out all alignments with a PHRED quality score lower than the defined value.",
  180. "category": {
  181. "tag": "Number"
  182. }
  183. }
  184. ]
  185. },
  186. {
  187. "id": "splitting",
  188. "name": "Splitting",
  189. "description": "<p>Splits the alignment into nucleosome-specific chunks.</p><p><b>References</b><ol><li>Buenrostro JD, Giresi PG, Zaba LC, Chang HY, Greenleaf WJ. Transposition of native chromatin for fast and sensitive epigenomic profiling of open chromatin, DNA-binding proteins and nucleosome position. Nat Methods. 2013 Dec;10(12):1213-8. doi: 10.1038/nmeth.2688. Epub 2013 Oct 6. PMID: 24097267; PMCID: PMC3959825.</li><li>HTSlib: C library for reading/writing high-throughput sequencing data. James K Bonfield, John Marshall, Petr Danecek, Heng Li, Valeriu Ohan, Andrew Whitwham, Thomas Keane, Robert M Davies. GigaScience, Volume 10, Issue 2, February 2021, giab007, <a href=\"https://doi.org/10.1093/gigascience/giab007\">https://doi.org/10.1093/gigascience/giab007</a></li><li>Twelve years of SAMtools and BCFtools. Petr Danecek, James K Bonfield, Jennifer Liddle, John Marshall, Valeriu Ohan, Martin O Pollard, Andrew Whitwham, Thomas Keane, Shane A McCarthy, Robert M Davies, Heng Li. GigaScience, Volume 10, Issue 2, February 2021, giab008, <a href=\"https://doi.org/10.1093/gigascience/giab008\">https://doi.org/10.1093/gigascience/giab008</a></li></ol></p>",
  190. "container": "splitting",
  191. "dependencies": ["alignment_filtering"],
  192. "variables": []
  193. }
  194. ]
  195. }