3 Commits 4e63d992cd ... 345cdc06b5

Author SHA1 Message Date
  at-robins 345cdc06b5 [7] Updated mount injection 8 months ago
  at-robins 0037ab9372 [7] Added experiment run endpoint 8 months ago
  at-robins 979ee95ff7 [7] Updated ATAC pipeline 8 months ago

+ 28 - 4
backend/src/application/config.rs

@@ -198,9 +198,7 @@ impl Configuration {
     ) -> PathBuf {
         let mut path: PathBuf = self.experiment_steps_path(experiment_id);
         // Hashing the ID prevents invalid characters in file paths.
-        let mut hasher = XxHash64::with_seed(154);
-        step_id.as_ref().hash(&mut hasher);
-        path.push(hasher.finish().to_string());
+        path.push(Self::hash_string(step_id));
         path
     }
 
@@ -229,6 +227,18 @@ impl Configuration {
     pub fn server_address_and_port(&self) -> String {
         format!("{}:{}", self.server_address(), self.server_port())
     }
+
+    /// Hashes the specified string and returns the resulting number as a string.
+    /// The hash is constant for the same string on repeated uses.
+    ///
+    /// # Parameters
+    ///
+    /// * `value` - the string to hash
+    pub fn hash_string<T: AsRef<str>>(value: T) -> String {
+        let mut hasher = XxHash64::with_seed(154);
+        value.as_ref().hash(&mut hasher);
+        hasher.finish().to_string()
+    }
 }
 
 #[cfg(test)]
@@ -288,7 +298,21 @@ mod tests {
     fn test_experiment_step_path() {
         let config = Configuration::new("", "", "", "", "./application/context", "");
         // Hash of step_id.
-        let path: PathBuf = "./application/context/experiments/experiment_id/steps/4363919453614495606".into();
+        let path: PathBuf =
+            "./application/context/experiments/experiment_id/steps/4363919453614495606".into();
         assert_eq!(config.experiment_step_path("experiment_id", "step_id"), path);
     }
+
+    #[test]
+    fn test_hash_string() {
+        let random_string = "39012rtuj132-0t1jp41-9/n\n\t@#$%^&*()|}{\"?>¡ªº£€˚„";
+        let hash = Configuration::hash_string(random_string);
+        let allowed_characters = vec!['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'];
+        assert!(hash.len() > 0);
+        // u64 max
+        assert!(hash.len() <= 20);
+        for character in hash.chars() {
+            assert!(allowed_characters.contains(&character));
+        }
+    }
 }

+ 51 - 2
backend/src/controller/experiment_controller.rs

@@ -1,3 +1,5 @@
+use std::collections::HashMap;
+
 use crate::{
     application::{
         config::Configuration,
@@ -221,15 +223,62 @@ pub async fn get_experiment_pipelines(
 ) -> Result<web::Json<Vec<ExperimentPipelineBlueprint>>, SeqError> {
     let experiment_id: i32 = id.into_inner();
     let mut connection = database_manager.database_connection()?;
+    let all_experiment_stati =
+        ExperimentExecution::get_by_experiment(experiment_id, &mut connection)?;
     let mut experiment_pipelines = Vec::new();
     for pipeline in pipelines.pipelines() {
         let values = crate::model::db::pipeline_step_variable::PipelineStepVariable::get_values_by_experiment_and_pipeline(experiment_id, pipeline.pipeline().id(), &mut connection)?;
-        experiment_pipelines
-            .push(ExperimentPipelineBlueprint::from_internal(pipeline.pipeline(), values));
+        let stati: HashMap<String, String> = all_experiment_stati
+            .iter()
+            .filter(|execution| &execution.pipeline_id == pipeline.pipeline().id())
+            .map(|execution| {
+                (execution.pipeline_step_id.clone(), execution.execution_status.clone())
+            })
+            .collect();
+        experiment_pipelines.push(ExperimentPipelineBlueprint::from_internal(
+            pipeline.pipeline(),
+            values,
+            stati,
+        ));
     }
     Ok(web::Json(experiment_pipelines))
 }
 
+pub async fn get_experiment_pipeline_run(
+    database_manager: web::Data<DatabaseManager>,
+    pipelines: web::Data<LoadedPipelines>,
+    id: web::Path<i32>,
+) -> Result<web::Json<Option<ExperimentPipelineBlueprint>>, SeqError> {
+    let experiment_id: i32 = id.into_inner();
+    let mut connection = database_manager.database_connection()?;
+    Experiment::exists_err(experiment_id, &mut connection)?;
+    let experiment = Experiment::get(experiment_id, &mut connection)?;
+    let experiment_pipeline = if let Some(pipeline_id) = experiment.pipeline_id {
+        if let Some(pipeline) = pipelines.get(&pipeline_id) {
+            let values = crate::model::db::pipeline_step_variable::PipelineStepVariable::get_values_by_experiment_and_pipeline(experiment_id, pipeline.pipeline().id(), &mut connection)?;
+            let stati: HashMap<String, String> =
+                ExperimentExecution::get_by_experiment(experiment_id, &mut connection)?
+                    .into_iter()
+                    .filter(|execution| &execution.pipeline_id == &pipeline_id)
+                    .map(|execution| {
+                        (execution.pipeline_step_id, execution.execution_status)
+                    })
+                    .collect();
+            Some(ExperimentPipelineBlueprint::from_internal(pipeline.pipeline(), values, stati))
+        } else {
+            return Err(SeqError::new(
+                "Not Found",
+                SeqErrorType::NotFoundError,
+                format!("No pipeline with ID {} is currently loaded.", pipeline_id),
+                "The pipeline ID is invalid.",
+            ));
+        }
+    } else {
+        None
+    };
+    Ok(web::Json(experiment_pipeline))
+}
+
 pub async fn post_experiment_pipeline_variable(
     database_manager: web::Data<DatabaseManager>,
     pipelines: web::Data<LoadedPipelines>,

+ 2 - 1
backend/src/controller/routing.rs

@@ -8,7 +8,7 @@ use super::{
     experiment_controller::{
         create_experiment, delete_experiment, get_experiment, get_experiment_pipelines,
         list_experiment, patch_experiment_comment, patch_experiment_mail, patch_experiment_name,
-        patch_experiment_pipeline, post_experiment_pipeline_variable, post_execute_experiment, get_experiment_execution_status, post_experiment_execution_abort, post_experiment_execution_reset,
+        patch_experiment_pipeline, post_experiment_pipeline_variable, post_execute_experiment, get_experiment_execution_status, post_experiment_execution_abort, post_experiment_execution_reset, get_experiment_pipeline_run,
     },
     file_controller::{delete_files_by_path, get_files, post_add_file, post_add_folder},
     global_data_controller::{
@@ -56,6 +56,7 @@ pub fn routing_config(cfg: &mut ServiceConfig) {
     .route("/api/experiments/{id}/pipeline", web::patch().to(patch_experiment_pipeline))
     .route("/api/experiments/{id}/pipelines", web::get().to(get_experiment_pipelines))
     .route("/api/experiments/{id}/reset", web::post().to(post_experiment_execution_reset))
+    .route("/api/experiments/{id}/run", web::get().to(get_experiment_pipeline_run))
     .route("/api/experiments/{id}/status", web::get().to(get_experiment_execution_status))
     .route("/api/experiments/{id}/variable", web::post().to(post_experiment_pipeline_variable))
     // Global data repositories

+ 51 - 9
backend/src/model/exchange/experiment_pipeline.rs

@@ -33,15 +33,23 @@ impl ExperimentPipelineBlueprint {
     /// * `pipeline` - the pipeline to convert
     /// * `values` - a map of variable values, where the keys are a concatenation of the
     /// pipeline step ID and variable ID
-    pub fn from_internal<T: Borrow<PipelineBlueprint>, S: Borrow<HashMap<String, String>>>(
-        pipeline: T,
-        values: S,
+    /// * `stati` - a map of stati, where the keys are the pipeline step ID
+    pub fn from_internal<
+        PipelineType: Borrow<PipelineBlueprint>,
+        ValueMapType: Borrow<HashMap<String, String>>,
+        StatusMapType: Borrow<HashMap<String, String>>,
+    >(
+        pipeline: PipelineType,
+        values: ValueMapType,
+        stati: StatusMapType,
     ) -> Self {
         let steps = pipeline
             .borrow()
             .steps()
             .iter()
-            .map(|s| ExperimentPipelineStepBlueprint::from_internal(s, values.borrow()))
+            .map(|s| {
+                ExperimentPipelineStepBlueprint::from_internal(s, values.borrow(), stati.borrow())
+            })
             .collect();
         Self {
             id: pipeline.borrow().id().clone(),
@@ -73,6 +81,9 @@ pub struct ExperimentPipelineStepBlueprint {
     /// The variables that can be specified for the pipeline step.
     #[getset(get = "pub")]
     variables: Vec<ExperimentPipelineStepVariable>,
+    /// The execution status of the pipeline step.
+    #[getset(get = "pub")]
+    status: Option<String>,
 }
 
 impl ExperimentPipelineStepBlueprint {
@@ -84,9 +95,15 @@ impl ExperimentPipelineStepBlueprint {
     /// * `pipeline_step` - the step to convert
     /// * `values` - a map of variable values, where the keys are a concatenation of the
     /// pipeline step ID and variable ID
-    pub fn from_internal<T: Borrow<PipelineStepBlueprint>, S: Borrow<HashMap<String, String>>>(
-        pipeline_step: T,
-        values: S,
+    /// * `stati` - a map of stati, where the keys are the pipeline step ID
+    pub fn from_internal<
+        StepType: Borrow<PipelineStepBlueprint>,
+        ValueMapType: Borrow<HashMap<String, String>>,
+        StatusMapType: Borrow<HashMap<String, String>>,
+    >(
+        pipeline_step: StepType,
+        values: ValueMapType,
+        stati: StatusMapType,
     ) -> Self {
         let variables = pipeline_step
             .borrow()
@@ -102,6 +119,10 @@ impl ExperimentPipelineStepBlueprint {
                 )
             })
             .collect();
+        let status: Option<String> = stati
+            .borrow()
+            .get(pipeline_step.borrow().id())
+            .map(|s| s.clone());
         Self {
             id: pipeline_step.borrow().id().clone(),
             name: pipeline_step.borrow().name().clone(),
@@ -109,6 +130,7 @@ impl ExperimentPipelineStepBlueprint {
             container: pipeline_step.borrow().container().clone(),
             dependencies: pipeline_step.borrow().dependencies().clone(),
             variables,
+            status,
         }
     }
 }
@@ -168,6 +190,8 @@ impl ExperimentPipelineStepVariable {
 #[cfg(test)]
 mod tests {
 
+    use crate::model::internal::step::PipelineStepStatus;
+
     use super::*;
 
     #[test]
@@ -299,14 +323,18 @@ mod tests {
         values.insert("fastqcglobal".to_string(), value_global.clone());
         values.insert("fastqcbool".to_string(), value_bool.clone());
 
+        let mut stati = HashMap::new();
+        stati.insert("fastqc".to_string(), PipelineStepStatus::Pending.to_string());
+
         let experiment_step =
-            ExperimentPipelineStepBlueprint::from_internal(&pipeline_step, values);
+            ExperimentPipelineStepBlueprint::from_internal(&pipeline_step, values, stati);
         assert_eq!(experiment_step.id(), pipeline_step.id());
         assert_eq!(experiment_step.name(), pipeline_step.name());
         assert_eq!(experiment_step.description(), pipeline_step.description());
         assert_eq!(experiment_step.container(), pipeline_step.container());
         assert_eq!(experiment_step.dependencies(), pipeline_step.dependencies());
         assert_eq!(experiment_step.variables().len(), pipeline_step.variables().len());
+        assert_eq!(experiment_step.status(), &Some(PipelineStepStatus::Pending.to_string()));
 
         let experiment_vars = experiment_step.variables();
         let pipeline_vars = pipeline_step.variables();
@@ -399,7 +427,11 @@ mod tests {
         values.insert("fastqc2bool".to_string(), "10".to_string());
         values.insert("fastqc2global".to_string(), "11".to_string());
 
-        let experiment_pipeline = ExperimentPipelineBlueprint::from_internal(&pipeline, values);
+        let mut stati = HashMap::new();
+        stati.insert("fastqc2".to_string(), PipelineStepStatus::Failed.to_string());
+
+        let experiment_pipeline =
+            ExperimentPipelineBlueprint::from_internal(&pipeline, values, stati);
         assert_eq!(experiment_pipeline.id(), pipeline.id());
         assert_eq!(experiment_pipeline.name(), pipeline.name());
         assert_eq!(experiment_pipeline.description(), pipeline.description());
@@ -414,6 +446,16 @@ mod tests {
             assert_eq!(experiment_steps[i].container(), pipeline_steps[i].container());
             assert_eq!(experiment_steps[i].dependencies(), pipeline_steps[i].dependencies());
             assert_eq!(experiment_steps[i].variables().len(), pipeline_steps[i].variables().len());
+
+            if experiment_steps[i].id() == "fastqc2" {
+                assert_eq!(
+                    experiment_steps[i].status(),
+                    &Some(PipelineStepStatus::Failed.to_string())
+                );
+            } else {
+                assert_eq!(experiment_steps[i].status(), &None);
+            }
+
             let experiment_vars = experiment_steps[i].variables();
             let pipeline_vars = pipeline_steps[i].variables();
             for j in 0..experiment_vars.len() {

+ 44 - 14
backend/src/service/container_service.rs

@@ -1,6 +1,6 @@
 use std::{
+    collections::HashMap,
     ffi::OsString,
-    hash::{Hash, Hasher},
     io::{BufWriter, Write},
     path::Path,
     process::{Child, Command, Output, Stdio},
@@ -9,7 +9,6 @@ use std::{
 use actix_web::web;
 use chrono::NaiveDateTime;
 use diesel::{ExpressionMethods, QueryDsl, RunQueryDsl};
-use twox_hash::XxHash64;
 
 use crate::{
     application::{
@@ -28,6 +27,9 @@ use crate::{
 
 use super::pipeline_service::LoadedPipelines;
 
+/// The container environment variable specifying all mounts.
+const CONTAINER_ENV_MOUNT: &str = "MOUNT_PATHS";
+
 /// Builds the specifc pipeline step container at the specified context.
 ///
 /// # Parameters
@@ -91,35 +93,56 @@ pub fn run_pipeline_step<T: AsRef<str>>(
         format_container_name(&pipeline_id, step.id()).into(),
         "--rm".into(),
     ];
+
     arguments.extend(pipeline_step_mount(output_path, "/output", false));
     // Set initial sample input mount.
     arguments.extend(pipeline_step_mount(
         app_config.experiment_input_path(&experiment_id),
-        "/input/samples",
+        "/input/base",
         true,
     ));
     // Set input mounts / dependencies.
+    let mut mount_map_dependencies = serde_json::Map::new();
     for dependency_id in step.dependencies() {
+        let target = format!("/input/steps/{}", Configuration::hash_string(dependency_id));
+        mount_map_dependencies
+            .insert(dependency_id.to_string(), serde_json::Value::String(target.clone()));
         arguments.extend(pipeline_step_mount(
             app_config.experiment_step_path(&experiment_id, dependency_id),
-            format!("/input/steps/{}", dependency_id),
+            target,
             true,
         ));
     }
     // Set global mounts.
+    let mut mount_map_globals = serde_json::Map::new();
     step.variables()
         .iter()
         .filter(|var_instance| var_instance.is_global_data_reference())
         // Filter out variables without values.
         .filter_map(|var_instance| var_instance.value().as_ref().map(|value| (var_instance.id(), value)))
         .for_each(|(global_var_id, global_var_value)| {
+            let target = format!("/input/globals/{}", Configuration::hash_string(global_var_id));
+            mount_map_globals.insert(
+                global_var_id.to_string(),
+                serde_json::Value::String(target.clone()),
+            );
             arguments.extend(pipeline_step_mount(
                 app_config.global_data_path(global_var_value),
-                format!("/input/globals/{}", global_var_id),
+                target,
                 true,
             ));
         });
 
+    // Set mount envrionment variable.
+    let mut mount_map_top = serde_json::Map::new();
+    mount_map_top.insert("input".to_string(), serde_json::Value::String("/input/base".to_string()));
+    mount_map_top.insert("output".to_string(), serde_json::Value::String("/output".to_string()));
+    mount_map_top.insert("dependencies".to_string(), serde_json::Value::Object(mount_map_dependencies));
+    mount_map_top.insert("globals".to_string(), serde_json::Value::Object(mount_map_globals));
+    let mount_paths = serde_json::Value::Object(mount_map_top);
+    arguments.push("--env".into());
+    arguments.push(format!("{}={}", CONTAINER_ENV_MOUNT, mount_paths.to_string()).into());
+
     // Set other variables.
     step.variables()
         .iter()
@@ -127,8 +150,12 @@ pub fn run_pipeline_step<T: AsRef<str>>(
         // Filter out variables without values.
         .filter_map(|var_instance| var_instance.value().as_ref().map(|value| (var_instance.id(), value)))
         .for_each(|(other_var_id, other_var_value)| {
-            arguments.push("--env".into());
-            arguments.push(format!("{}={}", other_var_id, other_var_value).into());
+            if other_var_id != CONTAINER_ENV_MOUNT {
+                arguments.push("--env".into());
+                arguments.push(format!("{}={}", other_var_id, other_var_value).into());
+            } else {
+                log::warn!("Pipeline {} step {} tried to overwrite the reserved environment variable {} with value {}.", pipeline_id.as_ref(), step.id(), other_var_id, other_var_value);
+            }
         });
 
     // Set container to run.
@@ -176,9 +203,7 @@ fn format_container_name<T: AsRef<str>, R: AsRef<str>>(
     pipeline_step_id: R,
 ) -> String {
     let name = format!("{}{}", pipeline_id.as_ref(), pipeline_step_id.as_ref());
-    let mut hasher = XxHash64::with_seed(154);
-    name.hash(&mut hasher);
-    hasher.finish().to_string()
+    Configuration::hash_string(name)
 }
 
 pub struct ContainerHandler {
@@ -330,10 +355,10 @@ impl ContainerHandler {
 
     /// Aborts the the pipeline step belonging to the specified experiment
     /// if currently executed.
-    /// 
+    ///
     /// # Parameters
-    /// 
-    /// * `experiment_id` - the ID of the experiment to abort 
+    ///
+    /// * `experiment_id` - the ID of the experiment to abort
     pub fn abort(&mut self, experiment_id: i32) -> Result<(), SeqError> {
         if let Some(step) = &self.executed_step {
             if experiment_id == step.experiment_id {
@@ -549,7 +574,12 @@ impl ContainerHandler {
         if let Some(pipeline) = self.loaded_pipelines.get(&step.pipeline_id) {
             let mut connection = self.database_manager.database_connection()?;
             let values = crate::model::db::pipeline_step_variable::PipelineStepVariable::get_values_by_experiment_and_pipeline(step.experiment_id, pipeline.pipeline().id(), &mut connection)?;
-            let pipeline = ExperimentPipelineBlueprint::from_internal(pipeline.pipeline(), values);
+            // The stati of the pipeline steps should be None at this point so an empty map is supplied instead of loading them from the database.
+            let pipeline = ExperimentPipelineBlueprint::from_internal(
+                pipeline.pipeline(),
+                values,
+                HashMap::new(),
+            );
             if let Some(step_blueprint) = pipeline
                 .steps()
                 .iter()

+ 0 - 34
pipelines/wiedemann_atac_pipeline/container/fastqc/run_fastqc.sh

@@ -1,34 +0,0 @@
-#!/bin/sh
-
-FASTQC_OPTIONS=""
-# If a specific environment variable is set, appends the respective option.
-if [[ ! -z "${ADAPTERS}" ]]
-then
-    FASTQC_OPTIONS="${FASTQC_OPTIONS} --adapters /input/globals/${ADAPTERS}/adapters.txt";
-fi
-
-if [[ ! -z "${KMERS}" ]]
-then
-    FASTQC_OPTIONS="${FASTQC_OPTIONS} --kmers ${KMERS}";
-fi
-
-if [[ ! -z "${SVG}" ]] && [[ "${SVG}" == 'true' ]]
-then
-    FASTQC_OPTIONS="${FASTQC_OPTIONS} --svg";
-fi
-
-# Prints the specified options.
-if [[ ! -z "${FASTQC_OPTIONS}" ]]
-then
-    echo "Specified options: ${FASTQC_OPTIONS}";
-else
-    echo Running with default options.
-fi
-
-# Iterates over all sample directories and processes them conserving the directory structure.
-for directory in /input/samples/*/
-do
-    mkdir -p /output/$(basename $directory)
-    perl -- /FastQC/fastqc --outdir=/output/$(basename $directory) $FASTQC_OPTIONS ${directory}*.fastq.gz
-    perl -- /FastQC/fastqc --outdir=/output/$(basename $directory) $FASTQC_OPTIONS ${directory}*.fq.gz
-done

+ 3 - 3
pipelines/wiedemann_atac_pipeline/container/fastqc/Dockerfile

@@ -1,5 +1,5 @@
-FROM alpine:3.17.3
+FROM python:3.11.6-alpine3.18
 RUN apk -U upgrade && apk add --no-cache openjdk17-jre unzip perl fontconfig ttf-dejavu
 RUN wget 'https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.12.1.zip' && unzip 'fastqc_v0.12.1.zip' && rm 'fastqc_v0.12.1.zip'
-COPY run_fastqc.sh /run_fastqc.sh
-ENTRYPOINT ["/bin/sh", "run_fastqc.sh"]
+COPY run_fastqc.py /run_fastqc.py
+ENTRYPOINT ["python", "run_fastqc.py"]

+ 49 - 0
pipelines/wiedemann_atac_pipeline/container/fastqc_initial/run_fastqc.py

@@ -0,0 +1,49 @@
+#!/usr/bin/python
+"""This module runs the initial QC process."""
+
+import json
+import os
+import sys
+from contextlib import suppress
+
+BASE_COMMAND = "perl -- /FastQC/fastqc"
+MOUNT_PATHS = json.loads(os.environ.get("MOUNT_PATHS"))
+INPUT_FOLDER = MOUNT_PATHS["input"] + "/"
+
+# If a specific environment variable is set, appends the respective option.
+options = ""
+
+with suppress(Exception):
+    adapters = MOUNT_PATHS["globals"]["ADAPTERS"]
+    options += f" --adapters {adapters}/adapters.txt"
+
+kmers = os.environ.get("KMERS")
+if kmers is not None:
+    options += f" --kmers {kmers}"
+
+svg = os.environ.get("SVG")
+if svg is not None and svg == "true":
+    options += " --svg"
+
+if not options:
+    print("Running with default options.")
+else:
+    print("Specified options:" + options)
+
+# Iterates over all sample directories and processes them conserving the directory structure.
+for root, dirs, files in os.walk(INPUT_FOLDER):
+    if len(files) > 0:
+        for file in files:
+            if file.casefold().endswith(".fq.gz") or file.casefold().endswith(".fastq.gz"):
+                file_input_path = os.path.join(root, file)
+                folder_output_path = os.path.join(
+                    MOUNT_PATHS["output"],
+                    root.removeprefix(INPUT_FOLDER)
+                )
+                full_command = (f"{BASE_COMMAND}{options} "
+                f"--outdir={folder_output_path} "
+                f"{file_input_path}")
+                os.makedirs(folder_output_path, exist_ok = True)
+                exit_code = os.waitstatus_to_exitcode(os.system(full_command))
+                if exit_code != 0:
+                    sys.exit(exit_code)

+ 5 - 0
pipelines/wiedemann_atac_pipeline/container/fastqc_trimming/Dockerfile

@@ -0,0 +1,5 @@
+FROM python:3.11.6-alpine3.18
+RUN apk -U upgrade && apk add --no-cache openjdk17-jre unzip perl fontconfig ttf-dejavu
+RUN wget 'https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v0.12.1.zip' && unzip 'fastqc_v0.12.1.zip' && rm 'fastqc_v0.12.1.zip'
+COPY run_fastqc.py /run_fastqc.py
+ENTRYPOINT ["python", "run_fastqc.py"]

+ 50 - 0
pipelines/wiedemann_atac_pipeline/container/fastqc_trimming/run_fastqc.py

@@ -0,0 +1,50 @@
+#!/usr/bin/python
+"""This module runs the after trimming QC process."""
+
+import json
+import os
+import sys
+from contextlib import suppress
+
+BASE_COMMAND = "perl -- /FastQC/fastqc"
+MOUNT_PATHS = json.loads(os.environ.get("MOUNT_PATHS"))
+INPUT_FOLDER = MOUNT_PATHS["dependencies"]["trimming"] + "/"
+
+print(MOUNT_PATHS)
+# If a specific environment variable is set, appends the respective option.
+options = ""
+
+with suppress(Exception):
+    adapters = MOUNT_PATHS["globals"]["ADAPTERS"]
+    options += f" --adapters {adapters}/adapters.txt"
+
+kmers = os.environ.get("KMERS")
+if kmers is not None:
+    options += f" --kmers {kmers}"
+
+svg = os.environ.get("SVG")
+if svg is not None and svg == "true":
+    options += " --svg"
+
+if not options:
+    print("Running with default options.")
+else:
+    print("Specified options:" + options)
+
+# Iterates over all sample directories and processes them conserving the directory structure.
+for root, dirs, files in os.walk(INPUT_FOLDER):
+    if len(files) > 0:
+        for file in files:
+            if file.casefold().endswith(".fq.gz") or file.casefold().endswith(".fastq.gz"):
+                file_input_path = os.path.join(root, file)
+                folder_output_path = os.path.join(
+                    MOUNT_PATHS["output"],
+                    root.removeprefix(INPUT_FOLDER)
+                )
+                full_command = (f"{BASE_COMMAND}{options} "
+                f"--outdir={folder_output_path} "
+                f"{file_input_path}")
+                os.makedirs(folder_output_path, exist_ok = True)
+                exit_code = os.waitstatus_to_exitcode(os.system(full_command))
+                if exit_code != 0:
+                    sys.exit(exit_code)

+ 5 - 0
pipelines/wiedemann_atac_pipeline/container/trimmomatic/Dockerfile

@@ -0,0 +1,5 @@
+FROM python:3.11.6-alpine3.18
+RUN apk -U upgrade && apk add --no-cache openjdk17-jre unzip
+RUN wget 'https://github.com/usadellab/Trimmomatic/files/5854859/Trimmomatic-0.39.zip' && unzip 'Trimmomatic-0.39.zip' && rm 'Trimmomatic-0.39.zip'
+COPY run_trimming.py /run_trimming.py
+ENTRYPOINT ["python", "run_trimming.py"]

+ 60 - 0
pipelines/wiedemann_atac_pipeline/container/trimmomatic/run_trimming.py

@@ -0,0 +1,60 @@
+#!/usr/bin/python
+"""This module runs the trimming process."""
+
+import json
+import os
+import sys
+
+BASE_COMMAND = "java -jar /Trimmomatic-0.39/trimmomatic-0.39.jar PE"
+STEP_OPTIONS = ("ILLUMINACLIP:/Trimmomatic-0.39/adapters/NexteraPE-PE.fa:2:30:10:2:True "
+"LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36")
+MOUNT_PATHS = json.loads(os.environ.get("MOUNT_PATHS"))
+INPUT_FOLDER = MOUNT_PATHS["input"] + "/"
+
+# If a specific environment variable is set, appends the respective option.
+options = ""
+phred = os.environ.get("PHRED")
+if phred is not None:
+    if phred == "PHRED33":
+        options += " -phred33"
+    elif phred == "PHRED64":
+        options += " -phred64"
+    else:
+        print(f"Unknown PHRED score option: {phred}", file=sys.stderr)
+
+if not options:
+    print("Running with default options.")
+else:
+    print("Specified options:" + options)
+
+# Iterates over all sample directories and processes them conserving the directory structure.
+for root, dirs, files in os.walk(INPUT_FOLDER):
+    if len(files) > 0:
+        for file in files:
+            input_files = ""
+            file_base_name = ""
+            file_base_input_path = ""
+            if file.casefold().endswith("_1.fq.gz"):
+                file_base_name = file.removesuffix("_1.fq.gz")
+                file_base_input_path = os.path.join(root, file_base_name)
+                input_files = f"{file_base_input_path}_1.fq.gz {file_base_input_path}_2.fq.gz"
+            elif file.casefold().endswith("_1.fastq.gz"):
+                file_base_name = file.removesuffix("_1.fastq.gz")
+                file_base_input_path = os.path.join(root, file_base_name)
+                input_files = f"{file_base_input_path}_1.fastq.gz {file_base_input_path}_2.fastq.gz"
+
+            if input_files:
+                file_base_output_path = os.path.join(
+                    MOUNT_PATHS["output"],
+                    file_base_input_path.removeprefix(INPUT_FOLDER)
+                )
+                full_command = (f"{BASE_COMMAND}{options} {input_files} "
+                f"{file_base_output_path}_1_paired.fq.gz "
+                f"{file_base_output_path}_1_unpaired.fq.gz "
+                f"{file_base_output_path}_2_paired.fq.gz "
+                f"{file_base_output_path}_2_unpaired.fq.gz "
+                f"{STEP_OPTIONS}")
+                os.makedirs(os.path.dirname(file_base_output_path), exist_ok = True)
+                exit_code = os.waitstatus_to_exitcode(os.system(full_command))
+                if exit_code != 0:
+                    sys.exit(exit_code)

+ 65 - 4
pipelines/wiedemann_atac_pipeline/pipeline.json

@@ -1,13 +1,13 @@
 {
   "id": "wiedemann_atac_paired_end",
   "name": "Wiedemann ATAC paired end pipeline",
-  "description": "<p>This pipeline processes paired end ATAC data.</p>",
+  "description": "<p>This pipeline processes paired end ATAC data. The input is expected as gzipped FASTQ files (.fq.gz / .fastq.gz) and might be organised in sub-folders.</p>",
   "steps": [
     {
-      "id": "fastqc_initial",
-      "name": "FastQC",
+      "id": "qc_initial",
+      "name": "Initial QC",
       "description": "Performs an initial quality control of sequenced reads.",
-      "container": "fastqc",
+      "container": "fastqc_initial",
       "dependencies": [],
       "variables": [
         {
@@ -35,6 +35,67 @@
           }
         }
       ]
+    },
+    {
+      "id": "trimming",
+      "name": "Trimming",
+      "description": "Performs trimming of adapters and low quality bases.",
+      "container": "trimmomatic",
+      "dependencies": [],
+      "variables": [
+        {
+          "id": "PHRED",
+          "name": "PHRED score",
+          "description": "Specify which PHRED score encoding to use.",
+          "required": false,
+          "category": {
+            "tag": "Option",
+            "content": [
+              {
+                "name": "PHRED33",
+                "value": "PHRED33"
+              },
+              {
+                "name": "PHRED64",
+                "value": "PHRED64"
+              }
+            ]
+          }
+        }
+      ]
+    },
+    {
+      "id": "qc_trimming",
+      "name": "Trimming QC",
+      "description": "Performs a quality control of the trimmed reads.",
+      "container": "fastqc_trimming",
+      "dependencies": ["trimming"],
+      "variables": [
+        {
+          "id": "ADAPTERS",
+          "name": "Adapter sequences",
+          "description": "A custom list of sequencing adapters. The global data repository must contain a file called <var>adapters.txt</var> at its root.",
+          "category": {
+            "tag": "Global"
+          }
+        },
+        {
+          "id": "KMERS",
+          "name": "Kmer length",
+          "description": "An integer that specifies the Kmer length to be used for Kmer content detection. Must be between 2 and 10.",
+          "category": {
+            "tag": "Number"
+          }
+        },
+        {
+          "id": "SVG",
+          "name": "SVG images",
+          "description": "Saves all images as vector files instead of bitmaps.",
+          "category": {
+            "tag": "Boolean"
+          }
+        }
+      ]
     }
   ]
 }