Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

run_trimming.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. #!/usr/bin/python
  2. """This module runs the trimming process."""
  3. import json
  4. import math
  5. import multiprocessing
  6. import os
  7. import sys
  8. from contextlib import suppress
  9. BASE_COMMAND = "java -jar /Trimmomatic-0.39/trimmomatic-0.39.jar PE"
  10. MOUNT_PATHS = json.loads(os.environ.get("MOUNT_PATHS"))
  11. INPUT_FOLDER = MOUNT_PATHS["input"] + "/"
  12. # If a specific environment variable is set, appends the respective option.
  13. options = ""
  14. threads = math.floor(multiprocessing.cpu_count() * 0.8)
  15. if threads > 0:
  16. options += f" -threads {threads}"
  17. phred = os.environ.get("PHRED")
  18. if phred is not None:
  19. if phred == "PHRED33":
  20. options += " -phred33"
  21. elif phred == "PHRED64":
  22. options += " -phred64"
  23. else:
  24. print(f"Unknown PHRED score option: {phred}", file=sys.stderr)
  25. if not options:
  26. print("Running with default options.")
  27. else:
  28. print("Specified options:" + options)
  29. # Define the step options.
  30. step_options = ""
  31. adapters = ""
  32. with suppress(Exception):
  33. adapters = f"{MOUNT_PATHS['globals']['ADAPTERS_CUSTOM']}/trimming_adapters.fa"
  34. adapters_fixed = os.environ.get("ADAPTERS_FIXED")
  35. if not adapters and adapters_fixed is not None:
  36. adapters = f"/Trimmomatic-0.39/adapters/{adapters_fixed}"
  37. else:
  38. # Defaults to Nextera adapters as those are standard for ATAC sequencing.
  39. adapters = "/Trimmomatic-0.39/adapters/NexteraPE-PE.fa"
  40. if adapters:
  41. step_options += f" ILLUMINACLIP:{adapters}:2:30:10:2:True"
  42. step_options += " LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36"
  43. if not options:
  44. print("Running with default step options.")
  45. else:
  46. print("Specified step options:" + options)
  47. # Iterates over all sample directories and processes them conserving the directory structure.
  48. for root, dirs, files in os.walk(INPUT_FOLDER):
  49. if len(files) > 0:
  50. for file in files:
  51. input_files = ""
  52. file_base_name = ""
  53. file_base_input_path = ""
  54. if file.casefold().endswith("_1.fq.gz"):
  55. file_base_name = file.removesuffix("_1.fq.gz")
  56. file_base_input_path = os.path.join(root, file_base_name)
  57. input_files = f"{file_base_input_path}_1.fq.gz {file_base_input_path}_2.fq.gz"
  58. elif file.casefold().endswith("_1.fastq.gz"):
  59. file_base_name = file.removesuffix("_1.fastq.gz")
  60. file_base_input_path = os.path.join(root, file_base_name)
  61. input_files = f"{file_base_input_path}_1.fastq.gz {file_base_input_path}_2.fastq.gz"
  62. if input_files:
  63. file_base_output_path = os.path.join(
  64. MOUNT_PATHS["output"],
  65. file_base_input_path.removeprefix(INPUT_FOLDER)
  66. )
  67. full_command = (f"{BASE_COMMAND}{options} {input_files} "
  68. f"{file_base_output_path}_1_paired.fq.gz "
  69. f"{file_base_output_path}_1_unpaired.fq.gz "
  70. f"{file_base_output_path}_2_paired.fq.gz "
  71. f"{file_base_output_path}_2_unpaired.fq.gz"
  72. f"{step_options}")
  73. os.makedirs(os.path.dirname(file_base_output_path), exist_ok = True)
  74. exit_code = os.waitstatus_to_exitcode(os.system(full_command))
  75. if exit_code != 0:
  76. sys.exit(exit_code)