# highspeed-heudiconv-cluster.sh
  1. #!/usr/bin/bash
  2. # ==============================================================================
  3. # SCRIPT INFORMATION:
  4. # ==============================================================================
  5. # SCRIPT: PARALLELIZE BIDS CONVERSION USING HEUDICONV ON THE MPIB CLUSTER
  6. # PROJECT NAME: HIGHSPEED
  7. # WRITTEN BY LENNART WITTKUHN, 2018 - 2020
  8. # CONTACT: WITTKUHN AT MPIB HYPHEN BERLIN DOT MPG DOT DE
  9. # MAX PLANCK RESEARCH GROUP NEUROCODE
  10. # MAX PLANCK INSTITUTE FOR HUMAN DEVELOPMENT
  11. # MAX PLANCK UCL CENTRE FOR COMPUTATIONAL PSYCHIATRY AND AGEING RESEARCH
  12. # LENTZEALLEE 94, 14195 BERLIN, GERMANY
  13. # ==============================================================================
  14. # DEFINE ALL PATHS:
  15. # ==============================================================================
  16. PATH_BASE="${HOME}"
  17. # define the name of the project:
  18. PROJECT_NAME="highspeed"
  19. # define the path to the input directory:
  20. PATH_INPUT="${PATH_BASE}/${PROJECT_NAME}/rawdata/mri"
  21. # define the path to the output directory
  22. PATH_OUTPUT="${PATH_BASE}/${PROJECT_NAME}/bids"
  23. # define the path to the singularity container:
  24. PATH_CONTAINER="${PATH_BASE}/tools/heudiconv/heudiconv_0.6.0.sif"
  25. # define the path to the code main directory:
  26. PATH_CODE="${PATH_BASE}/${PROJECT_NAME}/${PROJECT_NAME}_analysis/code"
  27. # path to the heudiconv heuristic file:
  28. HEURISTIC_FILE="highspeed_heudiconv_heuristic.py"
  29. # define path to the python executable file that anonymizes the subject ids:
  30. ANON_FILE="highspeed_heudiconv_anonymizer.py"
  31. # make the anonymizer file executable:
  32. chmod +x "${PATH_CODE}/heudiconv/$ANON_FILE"
  33. # path to the directory where error and out path_logs of cluster jobs are saved:
  34. PATH_LOGS="${PATH_BASE}/${PROJECT_NAME}/logs/heudiconv/$(date '+%Y%m%d_%H%M%S')"
  35. # path to the text file with all subject ids:
  36. PATH_SUB_LIST="${PATH_CODE}/parameters/highspeed_participant_list.txt"
  37. # ==============================================================================
  38. # CREATE RELEVANT DIRECTORIES:
  39. # ==============================================================================
  40. # create output directory:
  41. if [ ! -d ${PATH_OUTPUT} ]; then
  42. mkdir -p ${PATH_OUTPUT}
  43. echo "created ${PATH_OUTPUT}"
  44. fi
  45. # create directory for log files:
  46. if [ ! -d ${PATH_LOGS} ]; then
  47. mkdir -p ${PATH_LOGS}
  48. echo "created ${PATH_LOGS}"
  49. fi
  50. # ==============================================================================
  51. # DEFINE PARAMETERS:
  52. # ==============================================================================
  53. # maximum number of cpus per process:
  54. N_CPUS=1
  55. # memory demand in *GB*
  56. MEM_GB=4
  57. # memory demand in *MB*
  58. MEM_MB="$((${MEM_GB} * 1000))"
  59. # read subject ids from the list of the text file
  60. SUB_LIST=$(cat ${PATH_SUB_LIST} | tr '\n' ' ')
  61. # ==============================================================================
  62. # RUN HEUDICONV:
  63. # ==============================================================================
  64. # initalize a subject counter:
  65. SUB_COUNT=0
  66. # loop over all subjects:
  67. for SUB in ${SUB_LIST}; do
  68. # update the subject counter:
  69. let SUB_COUNT=SUB_COUNT+1
  70. # get the subject number with zero padding:
  71. SUB_PAD=$(printf "%02d\n" $SUB_COUNT)
  72. # loop over all sessions:
  73. for SES in `seq 1 2`; do
  74. # get the session number with zero padding:
  75. SES_PAD=$(printf "%02d\n" $SES)
  76. # define the dicom template for the heudiconv command:
  77. DICOM_DIR_TEMPLATE="HIGHSPEED_{subject}_HIGHSPEED_{subject}_${SES}*/*/*/*IMA"
  78. # check the existence of the input files and continue if data is missing:
  79. if [ ! -d ${PATH_INPUT}/HIGHSPEED_${SUB}_HIGHSPEED_${SUB}_${SES}_* ]; then
  80. echo "No data input available for sub-${SUB} ses-${SES_PAD}!"
  81. continue
  82. fi
  83. # name of the job:
  84. echo "#PBS -N heudiconv_sub-${SUB_PAD}_ses-${SES_PAD}" > job
  85. # set the expected maximum running time for the job:
  86. echo "#PBS -l walltime=12:00:00" >> job
  87. # determine how much RAM your operation needs:
  88. echo "#PBS -l mem=${MEM_GB}GB" >> job
  89. # request multiple cpus
  90. echo "#PBS -l nodes=1:ppn=${N_CPUS}" >> job
  91. # write (output) log to log folder:
  92. echo "#PBS -o ${PATH_LOGS}" >> job
  93. # write (error) log to log folder:
  94. echo "#PBS -e ${PATH_LOGS}" >> job
  95. # email notification on abort/end, use 'n' for no notification:
  96. echo "#PBS -m n" >> job
  97. # define the heudiconv command:
  98. echo "singularity run -B ${PATH_INPUT}:/input:ro \
  99. -B ${PATH_OUTPUT}:/output:rw -B ${PATH_CODE}:/code:ro \
  100. ${PATH_CONTAINER} -d /input/${DICOM_DIR_TEMPLATE} -s ${SUB} \
  101. --ses ${SES_PAD} -o /output -f /code/heudiconv/${HEURISTIC_FILE} \
  102. --anon-cmd /code/heudiconv/${ANON_FILE} -c dcm2niix -b --overwrite" >> job
  103. # submit job to cluster queue and remove it to avoid confusion:
  104. qsub job
  105. rm -f job
  106. done
  107. done