participant_job

#!/bin/bash
# activate virtual environment for DataLad 0.16+
source ~/datalad_venv/datalad-dev/bin/activate
# the job assumes that it is a good idea to run everything in PWD
# the job manager should make sure that is true
# fail whenever something is fishy, use -x to get verbose logfiles
set -e -u -x
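# three positional arguments are expected: the clone source of the analysis
# dataset (presumably a RIA store URL in this setup), the URL of the output
# store's git remote to push result branches to, and the participant ID to process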
dssource="$1"
pushgitremote="$2"
subid="$3"
# get the analysis dataset, which includes the inputs as well
# importantly, we do not clone from the location that we want to push the
# results to, in order to avoid too many jobs blocking access to
# the same location and creating a throughput bottleneck
datalad clone "${dssource}" ds
# all following actions are performed in the context of the superdataset
cd ds
# in order to avoid accumulation of temporary git-annex availability information
# and to avoid a synchronization bottleneck by having to consolidate the
# git-annex branch across jobs, we will only push the main tracking branch
# back to the output store (plus the actual file content). Final availability
# information can be established via an eventual "git-annex fsck -f ReproVBM_out-storage".
# this remote is never fetched, it accumulates a large number of branches
# and we want to avoid progressive slowdown. Instead we only ever push
# a unique branch per job (subject AND process specific name)
git remote add outputstore "$pushgitremote"
# all results of this job will be put into a dedicated branch
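# JOBID is expected to be set in the job environment (the original Condor setup
# presumably exports a unique ID per job), so every job gets its own branch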
git checkout -b "job-${JOBID}"
# we pull down the input subject manually in order to discover relevant
# files. We do this outside the recorded call, because on a potential
# re-run we want to be able to do fine-grained recomputing of individual
# outputs. The recorded calls will have specific paths that will enable
# recomputation outside the scope of the original Condor setup
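# "get -n" only installs the input subdataset without retrieving file content;
# the actual T1w files are fetched per-file by the containers-run calls below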
datalad get -n "inputs/ds004169"
# the meat of the matter
# look for T1w files in the input data for the given participant
# it is critical for reproducibility that the command given to
# "containers-run" does not rely on any property of the immediate
# computational environment (env vars, services, etc)
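# note: find substitutes the matched file path for every "{}" in the -exec
# argument; $odir is the third path component of the match, i.e. the subject
# directory. Inside the recorded call, "{inputs[0]}" and "{outputs[0]}" are
# placeholders that datalad containers-run expands to the declared -i/-o paths,
# and --explicit limits saving to those declared outputs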
find \
  inputs/ds004169/${subid} \
  -name '*T1w.nii.gz' \
  -exec sh -c '
    odir=$(echo {} | cut -d / -f3);
    datalad -c datalad.annex.retry=12 containers-run \
      -m "Compute $odir" \
      -n cat12-8 \
      --explicit \
      -o $odir \
      -i {} \
      -i code/finalize_job_outputs_ENIGMA.sh \
      sh -e -u -x -c "
        rm -rf {outputs[0]} ;
        mkdir -p {outputs[0]} \
          && cp {inputs[0]} {outputs[0]} \
          && /singularity -b code/cat_standalone_segment_enigma.m {outputs[0]}/*.nii.gz \
          && rm {outputs[0]}/*.nii* \
          && gzip {outputs[0]}/*/*.nii \
      " \
  ' \;
# remove big files from results after hashing, before pushing to the RIA store
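# presumably these are the largest CAT12 outputs (deformation fields iy_*/y_*,
# jacobian-related wj* images, report PDFs, and surface meshes); they were
# hashed and committed by the run above, so their identity remains recorded
# even though the content is not pushed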
datalad drop --what filecontent --reckless kill ${subid}/mri/iy* ${subid}/mri/y* ${subid}/mri/anon_m* ${subid}/mri/wj* ${subid}/*/*.pdf ${subid}/surf/*sphere* ${subid}/surf/*pial* ${subid}/surf/*white*
# it may be that the above command did not yield any outputs
# and no commit was made (no T1s found for the given participant)
# we nevertheless push the branch to have a record that this was
# attempted and did not fail
# file content first -- does not need a lock, no interaction with Git
datalad push --to ReproVBM_out-storage
# and the output branch
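# DSLOCKFILE is expected to point to a lock file shared by all running jobs,
# so that concurrent pushes to the single output remote are serialized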
flock --verbose $DSLOCKFILE git push outputstore
echo SUCCESS
# job handler should clean up workspace