participant_job

#!/bin/bash
# the job assumes that it is a good idea to run everything in PWD
# the job manager should make sure that is true
# fail whenever something is fishy, use -x to get verbose logfiles
set -e -u -x
dssource="$1"
pushgitremote="$2"
subid="$3"
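# hypothetical invocation (a sketch only; the RIA URL and paths below are
# assumptions, actual values are passed in by the job manager):
#   participant_job 'ria+ssh://storeserver/inputstore#<dataset-id>' \
#     'ssh://storeserver/outputstore/<dataset-id-path>' sub-01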
# get the analysis dataset, which includes the inputs as well
# importantly, we do not clone from the location that we want to push the
# results to, in order to avoid too many jobs blocking access to
# the same location and creating a throughput bottleneck
datalad clone "${dssource}" ds
# all following actions are performed in the context of the superdataset
cd ds
# in order to avoid the accumulation of temporary git-annex availability
# information, and to avoid a synchronization bottleneck from having to
# consolidate the git-annex branch across jobs, we will only push the main
# tracking branch back to the output store (plus the actual file content).
# Final availability information can be established via an eventual
# `git-annex fsck -f output-storage`.
# this remote is never fetched; it accumulates a large number of branches,
# and we want to avoid progressive slowdown. Instead, we only ever push
# a unique branch per job (subject AND process specific name)
git remote add outputstore "$pushgitremote"
# all results of this job will be put into a dedicated branch
git checkout -b "job-$JOBID"
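# NB: $JOBID is assumed to be exported by the batch system's submit setup.
# once all jobs are done, the job-* branches can be consolidated in a clone
# of the output store, e.g. (a sketch; the branch filter and remote name are
# assumptions to adapt):
#   git merge -m "merge results" $(git branch -al | grep 'job-' | tr -d ' ')
#   git annex fsck --fast -f output-storage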
# we pull down the input subject manually in order to discover relevant
# files. We do this outside the recorded call, because on a potential
# re-run we want to be able to do fine-grained recomputing of individual
# outputs. The recorded calls will have specific paths that will enable
# recomputation outside the scope of the original Condor setup
datalad get -n "inputs/data/${subid}"
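# an individual output can later be recomputed from its run record, e.g.
# (sketch; pick the commit of the relevant containers-run record from the log):
#   datalad rerun <record-commit>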
# ------------------------------------------------------------------------------
# FIXME: Replace the datalad containers-run command starting below with a
# command that fits your analysis. Here, it invokes the script "runfmriprep.sh"
# that contains an fmriprep parametrization.
datalad containers-run \
  -m "Compute ${subid}" \
  -n bids-fmriprep \
  --explicit \
  -o "fmriprep/${subid}" \
  -i "inputs/data/${subid}" \
  -i code/license.txt \
  "sh code/runfmriprep.sh $subid"
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
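# a minimal sketch of what code/runfmriprep.sh could contain (hypothetical;
# every fmriprep option below is an assumption to adapt to your analysis):
#   #!/bin/sh
#   subid="$1"
#   fmriprep inputs/data fmriprep participant \
#     --participant-label "$subid" \
#     --fs-license-file code/license.txt \
#     --skip-bids-validation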
# push result file content first - does not need a lock, no interaction with Git
datalad push --to output-storage
# and the output branch next - needs a lock to prevent concurrency issues
flock --verbose "$DSLOCKFILE" git push outputstore
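# $DSLOCKFILE is assumed to point to a lockfile shared by all concurrent jobs
# (typically exported by the job submission environment); flock serializes the
# Git pushes so that only one job updates the output store at a time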
echo SUCCESS
# job handler should clean up workspace