#!/bin/bash
#
# Per-subject compute job: clone the analysis dataset, run the containerized
# pipeline for one subject on a job-specific branch, and push the results
# (file content and branch) back to the output locations.
#
# Usage:
#   <this-script> <dssource> <pushgitremote> <subid>
#
# Arguments:
#   $1 (dssource)      - location to `datalad clone` the analysis dataset from
#   $2 (pushgitremote) - Git remote URL that receives the job's result branch
#   $3 (subid)         - subject identifier; used in input and output paths
#
# Required environment:
#   JOBID      - unique job identifier; names the result branch "job-$JOBID"
#   DSLOCKFILE - lock file handed to flock(1) to serialize the final git push
#
# the job assumes that it is a good idea to run everything in PWD
# the job manager should make sure that is true

# fail whenever something is fishy, use -x to get verbose logfiles
set -e -u -x

# Validate all inputs up front. Under `set -u` an unset variable would abort
# the job anyway, but DSLOCKFILE is only used in the very last step, i.e.
# after the expensive computation has already been done.
dssource="${1:?usage: $0 <dssource> <pushgitremote> <subid>}"
pushgitremote="${2:?usage: $0 <dssource> <pushgitremote> <subid>}"
subid="${3:?usage: $0 <dssource> <pushgitremote> <subid>}"
: "${JOBID:?environment variable JOBID must be set and non-empty}"
: "${DSLOCKFILE:?environment variable DSLOCKFILE must be set and non-empty}"

# get the analysis dataset, which includes the inputs as well
# importantly, we do not clone from the location that we want to push the
# results to, in order to avoid too many jobs blocking access to
# the same location and creating a throughput bottleneck
datalad clone "${dssource}" ds

# all following actions are performed in the context of the superdataset
cd ds

# in order to avoid accumulating temporary git-annex availability information
# and to avoid a synchronization bottleneck by having to consolidate the
# git-annex branch across jobs, we will only push the main tracking branch
# back to the output store (plus the actual file content). Final availability
# information can be established via an eventual
# `git-annex fsck -f output-storage`.
# NOTE(review): the original comment named `joc-storage` here; it was changed
# to match the remote this script actually pushes file content to - confirm.
# this remote is never fetched, it accumulates a large number of branches
# and we want to avoid progressive slowdown. Instead we only ever push
# a unique branch per job (subject AND process specific name)
git remote add outputstore "${pushgitremote}"

# all results of this job will be put into a dedicated branch
git checkout -b "job-${JOBID}"

# we pull down the input subject manually in order to discover relevant
# files. We do this outside the recorded call, because on a potential
# re-run we want to be able to do fine-grained recomputing of individual
# outputs. The recorded calls will have specific paths that will enable
# recomputation outside the scope of the original Condor setup
datalad get -n "inputs/data/${subid}"

# ------------------------------------------------------------------------------
# FIXME: Replace the datalad containers-run command starting below with a
# command that fits your analysis. Here, it invokes the script "runfmriprep.sh"
# that contains an fmriprep parametrization.

# The final argument is deliberately one string: it is the command line that
# gets recorded and executed, with the subject ID already substituted.
datalad containers-run \
  -m "Compute ${subid}" \
  -n bids-fmriprep \
  --explicit \
  -o "fmriprep/${subid}" \
  -i "inputs/data/${subid}/anat/" \
  -i code/license.txt \
  "sh code/runfmriprep.sh $subid"

# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# push result file content first - does not need a lock, no interaction with Git
# (the `output-storage` remote is not created here; presumably it is already
# configured in the cloned dataset - verify against the dataset setup)
datalad push --to output-storage

# and the output branch next - needs a lock to prevent concurrency issues
flock --verbose "${DSLOCKFILE}" git push outputstore

echo SUCCESS
# job handler should clean up workspace