participant_job

#!/bin/bash
# the job assumes that it is a good idea to run everything in PWD
# the job manager should make sure that is true
# fail whenever something is fishy, use -x to get verbose logfiles
set -e -u -x
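# note: due to -u, the job aborts immediately if an expected environment
# variable (such as JOBID or DSLOCKFILE used below) is unset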
dssource="$1"
pushgitremote="$2"
subid="$3"
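# illustrative invocation (all values hypothetical; in practice the job
# scheduler supplies these arguments):
#   ./participant_job <clone-source-url> <push-target-git-remote> sub-01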
# get the analysis dataset, which includes the inputs as well
# importantly, we do not clone from the location that we want to push the
# results to, in order to avoid too many jobs blocking access to
# the same location and creating a throughput bottleneck
datalad clone "${dssource}" ds
# all following actions are performed in the context of the superdataset
cd ds
# in order to avoid accumulating temporary git-annex availability information,
# and to avoid a synchronization bottleneck from having to consolidate the
# git-annex branch across jobs, we will only push the main tracking branch
# back to the output store (plus the actual file content). Final availability
# information can be established via an eventual `git-annex fsck -f output-storage`.
# this remote is never fetched; it accumulates a large number of branches,
# and we want to avoid progressive slowdown. Instead we only ever push
# a unique branch for each job (with a subject AND process specific name)
git remote add outputstore "$pushgitremote"
# all results of this job will be put into a dedicated branch
git checkout -b "job-$JOBID"
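# note: JOBID is expected in the environment, provided by the job scheduler
# (e.g. the Condor setup referenced below); it must be unique per job so that
# branch names do not collide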
# we pull down the input subject manually in order to discover relevant
# files. We do this outside the recorded call, because on a potential
# re-run we want to be able to do fine-grained recomputing of individual
# outputs. The recorded calls will have specific paths that will enable
# recomputation outside the scope of the original Condor setup
datalad get -n "inputs/data/${subid}"
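# note: -n (--no-data) installs the subdataset without fetching file content;
# actual content retrieval is left to the -i declarations of the recorded call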
# ------------------------------------------------------------------------------
# FIXME: Replace the datalad containers-run command starting below with a
# command that fits your analysis. Here, it invokes the script "runfmriprep.sh"
# that contains an fmriprep parametrization.
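# flag semantics of the call below: --explicit limits provenance recording and
# saving to the declared paths (and does not require a clean dataset);
# -i inputs are retrieved before execution; -o outputs are unlocked for writing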
datalad containers-run \
  -m "Compute ${subid}" \
  -n bids-fmriprep \
  --explicit \
  -o "fmriprep/${subid}" \
  -i "inputs/data/${subid}/anat/" \
  -i code/license.txt \
  "sh code/runfmriprep.sh $subid"
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# push result file content first - does not need a lock, no interaction with Git
datalad push --to output-storage
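# note: "output-storage" is assumed to be a git-annex special remote already
# configured in the cloned dataset (e.g. the storage side of a RIA store);
# parallel content pushes cannot conflict, since annex keys are content-addressed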
# and the output branch next - needs a lock to prevent concurrency issues
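# DSLOCKFILE is expected in the environment (e.g. set by the job scheduler)
# and must point to one lock file shared by all jobs pushing to this store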
flock --verbose "$DSLOCKFILE" git push outputstore
echo SUCCESS
# job handler should clean up the workspace