3 lat temu · debc7dbc11
--- a/code/participant_job
+++ b/code/participant_job
@@ -0,0 +1,65 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+# the job assumes that it is a good idea to run everything in PWD
			
 
				+# the job manager should make sure that is true
			
 
				+
			
 
				+# fail whenever something is fishy, use -x to get verbose logfiles
			
 
				+set -eux
			
 
				+
			
 
				+dssource=$1       # dataset location the analysis dataset is cloned from
			
 
				+pushgitremote=$2  # git remote that receives this job's result branch
			
 
				+subid=$3          # subject directory name below inputs/data
			
 
				+
			
 
				+# get the analysis dataset, which includes the inputs as well
			
 
				+# importantly, we do not clone from the location that we want to push the
			
 
				+# results to, in order to avoid too many jobs blocking access to
			
 
				+# the same location and creating a throughput bottleneck
			
 
				+datalad clone "$dssource" ds
			
 
				+
			
 
				+# everything below runs from inside the freshly cloned superdataset
			
 
				+cd ds
			
 
				+
			
 
				+# in order to avoid accumulating temporary git-annex availability information
			
 
				+# and to avoid a synchronization bottleneck by having to consolidate the
			
 
				+# git-annex branch across jobs, we will only push the main tracking branch
			
 
				+# back to the output store (plus the actual file content). Final availability
			
 
				+# information can be established via an eventual `git-annex fsck -f output-storage`.
			
 
				+# this remote is never fetched; it accumulates a large number of branches
			
 
				+# and we want to avoid progressive slowdown. Instead we only ever push
			
 
				+# a unique branch per job (subject AND process specific name)
			
 
				+git remote add outputstore "${pushgitremote}"
			
 
				+
			
 
				+# results of this job live on their own branch, named after the JOBID variable
			
 
				+git checkout -b "job-${JOBID}"
			
 
				+
			
 
				+# we pull down the input subject manually in order to discover relevant
			
 
				+# files. We do this outside the recorded call, because on a potential
			
 
				+# re-run we want to be able to do fine-grained recomputing of individual
			
 
				+# outputs. The recorded calls will have specific paths that will enable
			
 
				+# recomputation outside the scope of the original Condor setup
			
 
				+datalad get --no-data "inputs/data/${subid}"
			
 
				+
			
 
				+# ------------------------------------------------------------------------------
			
 
				+# FIXME: Replace the datalad containers-run command starting below with a
			
 
				+# command that fits your analysis. Here, it invokes the script "runfmriprep.sh"
			
 
				+# that contains an fmriprep parametrization.
			
 
				+
			
 
				+datalad containers-run \
			
 
				+  -m "Compute ${subid}" \
			
 
				+  -n bids-fmriprep \
			
 
				+  --explicit \
			
 
				+  -o "fmriprep/${subid}" \
			
 
				+  -i "inputs/data/${subid}/anat/" \
			
 
				+  -i code/license.txt \
			
 
				+  "sh code/runfmriprep.sh ${subid}"
			
 
				+
			
 
				+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
			
 
				+
			
 
				+
			
 
				+# push result file content first - does not need a lock, no interaction with Git
			
 
				+datalad push --to output-storage
			
 
				+# and the output branch next - needs a lock to prevent concurrency issues
			
 
				+flock --verbose "$DSLOCKFILE" git push outputstore
			
 
				+
			
 
				+echo SUCCESS
			
 
				+# job handler should clean up workspace
			
--- a/code/runfmriprep.sh
+++ b/code/runfmriprep.sh
@@ -0,0 +1,29 @@
 
				+#!/bin/bash
			
 
				+
			
 
				+subid=${1:?usage: runfmriprep.sh <subid>}
			
 
				+
			
 
				+# -----------------------------------------------------------------------------
			
 
				+# create workdir for fmriprep inside the dataset to simplify singularity call
			
 
				+# PWD will be available in the container
			
 
				+mkdir -p .git/tmp/wdir
			
 
				+
			
 
				+# pybids (inside fmriprep) will try to read all JSON files in a dataset. In case
			
 
				+# of a recomputation, JSON files of other subjects can be dangling symlinks.
			
 
				+# We prevent pybids from crashing the fmriprep run when it can't read those, by
			
 
				+# wiping them out temporarily via renaming.
			
 
				+# We spare only those that belong to the participant we want to process.
			
 
				+# After job completion, the jsons will be restored.
			
 
				+# See https://github.com/bids-standard/pybids/issues/631 for more information.
			
 
				+
			
 
				+find inputs/data -mindepth 2 -name '*.json' -a ! -wholename "$subid" | sed -e "p;s/json/xyz/" | xargs -n2 mv
			
 
				+
			
 
				+# execute fmriprep. Its runscript is available as /singularity within the
			
 
				+# container. Custom fmriprep parametrization can be done here.
			
 
				+/singularity inputs/data . participant --participant-label "$subid" \
			
 
				+    --anat-only -w .git/tmp/wdir --fs-no-reconall --skip-bids-validation \
			
 
				+    --fs-license-file code/license.txt
			
 
				+fmriprep_status=$?  # keep fmriprep's result; the restore step below must not mask it
			
 
				+
			
 
				+# restore every json we have moved out of the way, even when fmriprep failed
			
 
				+find inputs/data -mindepth 2 -name '*.xyz' -exec sh -c 'for f; do mv -- "$f" "${f%.xyz}.json"; done' _ {} +
			
 
				+exit "$fmriprep_status"