
Participant compute job implementation

Adina Wagner, 3 years ago
commit debc7dbc11
2 changed files with 94 additions and 0 deletions:
  1. code/participant_job  (+65, -0)
  2. code/runfmriprep.sh   (+29, -0)

+ 65 - 0
code/participant_job

@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# the job assumes that it is a good idea to run everything in PWD
+# the job manager should make sure that is true
+
+# fail whenever something is fishy, use -x to get verbose logfiles
+set -e -u -x
+
+dssource="$1"
+pushgitremote="$2"
+subid="$3"
+
+# get the analysis dataset, which includes the inputs as well
+# importantly, we do not clone from the location that we want to push the
+# results to, in order to avoid too many jobs blocking access to
+# the same location and creating a throughput bottleneck
+datalad clone "${dssource}" ds
+
+# all following actions are performed in the context of the superdataset
+cd ds
+
+# in order to avoid accumulating temporary git-annex availability information
+# and to avoid a synchronization bottleneck from having to consolidate the
+# git-annex branch across jobs, we will only push the main tracking branch
+# back to the output store (plus the actual file content). Final availability
+# information can be established via an eventual `git-annex fsck -f output-storage`.
+# this remote is never fetched; it accumulates a large number of branches,
+# and we want to avoid progressive slowdown. Instead we only ever push
+# a unique branch per job (with a subject AND process specific name)
+git remote add outputstore "$pushgitremote"
+
+# all results of this job will be put into a dedicated branch
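+# ($JOBID is assumed to be provided in the job's environment by the
+# scheduler, e.g. the Condor setup mentioned below)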
+git checkout -b "job-$JOBID"
+
+# we pull down the input subject manually in order to discover relevant
+# files. We do this outside the recorded call, because on a potential
+# re-run we want to be able to do fine-grained recomputing of individual
+# outputs. The recorded calls will have specific paths that will enable
+# recomputation outside the scope of the original Condor setup
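+# (-n restricts the get operation to the subdataset itself, without
+# retrieving any file content; the actual input files are obtained by the
+# recorded call below)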
+datalad get -n "inputs/data/${subid}"
+
+# ------------------------------------------------------------------------------
+# FIXME: Replace the datalad containers-run command starting below with a
+# command that fits your analysis. Here, it invokes the script "runfmriprep.sh"
+# that contains an fmriprep parametrization.
+
+datalad containers-run \
+  -m "Compute ${subid}" \
+  -n bids-fmriprep \
+  --explicit \
+  -o "fmriprep/${subid}" \
+  -i "inputs/data/${subid}/anat/" \
+  -i code/license.txt \
+  "sh code/runfmriprep.sh $subid"
+
+# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+# push result file content first - does not need a lock, no interaction with Git
+datalad push --to output-storage
+# and the output branch next - needs a lock to prevent concurrency issues
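+# ($DSLOCKFILE is assumed to be provided in the job's environment and to
+# point to a lock file shared by all concurrent jobs)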
+flock --verbose "$DSLOCKFILE" git push outputstore
+
+echo SUCCESS
+# job handler should clean up workspace
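
For reference, a minimal sketch of how this job script might be invoked; the store URLs, subject label, and lock file path below are placeholders, and JOBID/DSLOCKFILE are assumed to be set by the submit environment (e.g. the Condor setup referenced in the comments):

  # hypothetical invocation; all URLs, labels, and paths are placeholders
  JOBID=42.0 \
  DSLOCKFILE="$PWD/.datalad_lock" \
    code/participant_job \
      'ria+ssh://example.com/path/to/input_store#~analysis' \
      'git@example.com:user/output_store.git' \
      sub-01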

+ 29 - 0
code/runfmriprep.sh

@@ -0,0 +1,29 @@
+#!/bin/bash
+
+subid="$1"
+
+# -----------------------------------------------------------------------------
+# create workdir for fmriprep inside the dataset to simplify singularity call
+# PWD will be available in the container
+mkdir -p .git/tmp/wdir
+
+# pybids (inside fmriprep) will try to read all JSON files in a dataset. In case
+# of a recomputation, JSON files of other subjects can be dangling symlinks.
+# We prevent pybids from crashing the fmriprep run when it cannot read those
+# by temporarily moving them out of the way via renaming.
+# We spare only those that belong to the participant we want to process.
+# After job completion, the jsons will be restored.
+# See https://github.com/bids-standard/pybids/issues/631 for more information.
+
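+# (the `p` in the sed expression prints each matched path once unmodified;
+# together with sed's automatic printing of the substituted line, this emits
+# "old new" pairs that `xargs -n2` feeds to `mv`)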
+find inputs/data -mindepth 2 -name '*.json' -a ! -wholename "*${subid}*" | sed -e "p;s/\.json$/.xyz/" | xargs -n2 mv
+
+# execute fmriprep. Its runscript is available as /singularity within the
+# container. Custom fmriprep parametrization can be done here.
+/singularity inputs/data . participant --participant-label "$subid" \
+    --anat-only -w .git/tmp/wdir --fs-no-reconall --skip-bids-validation \
+    --fs-license-file code/license.txt
+
+
+# restore the jsons we have moved out of the way
+find inputs/data -mindepth 2 -name '*.xyz' -a ! -wholename "*${subid}*" | sed -e "p;s/\.xyz$/.json/" | xargs -n2 mv
+
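
Not shown in this commit, but to complete the picture the comments in code/participant_job sketch: once all jobs have pushed their results, the per-job branches need to be merged back into the main tracking branch of the output dataset, and file availability information re-established. A minimal sketch, assuming a clone of the output store and the remote names used above:

  # octopus-merge all job branches into the current (main) branch
  git merge -m "Merge compute job results" $(git branch -al | grep 'job-' | tr -d ' ')
  # re-establish git-annex availability info for content pushed to the store
  git annex fsck --fast -f output-storage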