universe = vanilla

# resource requirements for each job
request_cpus = 1
request_memory = 3G
request_disk = 4G

# be nice and only use free resources
# nice_user = true

# tell condor that a job is self contained and the executable
# is enough to bootstrap the computation on the execute node
should_transfer_files = yes

# explicitly do not transfer anything back
# we are using datalad for everything that matters
transfer_output_files = ""

# the actual job script, nothing condor-specific in it
executable = $ENV(PWD)/code/participant_job

# the job expects these environment variables for labeling and synchronization
# - JOBID: subject AND process specific ID to make a branch name from
#   (must be unique across all (even multiple) submissions);
#   including the cluster ID will enable sorting multiple computing attempts
# - DSLOCKFILE: lock (must be accessible from all compute jobs) to synchronize
#   write access to the output dataset
# - DATALAD_GET_SUBDATASET__SOURCE__CANDIDATE__...:
#   (additional) locations for datalad to locate relevant subdatasets, in case
#   a configured URL is outdated
# - GIT_AUTHOR_...: identity information used to save dataset changes in
#   compute jobs
environment = "\
  JOBID=$(subject).$(Cluster) \
  DSLOCKFILE=$ENV(PWD)/.condor_datalad_lock \
  GIT_AUTHOR_NAME='Felix Hoffstaedter' \
  GIT_AUTHOR_EMAIL='f.hoffstaedter@fz-juelich.de' \
  "

# place the job logs into PWD/logs, using the same name as for the result
# branches (JOBID)
log    = $ENV(PWD)/logs/$(Cluster).log
output = $ENV(PWD)/logs/$(Cluster).out
error  = $ENV(PWD)/logs/$(Cluster).err

# essential args for "participant_job"
# 1: where to clone the analysis dataset from
# 2: location to push the result git branch to; the "ria+" prefix is stripped
# 3: ID of the subject to process
arguments = "\
  ria+file:///data/project/cat_preprocessed/inputstore#6c5791d8-1803-48a1-bbaa-2b5e23b5f707 \
  /data/project/cat_preprocessed/dataladstore/6c5/791d8-1803-48a1-bbaa-2b5e23b5f707 \
  $(subject) \
  "

queue
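
# NOTE: the bare "queue" statement above does not itself define $(subject).
# A minimal sketch (not part of the original file) of how subject IDs could be
# supplied, assuming a plain-text list with one participant ID per line; the
# file name code/participants.txt is a placeholder:
#
#   queue subject from code/participants.txt
#
# Alternatively, one "subject=<ID>" assignment followed by its own "queue"
# line can be appended per participant when this submit file is generated.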