
add code and docs

Lennart Wittkuhn 3 years ago
parent
commit
3487380fbf

+ 2 - 1
.gitattributes

@@ -2,4 +2,5 @@
 * annex.backend=MD5E
 **/.git* annex.largefiles=nothing
 CHANGELOG.md annex.largefiles=nothing
-README.md annex.largefiles=nothing
+README.md annex.largefiles=nothing
+highspeed-decoding.Rproj annex.largefiles=nothing

+ 5 - 0
.gitignore

@@ -0,0 +1,5 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+code/*/*.html

+ 90 - 0
code/decoding/highspeed-decoding-cluster.sh

@@ -0,0 +1,90 @@
+#!/usr/bin/env bash
+# ==============================================================================
+# SCRIPT INFORMATION:
+# ==============================================================================
+# SCRIPT: PARALLELIZE THE DECODING SCRIPT ON THE MPIB CLUSTER (TARDIS)
+# PROJECT: HIGHSPEED
+# WRITTEN BY LENNART WITTKUHN, 2018 - 2020
+# CONTACT: WITTKUHN AT MPIB HYPHEN BERLIN DOT MPG DOT DE
+# MAX PLANCK RESEARCH GROUP NEUROCODE
+# MAX PLANCK INSTITUTE FOR HUMAN DEVELOPMENT (MPIB)
+# MAX PLANCK UCL CENTRE FOR COMPUTATIONAL PSYCHIATRY AND AGEING RESEARCH
+# LENTZEALLEE 94, 14195 BERLIN, GERMANY
+# ==============================================================================
+# DEFINE ALL PATHS:
+# ==============================================================================
+# define the name of the current task:
+TASK_NAME="highspeed-decoding"
+# define the name of the project:
+PROJECT_NAME="highspeed"
+# path to the home directory:
+PATH_HOME=/home/mpib/wittkuhn
+# path to the current code folder:
+PATH_CODE=${PATH_HOME}/${PROJECT_NAME}/${TASK_NAME}/code/decoding
+# cd into the directory of the current script:
+cd ${PATH_CODE}
+# path to the current script:
+PATH_SCRIPT=${PATH_CODE}/highspeed_decoding.py
+# path to the output directory:
+PATH_OUT=${PATH_HOME}/${PROJECT_NAME}/${TASK_NAME}/decoding
+# path to the working directory:
+PATH_WORK=${PATH_HOME}/${PROJECT_NAME}/${TASK_NAME}/work
+# path to the log directory:
+PATH_LOG=${PATH_HOME}/${PROJECT_NAME}/${TASK_NAME}/logs/$(date '+%Y%m%d_%H%M%S')
+# ==============================================================================
+# CREATE RELEVANT DIRECTORIES:
+# ==============================================================================
+# create output directory:
+if [ ! -d ${PATH_OUT} ]; then
+	mkdir -p ${PATH_OUT}
+fi
+# create working directory:
+if [ ! -d ${PATH_WORK} ]; then
+	mkdir -p ${PATH_WORK}
+fi
+# create directory for log files:
+if [ ! -d ${PATH_LOG} ]; then
+	mkdir -p ${PATH_LOG}
+fi
+# ==============================================================================
+# DEFINE PARAMETERS:
+# ==============================================================================
+# maximum number of cpus per process:
+N_CPUS=1
+# memory demand in *GB*
+MEM=20GB
+# user-defined subject list (optional, space-separated; defaults to all subjects):
+PARTICIPANTS=$1
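+# usage sketch (illustration): "bash highspeed-decoding-cluster.sh" submits
+# all subjects; "bash highspeed-decoding-cluster.sh '1 2 3'" submits only
+# subjects 1, 2 and 3.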
+# ==============================================================================
+# RUN THE DECODING:
+# ==============================================================================
+# initialize a counter for the subjects:
+SUB_COUNT=0
+for i in ${PARTICIPANTS:-$(seq 1 40)}; do
+	# turn the subject id into a zero-padded number:
+	SUB=$(printf "%02d" ${i})
+	# name of the job (start a fresh job file for this subject):
+	echo "#PBS -N decoding_sub-${SUB}" > job
+	# set the expected maximum running time for the job:
+	echo "#PBS -l walltime=12:00:00" >> job
+	# determine how much RAM your operation needs:
+	echo "#PBS -l mem=${MEM}" >> job
+	# request multiple cpus:
+	echo "#PBS -l nodes=1:ppn=${N_CPUS}" >> job
+	# write (output) log to log folder:
+	echo "#PBS -o ${PATH_LOG}" >> job
+	# write (error) log to log folder:
+	echo "#PBS -e ${PATH_LOG}" >> job
+	# email notification on abort/end, use 'n' for no notification:
+	echo "#PBS -m n" >> job
+	# start in the current directory:
+	echo "#PBS -d ${PATH_CODE}" >> job
+	# activate virtual environment on cluster:
+	echo "source /etc/bash_completion.d/virtualenvwrapper" >> job
+	echo "workon decoding" >> job
+	# define the main command:
+	echo "python3 ${PATH_SCRIPT} ${SUB}" >> job
+	# submit job to cluster queue and remove it to avoid confusion:
+	qsub job
+	rm -f job
+done

+ 16 - 0
code/decoding/highspeed-parameters.yaml

@@ -0,0 +1,16 @@
+# highspeed parameters
+
+info:
+  experimenter: lennart wittkuhn
+  institution: max planck institute for human development
+  project: highspeed
+task:
+  num_trial_odd: 600  # number of oddball trials in the experiment
+  num_trial_seq: 75  # number of sequence trials in the experiment
+  num_trial_rep: 45  # number of repetition trials in the experiment
+mri:
+  tr: 1.25  # repetition time (TR), in seconds
+  thresh: 3  # t-value threshold for feature selection
+  num_sessions: 2  # number of experimental sessions in total
+  num_runs: 8  # number of functional task runs in total
+  num_trigger: 5  # number of scanner triggers before the experiment starts

+ 883 - 0
code/decoding/highspeed_decoding.py

@@ -0,0 +1,883 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# ======================================================================
+# SCRIPT INFORMATION:
+# ======================================================================
+# SCRIPT: DECODING ANALYSIS
+# PROJECT: HIGHSPEED
+# WRITTEN BY LENNART WITTKUHN, 2018 - 2019
+# CONTACT: WITTKUHN AT MPIB HYPHEN BERLIN DOT MPG DOT DE
+# MAX PLANCK RESEARCH GROUP NEUROCODE
+# MAX PLANCK INSTITUTE FOR HUMAN DEVELOPMENT
+# MAX PLANCK UCL CENTRE FOR COMPUTATIONAL PSYCHIATRY AND AGEING RESEARCH
+# LENTZEALLEE 94, 14195 BERLIN, GERMANY
+'''
+========================================================================
+IMPORT RELEVANT PACKAGES:
+========================================================================
+'''
+import glob
+import os
+import yaml
+import logging
+import time
+from os.path import join as opj
+import sys
+import copy
+from pprint import pformat
+import numpy as np
+from nilearn.input_data import NiftiMasker
+from nilearn import plotting, image, masking
+import pandas as pd
+from matplotlib import pyplot as plt
+import warnings
+from nilearn.signal import clean
+from sklearn.svm import SVC
+from sklearn.svm import LinearSVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import LeaveOneGroupOut
+from sklearn.model_selection import cross_val_score, cross_val_predict
+from sklearn.preprocessing import StandardScaler
+from collections import Counter
+import datalad.api as dl
+'''
+========================================================================
+DEAL WITH ERRORS, WARNINGS, ETC.
+========================================================================
+'''
+warnings.filterwarnings('ignore', message='numpy.dtype size changed*')
+warnings.filterwarnings('ignore', message='numpy.ufunc size changed*')
+mpl_logger = logging.getLogger('matplotlib')
+mpl_logger.setLevel(logging.WARNING)
+'''
+========================================================================
+SETUP NECESSARY PATHS ETC:
+========================================================================
+'''
+# get start time of the script:
+start = time.time()
+# name of the current project:
+project = 'highspeed'
+# initialize empty variables:
+sub = None
+path_tardis = None
+path_server = None
+path_local = None
+# define paths depending on the operating system (OS) platform:
+if 'darwin' in sys.platform:
+    # define the path to the cluster:
+    path_tardis = opj(os.environ['HOME'], 'Volumes', 'tardis_beegfs', project)
+    # define the path to the server:
+    path_server = opj('/Volumes', 'MPRG-Neurocode', 'Data', project)
+    # define the path to the local computer:
+    path_local = opj(os.environ['HOME'], project, '*', '*')
+    # define the subject id:
+    sub = 'sub-14'
+elif 'linux' in sys.platform:
+    # path to the project root:
+    project_name = 'highspeed-decoding'
+    path_root = os.getcwd().split(project_name)[0] + project_name
+    # define the path to the cluster:
+    path_tardis = path_root
+    # define the path to the server:
+    path_server = path_tardis
+    # define the path to the local computer:
+    path_local = opj(path_tardis, 'code', 'decoding')
+    # define the subject id:
+    sub = 'sub-%s' % sys.argv[1]
+'''
+========================================================================
+LOAD PROJECT PARAMETERS:
+========================================================================
+'''
+path_params = glob.glob(opj(path_local, '*parameters.yaml'))[0]
+with open(path_params, 'rb') as f:
+    params = yaml.load(f, Loader=yaml.FullLoader)
+'''
+========================================================================
+CREATE PATHS TO OUTPUT DIRECTORIES:
+========================================================================
+'''
+path_decoding = opj(path_tardis, 'decoding')
+path_out = opj(path_decoding, sub)
+path_out_figs = opj(path_out, 'plots')
+path_out_data = opj(path_out, 'data')
+path_out_logs = opj(path_out, 'logs')
+path_out_masks = opj(path_out, 'masks')
+'''
+========================================================================
+CREATE OUTPUT DIRECTORIES IF THEY DO NOT EXIST YET:
+========================================================================
+'''
+for path in [path_out_figs, path_out_data, path_out_logs, path_out_masks]:
+    if not os.path.exists(path):
+        os.makedirs(path)
+'''
+========================================================================
+SETUP LOGGING:
+========================================================================
+'''
+# remove all handlers associated with the root logger object:
+for handler in logging.root.handlers[:]:
+    logging.root.removeHandler(handler)
+# get current date and time as a string:
+timestr = time.strftime("%Y-%m-%d-%H-%M-%S")
+# create path for the logging file
+log = opj(path_out_logs, '%s-%s.log' % (timestr, sub))
+# start logging:
+logging.basicConfig(
+    filename=log, level=logging.DEBUG, format='%(asctime)s %(message)s',
+    datefmt='%d/%m/%Y %H:%M:%S')
+'''
+========================================================================
+DEFINE DECODING SPECIFIC PARAMETERS:
+========================================================================
+'''
+# define the mask to be used:
+mask = 'visual'  # visual or whole
+# applied time-shift to account for the BOLD delay, in seconds:
+bold_delay = 4  # 4, 5 or 6 secs
+# define the degree of smoothing of the functional data
+smooth = 4
+'''
+========================================================================
+ADD BASIC SCRIPT INFORMATION TO THE LOGGER:
+========================================================================
+'''
+logging.info('Running decoding script')
+logging.info('operating system: %s' % sys.platform)
+logging.info('project name: %s' % project)
+logging.info('participant: %s' % sub)
+logging.info('mask: %s' % mask)
+logging.info('bold delay: %d secs' % bold_delay)
+logging.info('smoothing kernel: %d mm' % smooth)
+'''
+========================================================================
+DEFINE RELEVANT VARIABLES:
+========================================================================
+'''
+# time of repetition (TR), in seconds:
+t_tr = params['mri']['tr']
+# number of volumes (TRs) for each functional task run:
+n_tr_run = 530
+# acquisition time window of one sequence trial, in seconds:
+t_win = 16
+# number of measurements that are considered per sequence time window:
+n_tr_win = round(t_win / t_tr)
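+# e.g., with t_win = 16 s and t_tr = 1.25 s: round(16 / 1.25) = round(12.8) = 13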
+# number of oddball trials in the experiment:
+n_tr_odd = params['task']['num_trial_odd']
+# number of sequence trials in the experiment:
+n_tr_seq = params['task']['num_trial_seq']
+# number of repetition trials in the experiment:
+n_tr_rep = params['task']['num_trial_rep']
+# number of scanner triggers before the experiment starts:
+n_tr_wait = params['mri']['num_trigger']
+# number of functional task runs in total:
+n_run = params['mri']['num_runs']
+# number of experimental sessions in total:
+n_ses = params['mri']['num_sessions']
+# number of functional task runs per session:
+n_run_ses = int(n_run / n_ses)
+'''
+========================================================================
+LOAD BEHAVIORAL DATA (THE EVENTS.TSV FILES OF THE SUBJECT):
+========================================================================
+Load the events files for the subject. The 'oddball' data serves as the
+training set of the classifier. The 'sequence' data constitutes the
+test set.
+'''
+# paths to all events files of the current subject:
+path_events = opj(path_tardis, 'bids', sub, 'ses-*', 'func', '*tsv')
+dl.get(glob.glob(path_events))
+path_events = sorted(glob.glob(path_events), key=lambda f: os.path.basename(f))
+logging.info('found %d event files' % len(path_events))
+logging.info('paths to events files (sorted):\n%s' % pformat(path_events))
+# import events file and save data in dataframe:
+df_events = pd.concat((pd.read_csv(f, sep='\t') for f in path_events),
+                      ignore_index=True)
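+# note: the events files are expected to provide (among others) the columns
+# 'condition', 'trial_type', 'onset', 'session', 'run_study' and 'stim_label',
+# which are used below to build the task data subsets for decoding.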
+'''
+========================================================================
+CREATE PATHS TO THE MRI DATA:
+========================================================================
+'''
+# define path to input directories:
+path_fmriprep = opj(path_tardis, 'fmriprep', 'fmriprep', sub)
+path_level1 = opj(path_tardis, 'glm', 'l1pipeline')
+path_masks = opj(path_tardis, 'masks', 'masks')
+
+logging.info('path to fmriprep files: %s' % path_fmriprep)
+logging.info('path to level 1 files: %s' % path_level1)
+logging.info('path to mask files: %s' % path_masks)
+
+# load the visual mask task files:
+path_mask_vis_task = opj(path_masks, 'mask_visual', sub, '*', '*task-highspeed*.nii.gz')
+path_mask_vis_task = sorted(glob.glob(path_mask_vis_task), key=lambda f: os.path.basename(f))
+logging.info('found %d visual mask task files' % len(path_mask_vis_task))
+logging.info('paths to visual mask task files:\n%s' % pformat(path_mask_vis_task))
+dl.get(glob.glob(path_mask_vis_task))
+
+# load the hippocampus mask task files:
+path_mask_hpc_task = opj(path_masks, 'mask_hippocampus', sub, '*', '*task-highspeed*.nii.gz')
+path_mask_hpc_task = sorted(glob.glob(path_mask_hpc_task), key=lambda f: os.path.basename(f))
+logging.info('found %d hpc mask files' % len(path_mask_hpc_task))
+logging.info('paths to hpc mask task files:\n%s' % pformat(path_mask_hpc_task))
+dl.get(glob.glob(path_mask_hpc_task))
+
+# load the whole brain mask files:
+path_mask_whole_task = opj(path_fmriprep, '*', 'func', '*task-highspeed*T1w*brain_mask.nii.gz')
+path_mask_whole_task = sorted(glob.glob(path_mask_whole_task), key=lambda f: os.path.basename(f))
+logging.info('found %d whole-brain masks' % len(path_mask_whole_task))
+logging.info('paths to whole-brain mask files:\n%s' % pformat(path_mask_whole_task))
+dl.get(glob.glob(path_mask_whole_task))
+
+# load the functional mri task files:
+path_func_task = opj(path_level1, 'smooth', sub, '*', '*task-highspeed*nii.gz')
+path_func_task = sorted(glob.glob(path_func_task), key=lambda f: os.path.basename(f))
+logging.info('found %d functional mri task files' % len(path_func_task))
+logging.info('paths to functional mri task files:\n%s' % pformat(path_func_task))
+dl.get(glob.glob(path_func_task))
+
+# define path to the functional resting state runs:
+path_rest = opj(path_tardis, 'masks', 'masks', 'smooth', sub, '*', '*task-rest*nii.gz')
+path_rest = sorted(glob.glob(path_rest), key=lambda f: os.path.basename(f))
+logging.info('found %d functional mri rest files' % len(path_rest))
+logging.info('paths to functional mri rest files:\n%s' % pformat(path_rest))
+dl.get(glob.glob(path_rest))
+
+# load the anatomical mri file:
+path_anat = opj(path_fmriprep, 'anat', '%s_desc-preproc_T1w.nii.gz' % sub)
+path_anat = sorted(glob.glob(path_anat), key=lambda f: os.path.basename(f))
+logging.info('found %d anatomical mri file' % len(path_anat))
+logging.info('paths to anatomical mri files:\n%s' % pformat(path_anat))
+dl.get(glob.glob(path_anat))
+
+# load the confounds files:
+path_confs_task = opj(path_fmriprep, '*', 'func', '*task-highspeed*confounds_regressors.tsv')
+path_confs_task = sorted(glob.glob(path_confs_task), key=lambda f: os.path.basename(f))
+logging.info('found %d confounds files' % len(path_confs_task))
+logging.info('paths to confounds files:\n%s' % pformat(path_confs_task))
+dl.get(glob.glob(path_confs_task))
+
+# load the spm.mat files:
+path_spm_mat = opj(path_level1, 'contrasts', sub, '*', 'SPM.mat')
+path_spm_mat = sorted(glob.glob(path_spm_mat), key=lambda f: os.path.dirname(f))
+logging.info('found %d spm.mat files' % len(path_spm_mat))
+logging.info('paths to spm.mat files:\n%s' % pformat(path_spm_mat))
+dl.get(glob.glob(path_spm_mat))
+
+# load the t-maps of the first-level glm:
+path_tmap = opj(path_level1, 'contrasts', sub, '*', 'spmT*.nii')
+path_tmap = sorted(glob.glob(path_tmap), key=lambda f: os.path.dirname(f))
+logging.info('found %d t-maps' % len(path_tmap))
+logging.info('paths to t-maps files:\n%s' % pformat(path_tmap))
+dl.get(glob.glob(path_tmap))
+'''
+========================================================================
+LOAD THE MRI DATA:
+========================================================================
+'''
+anat = image.load_img(path_anat[0])
+logging.info('successfully loaded %s' % path_anat[0])
+# load visual mask:
+mask_vis = image.load_img(path_mask_vis_task[0]).get_fdata().astype(int)
+logging.info('successfully loaded one visual mask file!')
+# load tmap data:
+tmaps = [image.load_img(i).get_fdata().astype(float) for i in copy.deepcopy(path_tmap)]
+logging.info('successfully loaded the tmaps!')
+# load hippocampus mask:
+mask_hpc = [image.load_img(i).get_fdata().astype(int) for i in copy.deepcopy(path_mask_hpc_task)]
+logging.info('successfully loaded the hippocampus mask files!')
+'''
+FEATURE SELECTION
+'''
+# plot raw-tmaps on an anatomical background:
+logging.info('plotting raw tmaps with anatomical as background:')
+for i, path in enumerate(path_tmap):
+    logging.info('plotting raw tmap %s (%d of %d)' % (path, i+1, len(path_tmap)))
+    path_save = opj(path_out_figs, '%s_run-%02d_tmap_raw.png' % (sub, i+1))
+    plotting.plot_roi(path, anat, title=os.path.basename(path_save),
+                      output_file=path_save, colorbar=True)
+
+'''
+========================================================================
+FEATURE SELECTION: MASK THE T-MAPS WITH THE ANATOMICAL MASKS
+We create a combination of the t-maps and the anatomical mask.
+To this end, we multiply the anatomical mask with each t-map.
+As the anatomical mask consists of binary values (zeros and ones),
+the multiplication results in t-maps masked by the anatomical ROI.
+========================================================================
+'''
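+# illustration (toy example, not part of the analysis): multiplying a t-map
+# with a binary mask zeroes out all voxels outside the ROI, e.g.:
+#   np.multiply([0, 1, 1, 0], [2.5, 3.1, 4.2, 1.9]) -> [0.0, 3.1, 4.2, 0.0]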
+#v= tmaps_masked[0]
+#fig = plt.figure()
+#ax = fig.add_subplot(111, projection='3d')
+#ax.scatter(v[:,0],v[:,1],v[:,2], zdir='z', c= 'red')
+#plt.show()
+
+# check if any value in the supposedly binary mask is bigger than 1:
+if np.any(mask_vis > 1):
+    logging.error('detected values > 1 in the anatomical ROI!')
+    sys.exit("Values > 1 in the anatomical ROI!")
+# get combination of anatomical mask and t-map
+tmaps_masked = [np.multiply(mask_vis, i) for i in copy.deepcopy(tmaps)]
+# masked tmap into image like object:
+tmaps_masked_img = [image.new_img_like(i, j) for (i, j) in zip(path_tmap, copy.deepcopy(tmaps_masked))]
+
+for i, path in enumerate(tmaps_masked_img):
+    path_save = opj(path_out_masks, '%s_run-%02d_tmap_masked.nii.gz' % (sub, i + 1))
+    path.to_filename(path_save)
+
+# plot masked t-maps
+logging.info('plotting masked tmaps with anatomical as background:')
+for i, path in enumerate(tmaps_masked_img):
+    logging.info('plotting masked tmap %d of %d' % (i+1, len(tmaps_masked_img)))
+    path_save = opj(path_out_figs, '%s_run-%02d_tmap_masked.png' % (sub, i+1))
+    plotting.plot_roi(path, anat, title=os.path.basename(path_save),
+                      output_file=path_save, colorbar=True)
+'''
+========================================================================
+FEATURE SELECTION: THRESHOLD THE MASKED T-MAPS
+We threshold the masked t-maps, setting all voxels whose t-values
+fall below the threshold to zero. We then extract these data and
+count how many voxels were above or below the threshold in total.
+========================================================================
+'''
+# set the threshold:
+threshold = params['mri']['thresh']
+logging.info('thresholding t-maps with a threshold of %s' % str(threshold))
+# threshold the masked tmap image:
+tmaps_masked_thresh_img = [image.threshold_img(i, threshold) for i in copy.deepcopy(tmaps_masked_img)]
+
+logging.info('plotting thresholded tmaps with anatomical as background:')
+for i, path in enumerate(tmaps_masked_thresh_img):
+    path_save = opj(path_out_figs, '%s_run-%02d_tmap_masked_thresh.png' % (sub, i+1))
+    logging.info('plotting masked tmap %s (%d of %d)'
+                 % (path_save, i + 1, len(tmaps_masked_thresh_img)))
+    plotting.plot_roi(path, anat, title=os.path.basename(path_save),
+                      output_file=path_save, colorbar=True)
+
+# extract data from the thresholded images
+tmaps_masked_thresh = [image.load_img(i).get_fdata().astype(float) for i in tmaps_masked_thresh_img]
+
+# calculate the number of tmap voxels:
+num_tmap_voxel = [np.size(i) for i in copy.deepcopy(tmaps_masked_thresh)]
+logging.info('number of feature selected voxels: %s' % pformat(num_tmap_voxel))
+
+num_above_voxel = [np.count_nonzero(i) for i in copy.deepcopy(tmaps_masked_thresh)]
+logging.info('number of voxels above threshold: %s' % pformat(num_above_voxel))
+
+num_below_voxel = [np.count_nonzero(i == 0) for i in copy.deepcopy(tmaps_masked_thresh)]
+logging.info('number of voxels below threshold: %s' % pformat(num_below_voxel))
+
+# plot the distribution of t-values:
+for i, run_mask in enumerate(tmaps_masked_thresh):
+    masked_tmap_flat = run_mask.flatten()
+    # remove NaNs and zeros (i.e., all voxels outside the thresholded mask):
+    masked_tmap_flat = masked_tmap_flat[~np.isnan(masked_tmap_flat) & (masked_tmap_flat != 0)]
+    path_save = opj(path_out_figs, '%s_run-%02d_tvalue_distribution.png' % (sub, i+1))
+    logging.info('plotting thresholded t-value distribution %s (%d of %d)'
+                 % (path_save, i+1, len(tmaps_masked_thresh)))
+    fig = plt.figure()
+    plt.hist(masked_tmap_flat, bins='auto')
+    plt.xlabel('t-values')
+    plt.ylabel('number')
+    plt.title('t-value distribution (%s, run-%02d)' % (sub, i+1))
+    plt.savefig(path_save)
+
+# create a dataframe with the number of voxels
+df_thresh = pd.DataFrame({
+    'id': [sub] * n_run,
+    'run': np.arange(1,n_run+1),
+    'n_total': num_tmap_voxel,
+    'n_above': num_above_voxel,
+    'n_below': num_below_voxel
+})
+file_name = opj(path_out_data, '%s_thresholding.csv' % (sub))
+df_thresh.to_csv(file_name, sep=',', index=False)
+
+'''
+========================================================================
+FEATURE SELECTION: BINARIZE THE THRESHOLDED MASKED T-MAPS
+We set all voxels that survived the thresholding to 1 and all voxels
+that were not selected to 0.
+========================================================================
+'''
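+# illustration (toy example): a thresholded t-map row [nan, 0.0, 3.4] becomes
+# [0.0, 0.0, 1.0] after the two np.where steps below and [False, False, True]
+# after the conversion to booleans.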
+# replace all NaNs with 0:
+tmaps_masked_thresh_bin = [np.where(np.isnan(i), 0, i) for i in copy.deepcopy(tmaps_masked_thresh)]
+# replace all other values with 1:
+tmaps_masked_thresh_bin = [np.where(i > 0, 1, i) for i in copy.deepcopy(tmaps_masked_thresh_bin)]
+# turn the 3D-array into booleans:
+tmaps_masked_thresh_bin = [i.astype(bool) for i in copy.deepcopy(tmaps_masked_thresh_bin)]
+# create image like object:
+masks_final = [image.new_img_like(path_func_task[0], i.astype(int)) for i in copy.deepcopy(tmaps_masked_thresh_bin)]
+
+logging.info('plotting final masks with anatomical as background:')
+for i, path in enumerate(masks_final):
+    filename = '%s_run-%02d_tmap_masked_thresh.nii.gz'\
+               % (sub, i + 1)
+    path_save = opj(path_out_masks, filename)
+    logging.info('saving final mask %s (%d of %d)'
+                 % (path_save, i+1, len(masks_final)))
+    path.to_filename(path_save)
+    path_save = opj(path_out_figs, '%s_run-%02d_visual_final_mask.png'
+                    % (sub, i + 1))
+    logging.info('plotting final mask %s (%d of %d)'
+                 % (path_save, i + 1, len(masks_final)))
+    plotting.plot_roi(path, anat, title=os.path.basename(path_save),
+                      output_file=path_save, colorbar=True)
+'''
+========================================================================
+LOAD SMOOTHED FMRI DATA FOR ALL FUNCTIONAL TASK RUNS:
+========================================================================
+'''
+# load smoothed functional mri data for all eight task runs:
+logging.info('loading %d functional task runs ...' % len(path_func_task))
+data_task = [image.load_img(i) for i in path_func_task]
+logging.info('loading successful!')
+'''
+========================================================================
+DEFINE THE CLASSIFIERS
+========================================================================
+'''
+class_labels = ['cat', 'chair', 'face', 'house', 'shoe']
+# create a dictionary with all values as independent instances:
+# see here: https://bit.ly/2J1DvZm
+clf_set = {key: LogisticRegression(
+    C=1., penalty='l2', multi_class='ovr', solver='lbfgs',
+    max_iter=4000, class_weight='balanced', random_state=42) for key in class_labels}
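+# note: the dict comprehension above is used deliberately (rather than, e.g.,
+# dict.fromkeys), since dict.fromkeys would let all class labels share one
+# single estimator object instead of one independent classifier per class.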
+classifiers = {
+    'log_reg': LogisticRegression(
+        C=1., penalty='l2', multi_class='multinomial', solver='lbfgs',
+        max_iter=4000, class_weight='balanced', random_state=42)}
+clf_set.update(classifiers)
+'''
+========================================================================
+1. SPLIT THE EVENTS DATAFRAME FOR EACH TASK CONDITION
+2. RESET THE INDICES OF THE DATAFRAMES
+3. SORT THE ROWS OF ALL DATAFRAMES IN CHRONOLOGICAL ORDER
+4. PRINT THE NUMBER OF TRIALS OF EACH TASK CONDITION
+========================================================================
+'''
+
+
+class TaskData:
+    """Hold the data of one task condition subset for the decoding analysis.
+
+    Selects the relevant events of one task condition (oddball, sequence,
+    or repetition), computes the fMRI volumes (TRs) to be extracted for
+    each event, and provides methods for z-scoring and classifier
+    prediction on these data.
+    """
+    def __init__(
+            self, events, condition, name, trial_type, bold_delay=0,
+            interval=1, t_tr=1.25, num_vol_run=530):
+        # define name of the task data subset:
+        self.name = name
+        # define the task condition the task data subset is from:
+        self.condition = condition
+        # define the delay (in seconds) by which onsets are moved:
+        self.bold_delay = bold_delay
+        # define the number of TRs from event onset that should be selected:
+        self.interval = interval
+        # define the repetition time (TR) of the mri data acquisition:
+        self.t_tr = t_tr
+        # define the number of volumes per task run:
+        self.num_vol_run = num_vol_run
+        # select events: upright stimulus, correct answer trials only:
+        if trial_type == 'stimulus':
+            self.events = events.loc[
+                          (events['condition'] == condition) &
+                          (events['trial_type'] == trial_type) &
+                          (events['stim_orient'] == 0) &
+                          (events['serial_position'] == 1) &
+                          (events['accuracy'] != 0),
+                          :]
+        elif trial_type == 'cue':
+            self.events = events.loc[
+                          (events['condition'] == condition) &
+                          (events['trial_type'] == trial_type),
+                          :]
+        # reset the indices of the data frame:
+        self.events = self.events.reset_index(drop=True)
+        # sort all values by session and run:
+        self.events = self.events.sort_values(by=['session', 'run_session'])
+        # call further function upon initialization:
+        self.define_trs()
+        self.get_stats()
+
+    def define_trs(self):
+        # import relevant functions:
+        import numpy as np
+        # select all events onsets:
+        self.event_onsets = self.events['onset']
+        # add the selected delay to account for the peak of the hrf
+        self.bold_peaks = self.events['onset'] + self.bold_delay
+        # divide the expected time-point of bold peaks by the repetition time:
+        self.peak_trs = self.bold_peaks / self.t_tr
+        # add the number of run volumes to the tr indices:
+        run_volumes = (self.events['run_study']-1) * self.num_vol_run
+        # add the number of volumes of each run:
+        trs = round(self.peak_trs + run_volumes)
+        # copy the relevant trs as often as specified by the interval:
+        a = np.transpose(np.tile(trs, (self.interval, 1)))
+        # create same-sized matrix with trs to be added:
+        b = np.full((len(trs), self.interval), np.arange(self.interval))
+        # assign the final list of trs:
+        self.trs = np.array(np.add(a, b).flatten(), dtype=int)
+        # save the TRs of the stimulus presentations
+        self.stim_trs = round(self.event_onsets / self.t_tr + run_volumes)
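+        # worked example (illustration only): an event with onset = 100 s in
+        # run_study = 2, given bold_delay = 4 s and t_tr = 1.25 s, yields
+        # peak_trs = (100 + 4) / 1.25 = 83.2; with num_vol_run = 530 volumes
+        # per run, the selected volume index is round(83.2 + 530) = 613.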
+
+    def get_stats(self):
+        import numpy as np
+        self.num_trials = len(self.events)
+        self.runs = np.repeat(np.array(self.events['run_study'], dtype=int), self.interval)
+        self.trials = np.repeat(np.array(self.events['trial'], dtype=int), self.interval)
+        self.sess = np.repeat(np.array(self.events['session'], dtype=int), self.interval)
+        self.stim = np.repeat(np.array(self.events['stim_label'], dtype=object), self.interval)
+        self.itis = np.repeat(np.array(self.events['interval_time'], dtype=float), self.interval)
+        self.stim_trs = np.repeat(np.array(self.stim_trs, dtype=int), self.interval)
+
+    def zscore(self, signals, run_list, t_tr=1.25):
+        from nilearn.signal import clean
+        import numpy as np
+        # get boolean indices for all run indices in the run list:
+        run_indices = np.isin(self.runs, list(run_list))
+        # standardize data all runs in the run list:
+        self.data_zscored = clean(
+            signals=signals[self.trs[run_indices]],
+            sessions=self.runs[run_indices],
+            t_r=t_tr,
+            detrend=False,
+            standardize=True)
+
+    def predict(self, clf, run_list):
+        # import packages:
+        import pandas as pd
+        # get classifier class predictions:
+        pred_class = clf.predict(self.data_zscored)
+        # get classifier probabilistic predictions:
+        pred_proba = clf.predict_proba(self.data_zscored)
+        # get the classes of the classifier:
+        classes_names = clf.classes_
+        # get boolean indices for all run indices in the run list:
+        run_indices = np.isin(self.runs, list(run_list))
+        # create a dataframe with the probabilities of each class:
+        df = pd.DataFrame(pred_proba, columns=classes_names)
+        # get the number of predictions made:
+        num_pred = len(df)
+        # get the number of trials in the test set:
+        num_trials = int(num_pred / self.interval)
+        # add the predicted class label to the dataframe:
+        df['pred_label'] = pred_class
+        # add the true stimulus label to the dataframe:
+        df['stim'] = self.stim[run_indices]
+        # add the volume number (TR) to the dataframe:
+        df['tr'] = self.trs[run_indices]
+        # add the sequential TR to the dataframe:
+        df['seq_tr'] = np.tile(np.arange(1, self.interval + 1), num_trials)
+        # add the counter of trs on which the stimulus was presented
+        df['stim_tr'] = self.stim_trs[run_indices]
+        # add the trial number to the dataframe:
+        df['trial'] = self.trials[run_indices]
+        # add the run number to the dataframe:
+        df['run_study'] = self.runs[run_indices]
+        # add the session number to the dataframe:
+        df['session'] = self.sess[run_indices]
+        # add the inter trial interval to the dataframe:
+        df['tITI'] = self.itis[run_indices]
+        # add the participant id to the dataframe:
+        df['id'] = np.repeat(self.events['subject'].unique(), num_pred)
+        # add the name of the classifier to the dataframe:
+        df['test_set'] = np.repeat(self.name, num_pred)
+        # return the dataframe:
+        return df
+
+
+train_odd_peak = TaskData(
+        events=df_events, condition='oddball', trial_type='stimulus',
+        bold_delay=4, interval=1, name='train-odd_peak')
+test_odd_peak = TaskData(
+        events=df_events, condition='oddball', trial_type='stimulus',
+        bold_delay=4, interval=1, name='test-odd_peak')
+test_odd_long = TaskData(
+        events=df_events, condition='oddball', trial_type='stimulus',
+        bold_delay=0, interval=7, name='test-odd_long')
+test_seq_long = TaskData(
+        events=df_events, condition='sequence', trial_type='stimulus',
+        bold_delay=0, interval=13, name='test-seq_long')
+test_rep_long = TaskData(
+        events=df_events, condition='repetition', trial_type='stimulus',
+        bold_delay=0, interval=13, name='test-rep_long')
+test_seq_cue = TaskData(
+        events=df_events, condition='sequence', trial_type='cue',
+        bold_delay=0, interval=5, name='test-seq_cue')
+test_rep_cue = TaskData(
+        events=df_events, condition='repetition', trial_type='cue',
+        bold_delay=0, interval=5, name='test-rep_cue')
+test_sets = [
+        test_odd_peak, test_odd_long, test_seq_long, test_rep_long,
+        test_seq_cue, test_rep_cue
+        ]
+'''
+========================================================================
+SEPARATE RUN-WISE STANDARDIZATION (Z-SCORING) OF ALL TASK CONDITIONS
+Standardize features by removing the mean and scaling to unit variance.
+Centering and scaling happen independently on each feature by computing
+the relevant statistics on the samples in the training set. Here,
+we standardize the features of all trials run-wise but separated for
+each task condition (oddball, sequence, and repetition condition).
+========================================================================
+'''
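+# minimal sketch of the run-wise z-scoring performed by TaskData.zscore
+# (illustration only; X is assumed to be an array of shape
+# (n_volumes, n_voxels) and 'runs' labels the run of each volume):
+#   from nilearn.signal import clean
+#   z = clean(signals=X, sessions=runs, detrend=False, standardize=True)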
+
+
+def detrend(data, t_tr=1.25):
+    from nilearn.signal import clean
+    data_detrend = clean(
+        signals=data, t_r=t_tr, detrend=True, standardize=False)
+    return data_detrend
+
+
+def show_weights(array):
+    # https://stackoverflow.com/a/50154388
+    import numpy as np
+    import seaborn as sns
+    n_samples = array.shape[0]
+    classes, bins = np.unique(array, return_counts=True)
+    n_classes = len(classes)
+    weights = n_samples / (n_classes * bins)
+    sns.barplot(x=classes, y=weights)
+    plt.xlabel('class label')
+    plt.ylabel('weight')
+    plt.show()
+
+
+def melt_df(df, melt_columns):
+    # save the column names of the dataframe in a list:
+    column_names = df.columns.tolist()
+    # remove the stimulus classes from the column names;
+    id_vars = [x for x in column_names if x not in melt_columns]
+    # melt the dataframe creating one column with value_name and var_name:
+    df_melt = pd.melt(
+            df, value_name='probability', var_name='class', id_vars=id_vars)
+    # return the melted dataframe:
+    return df_melt
+
+
+data_list = []
+runs = list(range(1, n_run+1))
+#mask_label = 'cv'
+
+logging.info('starting leave-one-run-out cross-validation')
+for mask_label in ['cv', 'cv_hpc']:
+    logging.info('testing in mask %s' % (mask_label))
+    for run in runs:
+        logging.info('testing on run %d of %d ...' % (run, len(runs)))
+        # define the run indices for the training and test set:
+        train_runs = [x for x in runs if x != run]
+        test_runs = [x for x in runs if x == run]
+        # get the feature selection mask of the current run:
+        if mask_label == 'cv':
+            mask_run = masks_final[runs.index(run)]
+        elif mask_label == 'cv_hpc':
+            mask_run = path_mask_hpc_task[runs.index(run)]
+        # extract smoothed fMRI data from the mask for the cross-validation fold:
+        masked_data = [masking.apply_mask(i, mask_run) for i in data_task]
+        # detrend the masked fMRI data separately for each run:
+        data_detrend = [detrend(i) for i in masked_data]
+        # combine the detrended data of all runs:
+        data_detrend = np.vstack(data_detrend)
+        # loop through all classifiers in the classifier set:
+        for clf_name, clf in clf_set.items():
+            # print classifier:
+            logging.info('classifier: %s' % clf_name)
+            # fit the classifier to the training data:
+            train_odd_peak.zscore(signals=data_detrend, run_list=train_runs)
+            # get the example labels:
+            train_stim = copy.deepcopy(train_odd_peak.stim[train_odd_peak.runs != run])
+            # replace labels for single-label classifiers:
+            if clf_name in class_labels:
+                # replace all other labels with other
+                train_stim = ['other' if x != clf_name else x for x in train_stim]
+                # turn into a numpy array
+                train_stim = np.array(train_stim, dtype=object)
+            # check weights:
+            #show_weights(array=train_stim)
+            # train the classifier
+            clf.fit(train_odd_peak.data_zscored, train_stim)
+            # classifier prediction: predict on test data and save the data:
+            for test_set in test_sets:
+                logging.info('testing on test set %s' % test_set.name)
+                test_set.zscore(signals=data_detrend, run_list=test_runs)
+                if test_set.data_zscored.size == 0:
+                    continue
+                # create dataframe containing classifier predictions:
+                df_pred = test_set.predict(clf=clf, run_list=test_runs)
+                # add the current classifier as a new column:
+                df_pred['classifier'] = np.repeat(clf_name, len(df_pred))
+                # add a label that indicates the mask / training regime:
+                df_pred['mask'] = np.repeat(mask_label, len(df_pred))
+                # melt the data frame:
+                df_pred_melt = melt_df(df=df_pred, melt_columns=train_stim)
+                # append dataframe to list of dataframe results:
+                data_list.append(df_pred_melt)
+'''
+========================================================================
+DECODING ON RESTING STATE DATA:
+========================================================================
+'''
+logging.info('Loading fMRI data of %d resting state runs ...' % len(path_rest))
+data_rest = [image.load_img(i) for i in path_rest]
+logging.info('loading successful!')
+
+# combine all masks from the feature selection by intersection:
+def multimultiply(arrays):
+    import copy
+    # start with the first array:
+    array_intersect = copy.deepcopy(arrays[0].astype(int))
+    # loop through all remaining arrays:
+    for i in range(1, len(arrays)):
+        # multiply the current array with the intersection of all previous arrays:
+        array_intersect = np.multiply(array_intersect, copy.deepcopy(arrays[i].astype(int)))
+    # return the intersection of all arrays:
+    return array_intersect
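+# illustration: element-wise multiplication of binary masks yields their
+# intersection, e.g. np.multiply([1, 1, 0], [1, 0, 0]) -> [1, 0, 0]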
+
+
+for mask_label in ['union', 'union_hpc']:
+    # calculate the intersection of all masks by multiplication:
+    if mask_label == 'union':
+        masks_union = multimultiply(tmaps_masked_thresh_bin).astype(int).astype(bool)
+    elif mask_label == 'union_hpc':
+        masks_union = multimultiply(mask_hpc).astype(int).astype(bool)
+    # masked tmap into image like object:
+    masks_union_nii = image.new_img_like(path_func_task[0], masks_union)
+    path_save = opj(path_out_masks, '{}_task-rest_mask-{}.nii.gz'.format(sub, mask_label))
+    masks_union_nii.to_filename(path_save)
+    # mask all resting state runs with the intersected feature selection mask:
+    data_rest_masked = [masking.apply_mask(i, masks_union_nii) for i in data_rest]
+    # detrend and standardize each resting state run separately:
+    data_rest_final = [clean(i, detrend=True, standardize=True) for i in data_rest_masked]
+    # mask all functional task runs separately:
+    data_task_masked = [masking.apply_mask(i, masks_union_nii) for i in data_task]
+    # detrend each task run separately:
+    data_task_masked_detrend = [clean(i, detrend=True, standardize=False) for i in data_task_masked]
+    # combine the detrended data of all runs:
+    data_task_masked_detrend = np.vstack(data_task_masked_detrend)
+    # select only oddball data and standardize:
+    train_odd_peak.zscore(signals=data_task_masked_detrend, run_list=runs)
+    # write session and run labels:
+    ses_labels = [i.split(sub + "_")[1].split("_task")[0] for i in path_rest]
+    run_labels = [i.split("prenorm_")[1].split("_space")[0] for i in path_rest]
+    file_names = ['_'.join([a, b]) for (a, b) in zip(ses_labels, run_labels)]
+    rest_interval = 1
+    # save the voxel patterns:
+    num_voxels = len(train_odd_peak.data_zscored[0])
+    voxel_labels = ['v' + str(x) for x in range(num_voxels)]
+    df_patterns = pd.DataFrame(train_odd_peak.data_zscored, columns=voxel_labels)
+    # add the stimulus labels to the dataframe:
+    df_patterns['label'] = copy.deepcopy(train_odd_peak.stim)
+    # add the participant id to the dataframe:
+    df_patterns['id'] = np.repeat(df_events['subject'].unique(), len(train_odd_peak.stim))
+    # add the mask label:
+    df_patterns['mask'] = np.repeat(mask_label, len(train_odd_peak.stim))
+    # split pattern dataframe by label:
+    df_pattern_list = [df_patterns[df_patterns['label'] == i] for i in df_patterns['label'].unique()]
+    # create file path to save the dataframe:
+    file_paths = [opj(path_out_data, '{}_voxel_patterns_{}_{}'.format(sub, mask_label, i)) for i in df_patterns['label'].unique()]
+    # save the final dataframe to a .csv-file:
+    [i.to_csv(j + '.csv', sep=',', index=False) for (i, j) in zip(df_pattern_list, file_paths)]
+    # save only the voxel patterns as niimg-like objects
+    [masking.unmask(X=i.loc[:, voxel_labels].to_numpy(), mask_img=masks_union_nii).to_filename(j + '.nii.gz') for (i, j) in zip(df_pattern_list, file_paths)]
+    #[image.new_img_like(path_func_task[0], i.loc[:, voxel_labels].to_numpy()).to_filename(j + '.nii.gz') for (i, j) in zip(df_pattern_list, file_paths)]
+    # decoding on resting state data:
+    for clf_name, clf in clf_set.items():
+        # print classifier name:
+        logging.info('classifier: %s' % clf_name)
+        # get the example labels for all slow trials:
+        train_stim = copy.deepcopy(train_odd_peak.stim)
+        # replace labels for single-label classifiers:
+        if clf_name in class_labels:
+            # replace all other labels with other
+            train_stim = ['other' if x != clf_name else x for x in train_stim]
+            # turn into a numpy array
+            train_stim = np.array(train_stim, dtype=object)
+        # train the classifier
+        clf.fit(train_odd_peak.data_zscored, train_stim)
+        # classifier prediction: predict on test data and save the data:
+        pred_rest_class = [clf.predict(i) for i in data_rest_final]
+        pred_rest_proba = [clf.predict_proba(i) for i in data_rest_final]
+        # get the class names of the classifier:
+        classes_names = clf.classes_
+        # save classifier predictions on resting state scans
+        for t in range(len(pred_rest_proba)):
+            # create a dataframe with the probabilities of each class:
+            df_rest_pred = pd.DataFrame(
+                    pred_rest_proba[t], columns=classes_names)
+            # get the number of predictions made:
+            num_pred = len(df_rest_pred)
+            # get the number of trials in the test set:
+            num_trials = int(num_pred / rest_interval)
+            # add the predicted class label to the dataframe:
+            df_rest_pred['pred_label'] = pred_rest_class[t]
+            # add the true stimulus label to the dataframe:
+            df_rest_pred['stim'] = np.full(num_pred, np.nan)
+            # add the volume number (TR) to the dataframe:
+            df_rest_pred['tr'] = np.arange(1, num_pred + 1)
+            # add the sequential TR to the dataframe:
+            df_rest_pred['seq_tr'] = np.arange(1, num_pred + 1)
+            # add the trial number to the dataframe:
+            df_rest_pred['trial'] = np.tile(
+                    np.arange(1, rest_interval + 1), num_trials)
+            # add the run number to the dataframe:
+            df_rest_pred['run_study'] = np.repeat(run_labels[t], num_pred)
+            # add the session number to the dataframe:
+            df_rest_pred['session'] = np.repeat(ses_labels[t], len(df_rest_pred))
+            # add the inter trial interval to the dataframe:
+            df_rest_pred['tITI'] = np.tile('rest', num_pred)
+            # add the participant id to the dataframe:
+            df_rest_pred['id'] = np.repeat(df_events['subject'].unique(), num_pred)
+            # add the name of the classifier to the dataframe:
+            df_rest_pred['test_set'] = np.repeat('rest', num_pred)
+            # add the name of the classifier to the dataframe;
+            df_rest_pred['classifier'] = np.repeat(clf_name, num_pred)
+            # add a label that indicates the mask / training regime:
+            df_rest_pred['mask'] = np.repeat(mask_label, len(df_rest_pred))
+            # melt the data frame:
+            df_pred_melt = melt_df(df=df_rest_pred, melt_columns=train_stim)
+            # append dataframe to list of dataframe results:
+            data_list.append(df_pred_melt)
+        # run classifier trained on all runs across all test sets:
+        for test_set in test_sets:
+            logging.info('testing on test set %s' % test_set.name)
+            test_set.zscore(signals=data_task_masked_detrend, run_list=runs)
+            if test_set.data_zscored.size == 0:
+                continue
+            # create dataframe containing classifier predictions:
+            df_pred = test_set.predict(clf=clf, run_list=runs)
+            # add the current classifier as a new column:
+            df_pred['classifier'] = np.repeat(clf_name, len(df_pred))
+            # add a label that indicates the mask / training regime:
+            df_pred['mask'] = np.repeat(mask_label, len(df_pred))
+            # melt the data frame:
+            df_pred_melt = melt_df(df=df_pred, melt_columns=train_stim)
+            # append dataframe to list of dataframe results:
+            data_list.append(df_pred_melt)
+
+# concatenate all decoding dataframes in one final dataframe:
+df_all = pd.concat(data_list, sort=False)
+# create file path to save the dataframe:
+file_path = opj(path_out_data, '{}_decoding.csv'.format(sub))
+# save the final dataframe to a .csv-file:
+df_all.to_csv(file_path, sep=',', index=False)
+
+'''
+========================================================================
+STOP LOGGING:
+========================================================================
+'''
+end = time.time()
+total_time = (end - start)/60
+logging.info('total running time: %0.2f minutes' % total_time)
+logging.shutdown()

+ 41 - 0
code/docs/highspeed-decoding-docs.Rmd

@@ -0,0 +1,41 @@
+---
+title: "Faster than thought: Detecting sub-second activation sequences with sequential fMRI pattern analysis"
+subtitle: "Short project title: highspeed"
+author:
+  - Lennart Wittkuhn^[Max Planck Institute for Human Development, wittkuhn@mpib-berlin.mpg.de]
+  - Nicolas W. Schuck^[Max Planck Institute for Human Development, schuck@mpib-berlin.mpg.de]
+date: "Last update: `r format(Sys.time(), '%d %B, %Y')`"
+output:
+  html_document:
+    toc: true
+    self_contained: true
+    toc_float: true
+    toc_depth: 3
+    number_sections: true
+    highlight: pygments
+    theme: cosmo
+    df_print: paged
+    fig_caption: true
+fig.align: "center"
+header-includes:
+  - \usepackage{fontspec}
+  - \setmainfont{AgfaRotisSansSerif}
+email: wittkuhn@mpib-berlin.mpg.de
+---
+
+```{r, libraries, echo=FALSE, message=FALSE, include=FALSE}
+if (!requireNamespace("pacman")) install.packages("pacman")
+packages_cran <- c("here")
+pacman::p_load(char = packages_cran)
+if (basename(here::here()) == "highspeed"){
+  path_root = here::here("highspeed-decoding")
+} else {
+  path_root = here::here()
+}
+```
+
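+The code chunks below embed the Python decoding script and the bash cluster script verbatim; they are displayed but not evaluated (`eval=FALSE`) when this document is rendered.
+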
+```{python, echo=TRUE, code=readLines(file.path(path_root, "code", "decoding", "highspeed_decoding.py")), eval=FALSE, python.reticulate=FALSE}
+```
+
+```{bash, echo=TRUE, code=readLines(file.path(path_root, "code", "decoding", "highspeed-decoding-cluster.sh")), eval=FALSE}
+```

+ 13 - 0
highspeed-decoding.Rproj

@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: No
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX