소스 검색

add script for plotting stripplot of correlations

Christian O. Häusler 2 년 전
부모
커밋
671d4787bb
7개의 변경된 파일519개의 추가작업 그리고 239개의 파일을 삭제
  1. 2 1
      code/data_mask_concat_runs.py
  2. 40 36
      code/data_srm_fitting.py
  3. 22 15
      code/get_wmatrix_for_left-out.py
  4. 59 19
      code/plot_bland-altman.py
  5. 93 0
      code/plot_stripplot.py
  6. 285 168
      code/predict_ppa.py
  7. 18 0
      code/predict_ppa.submit

+ 2 - 1
code/data_mask_concat_runs.py

@@ -129,8 +129,9 @@ def process_infiles(in_fpathes):
         # reshape to voxel x TRs
         masked_new_data = np.transpose(masked_new_data)
 
+        # slice the last part of the 8th AO run such that
+        # all subjects have 263 TRs (like sub-04 in which 75 TRs are missing)
         if 'aomovie_run-8_bold_filtered' in in_fpath and 'sub-04' not in in_fpath:
-            print('slicing run-8')
             masked_new_data = masked_new_data[:, :263]
 
         # concatenate current time-series to previous time-series

+ 40 - 36
code/data_srm_fitting.py

@@ -105,10 +105,10 @@ def array_cutting(in_fpath):
     '''
     # check (hard coded) expected number of TRs
     array = np.load(in_fpath)
-    print('cutting', in_fpath)
+    print('\nloading', in_fpath)
     dim = array.shape
 
-    if dim[1] > 7123:  # all except sub-04
+    if dim[1] == 7198:  # all except sub-04
         print(in_fpath[:6], dim, '(before cutting)')
         # in case AO data come before the AV data
         # cut the last 75 TRs from the audio-description's data
@@ -137,7 +137,7 @@ def fit_srm(list_of_arrays, out_dir):
     srm = brainiak.funcalign.srm.SRM(features=n_feat, n_iter=n_iter)
 
     # fit the SRM model
-    print(f'Fitting SRM to data of all subjects except {subj}...')
+    print(f'fitting SRM to data of all subjects except {subj}...')
     srm.fit(list_of_arrays)
 
     return srm
@@ -179,6 +179,8 @@ if __name__ == "__main__":
     subj, out_dir, n_feat, n_iter = parse_arguments()
 
     # a) SRM with data from AO & AV
+    model = 'srm-ao-av'
+    print('\nProcessing data for model', model)
     # find all input files
     aoav_fpathes = find_files(AOAV_TRAIN_PATTERN)
     # filter for non-current subject
@@ -187,11 +189,10 @@ if __name__ == "__main__":
     aoav_arrays = []
     # loops through subjects (one concatenated / masked time-series per sub)
     for aoav_fpath in aoav_fpathes:
-        # ~75 TRs of run-8 in sub-04 are missing
-        # Do:
-        # a) perform zero padding
-        # corrected_array = zero_padding(in_fpath)
-        # b) cutting of all other arrays
+        # 75 TRs of run-8 in sub-04 are missing
+        ### cutting of all other arrays
+        ### not necessarry anymore 'cause TRs got cutted in the script
+        ### that performed the masking but still here as 'sanity check'
         corrected_aoav_array = array_cutting(aoav_fpath)
 
         # perform zscoring across concatenated experiments
@@ -203,7 +204,6 @@ if __name__ == "__main__":
         aoav_arrays.append(zscored_aoav_array)
 
     # fit the SRM model
-    model = 'srm-ao-av'
     aoav_srm = fit_srm(aoav_arrays, out_dir)
 
     # prepare saving results as pickle
@@ -213,29 +213,11 @@ if __name__ == "__main__":
     os.makedirs(os.path.dirname(out_fpath), exist_ok=True)
     # save it
     aoav_srm.save(out_fpath)
-    print('SRM saved to', out_fpath)
-
-    # b) SRM with shuffled AO & AV data (negative control):
-    model = 'srm-ao-av-shuffled'
-    # shuffle the arrays before fitting the model
-    # always take the same seed for every subject
-    # by deriving it from the subject's number
-    random.seed(int(subj[-2:]))
-    shuffled_aoav_arrays = shuffle_all_arrays(aoav_arrays)
-    # fit the SRM model
-    shuffled_aoav_srm = fit_srm(shuffled_aoav_arrays, out_dir)
-
-    # prepare saving results as pickle
-    out_file = f'{model}_feat{n_feat}-iter{n_iter}.npz'
-    out_fpath = os.path.join(out_dir, subj, out_file)
-    # create (sub)directories
-    os.makedirs(os.path.dirname(out_fpath), exist_ok=True)
-    # save it
-    shuffled_aoav_srm.save(out_fpath)
-    print('SRM saved to', out_fpath)
+    print('SRM (AO & AV) saved to', out_fpath)
 
-    # c) SRM with data from AO, AV, VIS
+    # b) SRM with data from AO, AV, VIS
     model = 'srm-ao-av-vis'
+    print('\nProcessing data for model', model)
     # find all input files
     vis_fpathes = find_files(VIS_TRAIN_PATTERN)
     # filter for non-current subject
@@ -246,18 +228,20 @@ if __name__ == "__main__":
     # loops through subjects (one concatenated / masked time-series per sub)
     for aoav_fpath, vis_fpath in zip(aoav_fpathes, vis_fpathes):
         # load the data of AO & AV (again; not time-efficient but what ever)
-        # ~75 TRs of run-8 in sub-04 are missing
-        # Do:
-        # a) perform zero padding
-        # corrected_array = zero_padding(in_fpath)
-        # b) cutting of all other arrays
+        # 75 TRs of run-8 in sub-04 are missing
+        ###  cutting of all other arrays
+        ### not necessarry anymore 'cause TRs got cutted in the script
+        ### that performed the masking but still here as 'sanity check'
         corrected_aoav_array = array_cutting(aoav_fpath)
 
         # load the VIS data
         print('loading', vis_fpath)
         vis_array = np.load(vis_fpath)
         dim = vis_array.shape
-        aoavvis_array = np.concatenate([corrected_aoav_array, vis_array], axis=1)
+        print(vis_fpath[:6], dim)
+        # concat AOAV data and VIS data
+        aoavvis_array = np.concatenate([corrected_aoav_array, vis_array],
+                                       axis=1)
 
         # perform zscoring across concatenated experiments
         zscored_aoavvis_array = stats.zscore(aoavvis_array,
@@ -278,3 +262,23 @@ if __name__ == "__main__":
     # save it
     aoavvis_srm.save(out_fpath)
     print('SRM (AO, AV, VIS) saved to', out_fpath)
+
+    # c) SRM with shuffled AO & AV data (negative control):
+    model = 'srm-ao-av-shuffled'
+    print('\nProcessing data for model', model)
+    # shuffle the arrays before fitting the model
+    # always take the same seed for every subject
+    # by deriving it from the subject's number
+    random.seed(int(subj[-2:]))
+    shuffled_aoav_arrays = shuffle_all_arrays(aoav_arrays)
+    # fit the SRM model
+    shuffled_aoav_srm = fit_srm(shuffled_aoav_arrays, out_dir)
+
+    # prepare saving results as pickle
+    out_file = f'{model}_feat{n_feat}-iter{n_iter}.npz'
+    out_fpath = os.path.join(out_dir, subj, out_file)
+    # create (sub)directories
+    os.makedirs(os.path.dirname(out_fpath), exist_ok=True)
+    # save it
+    shuffled_aoav_srm.save(out_fpath)
+    print('SRM (AO & AV shuffled) saved to', out_fpath)

+ 22 - 15
code/get_wmatrix_for_left-out.py

@@ -20,13 +20,6 @@ import re
 # constants
 IN_PATTERN = 'sub-??/sub-??_task_aomovie-avmovie_run-1-8_bold-filtered.npy'
 
-# which TRs do we wanna use?
-# AO indice: 0 to 3598
-# AV indices: 3599 to 7122
-# the last 75 TRs of AV were cutted because they are missing in sub-04
-# start = 0  # 3599
-# end = 451  # 3599 + 451 + 441 + 438 + 488 + 462 + 439 + 542 + (338-75)
-
 
 def parse_arguments():
     '''
@@ -116,23 +109,36 @@ if __name__ == "__main__":
     ]
     # and vary the amount of TRs used for alignment
     starts_ends = [
-        (0, 451),
-        (0, 3524),
-        (3524, 3975),
-        (3524, 7123),
-        (0, 7123)
+        (0, 451),      # AO, 1 run
+        (0, 892),      # AO, 2 runs
+        (0, 1330),     # AO, 3 runs
+        (0, 1818),     # AO, 4 runs
+        (0, 2280),     # AO, 5 runs
+        (0, 2719),     # AO, 6 runs
+        (0, 3261),     # AO, 7 runs
+        (0, 3524),     # AO, 8 runs
+        (3524, 3975),  # AV, 1 run
+        (3524, 4416),  # AV, 2 runs
+        (3524, 4854),  # AV, 3 runs
+        (3524, 5342),  # AV, 4 runs
+        (3524, 5804),  # AV, 5 runs
+        (3524, 6243),  # AV, 6 runs
+        (3524, 6785),  # AV, 7 runs
+        (3524, 7123),  # AV, 8 runs
+        (0, 7123)      # AO & AV
     ]
 
     for model in models:
         for start, end in starts_ends:
-            print(f'using {model}, {start}-{end}')
+            print(f'\nUsing {model}, {start}-{end}')
             for subj in subjs:
+                print('Processing', subj)
                 in_fpath = os.path.join(
                     in_dir, subj, f'{model}_feat{n_feat}-iter{n_iter}.npz'
                 )
 
                 # load the srm from file
-                print('Loading', in_fpath)
+                print('Loading SRM:', in_fpath)
                 srm = load_srm(in_fpath)
 
                 # leave the original srm untouched but copy it
@@ -140,6 +146,7 @@ if __name__ == "__main__":
                 srm_sliced.s_ = srm_sliced.s_[:, start:end]
 
                 in_fpath = IN_PATTERN.replace('sub-??', subj)
+                print('Loading data:', in_fpath)
                 array = np.load(in_fpath)
                 array = array[:, start:end]
                 w_matrix = srm_sliced.transform_subject(array)
@@ -151,4 +158,4 @@ if __name__ == "__main__":
                 os.makedirs(os.path.dirname(out_fpath), exist_ok=True)
                 # save it
                 np.save(out_fpath, w_matrix)
-                print(f'weight matrix for {subj} saved to', out_fpath, '\n')
+                print(f'weight matrix for {subj} saved to', out_fpath)

+ 59 - 19
code/plot_bland-altman.py

@@ -26,7 +26,8 @@ from scipy.stats import gaussian_kde
 
 
 # constants
-MASK_PTTRN = 'sub-??/masks/in_bold3Tp2/grp_PPA_bin.nii.gz'
+GRP_PPA_PTTRN = 'sub-??/masks/in_bold3Tp2/grp_PPA_bin.nii.gz'
+AO_FOV_MASK = 'sub-??/masks/in_bold3Tp2/audio_fov.nii.gz'
 GM_MASK = 'sub-??/masks/in_bold3Tp2/gm_bin_dil_fov.nii.gz'
 
 # binary mask(s) of individual visual localizer (in subject space)
@@ -35,17 +36,25 @@ PPA_MASK_PATTERN = 'inputs/studyforrest-data-visualrois/'\
 
 # individual 2nd level results (primary cope in subject space)
 PREDICTED_PREFIX = 'predicted-VIS-PPA_from_'
-
 PREDICTIONS = [
     'anatomy.nii.gz',
-    'srm-ao-av_feat10_0-451.nii.gz',
-    'srm-ao-av_feat10_0-3599.nii.gz',
-    'srm-ao-av_feat10_3599-4050.nii.gz',
-    'srm-ao-av_feat10_3599-7123.nii.gz',
     'srm-ao-av-vis_feat10_0-451.nii.gz',
-    'srm-ao-av-vis_feat10_0-3599.nii.gz',
-    'srm-ao-av-vis_feat10_3599-4050.nii.gz',
-    'srm-ao-av-vis_feat10_3599-7123.nii.gz'
+    'srm-ao-av-vis_feat10_0-892.nii.gz',
+    'srm-ao-av-vis_feat10_0-1330.nii.gz',
+    'srm-ao-av-vis_feat10_0-1818.nii.gz',
+    'srm-ao-av-vis_feat10_0-2280.nii.gz',
+    'srm-ao-av-vis_feat10_0-2719.nii.gz',
+    'srm-ao-av-vis_feat10_0-3261.nii.gz',
+    'srm-ao-av-vis_feat10_0-3524.nii.gz',
+    'srm-ao-av-vis_feat10_3524-3975.nii.gz',
+    'srm-ao-av-vis_feat10_3524-4416.nii.gz',
+    'srm-ao-av-vis_feat10_3524-4854.nii.gz',
+    'srm-ao-av-vis_feat10_3524-5342.nii.gz',
+    'srm-ao-av-vis_feat10_3524-5804.nii.gz',
+    'srm-ao-av-vis_feat10_3524-6243.nii.gz',
+    'srm-ao-av-vis_feat10_3524-6785.nii.gz',
+    'srm-ao-av-vis_feat10_3524-7123.nii.gz',
+    'srm-ao-av-vis_feat10_0-7123.nii.gz'
 ]
 
 
@@ -356,6 +365,35 @@ def create_mosaic(in_pattern, dims, out_fpath, dpi):
          out_fpath])
 
 
+def load_mask(subj):
+    '''
+    '''
+    # open the mask of cortices (at the moment it justs the union of
+    # individual PPAs)
+    grp_ppa_fpath = GRP_PPA_PTTRN.replace('sub-??', subj)
+#    print('PPA GRP mask:\t', grp_ppa_fpath)
+
+    # subject-specific field of view in audio-description
+    ao_fov_mask = AO_FOV_MASK.replace('sub-??', subj)
+#    print('ind. FoV mask:\t', ao_fov_mask)
+
+    # load the masks
+    grp_ppa_img = nib.load(grp_ppa_fpath)
+    ao_fov_img = nib.load(ao_fov_mask)
+
+    # (dilated) gray matter mask; see constat at script's top
+    #gm_fpath = GM_MASK.replace('sub-??', subj)
+    #gm_img = nib.load(gm_fpath)
+    #final_mask_data = grp_ppa_img.get_fdata() * gm_img.get_fdata()
+
+    final_mask_data = grp_ppa_img.get_fdata() * ao_fov_img.get_fdata()
+    final_mask_img = nib.Nifti1Image(final_mask_data,
+                                     grp_ppa_img.affine,
+                                     header=grp_ppa_img.header)
+
+    return final_mask_img
+
+
 if __name__ == "__main__":
     # get command line argument
     inDir, outDir = parse_arguments()
@@ -368,7 +406,7 @@ if __name__ == "__main__":
     subjs = [re.search(r'sub-..', string).group() for string in subjs_pathes]
     subjs = sorted(list(set(subjs)))
 
-    for prediction in PREDICTIONS:
+    for prediction in PREDICTIONS[:]:
         # loop over subjects
         for subj in subjs:
             print('\nProcessing', subj)
@@ -392,21 +430,23 @@ if __name__ == "__main__":
             # put the array into the dataframe
             zmaps_df['vis'] = np.ndarray.flatten(vis_data)
 
-            # create the mask by combining PPA group overlap (in bold3TP)
-            # and the subject-specific gray matter mask
-            grp_ppa_fpath = MASK_PTTRN.replace('sub-??', subj)
-            grp_ppa_img = nib.load(grp_ppa_fpath)
-            gm_mask = GM_MASK.replace('sub-??', subj)
-            gm_img = nib.load(gm_mask)
-            # combine the mask
-            final_mask_data = grp_ppa_img.get_fdata() * gm_img.get_fdata()
+            # load the GRP PPA mask (in subject space)
+            # (GRP PPA will be masked with individual FoV)
+            final_mask_img = load_mask(subj)
+            final_mask_data = final_mask_img.get_fdata()
             # put the array into the dataframe
             zmaps_df['ppa_grp'] = np.ndarray.flatten(final_mask_data)
 
             # load the subject-specific PPA mask (Sengupta et al., 2016)
             ppa_fpathes = find_files(PPA_MASK_PATTERN.replace('sub-*', subj))
             ppaIndData = fmri_dataset(ppa_fpathes).samples.sum(axis=0)
-            zmaps_df['ppa_ind'] = np.ndarray.flatten(ppaIndData)
+            # masked it with individual FoV (just to be sure)
+            ao_fov_fpath = find_files(AO_FOV_MASK.replace('sub-??', subj))
+            ao_fov_data = fmri_dataset(ao_fov_fpath).samples
+
+            ppaIndMasked = ppaIndData * ao_fov_data
+
+            zmaps_df['ppa_ind'] = np.ndarray.flatten(ppaIndMasked)
 
             # let the function do the calculation and the plotting
             process_df(subj, zmaps_df, outDir, prediction)

+ 93 - 0
code/plot_stripplot.py

@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+'''
+created on Mon March 29th 2021
+author: Christian Olaf Haeusler
+'''
+
+from glob import glob
+import argparse
+import ipdb
+import matplotlib.pyplot as plt
+import os
+import pandas as pd
+import seaborn as sns
+
+
+def parse_arguments():
+    '''
+    '''
+    parser = argparse.ArgumentParser(
+        description='loads a csv file to plot data as stripplot')
+
+    parser.add_argument('-infile',
+                        required=False,
+                        default='test/corr-empVIS-vs-func.csv',
+                        help='the data as csv')
+
+    parser.add_argument('-outdir',
+                        required=False,
+                        default='test',
+                        help='output directory')
+
+    args = parser.parse_args()
+
+    infile = args.infile
+    outdir = args.outdir
+
+    return infile, outdir
+
+
+def find_files(pattern):
+    '''
+    '''
+    def sort_nicely(l):
+        '''Sorts a given list in the way that humans expect
+        '''
+        convert = lambda text: int(text) if text.isdigit() else text
+        alphanum_key = lambda key: [convert(c) for c in re.split("([0-9]+)", key)]
+        l.sort(key=alphanum_key)
+
+        return l
+
+    found_files = glob(pattern)
+    found_files = sort_nicely(found_files)
+
+    return found_files
+
+
+if __name__ == "__main__":
+    # read command line arguments
+    inFile, outDir = parse_arguments()
+
+
+
+    # close figure
+    plt.close()
+
+    df = pd.read_csv(inFile)
+
+    sns.set_theme(style='whitegrid')
+
+    ax = sns.stripplot(x='runs',
+                       y="Pearson's r",
+                       hue='prediction from',
+                       jitter=0.2,
+                       linewidth=1,
+                       dodge=True,
+                       data=df)
+
+    # prepare name of output file
+    if 'VIS' in inFile:
+        which_PPA = 'VIS'
+    elif 'AO' in inFile:
+        which_PPA = 'AO'
+    else:
+        print('unkown predicted PPA (must be VIS or AO)')
+
+    plt.savefig(f'{outDir}/correlations-{which_PPA}-stripplot.svg',
+                bbox_inches='tight')
+
+    plt.savefig(f'{outDir}/correlations-{which_PPA}-stripplot.png',
+                bbox_inches='tight')
+
+    plt.show()

+ 285 - 168
code/predict_ppa.py

@@ -14,6 +14,7 @@ import nibabel as nib
 import ipdb
 import numpy as np
 import os
+import pandas as pd
 import re
 import subprocess
 
@@ -43,7 +44,6 @@ AO_ZMAP_PATTERN = 'inputs/studyforrest-ppa-analysis/'\
 VIS_ZMAP_PATTERN = 'inputs/studyforrest-data-visualrois/'\
     'sub-*/2ndlvl.gfeat/cope*.feat/stats/zstat1.nii.gz'
 
-
 # contrast used by Sengupta et al. (2016) to create the PPA mask
 VIS_VPN_COPES = OrderedDict({  # dicts are ordered from Python 3.7
     'sub-01': 'cope8',
@@ -63,6 +63,34 @@ VIS_VPN_COPES = OrderedDict({  # dicts are ordered from Python 3.7
 })
 
 
+# the models that we have
+models = [
+    'srm-ao-av',
+    'srm-ao-av-vis'
+]
+
+# and vary the amount of TRs used for alignment
+starts_ends = [
+    (0, 451,  'AO', 1),     # AO, 1 run
+    (0, 892,  'AO', 2),     # AO, 2 runs
+    (0, 1330, 'AO', 3),     # AO, 3 runs
+    (0, 1818, 'AO', 4),     # AO, 4 runs
+    (0, 2280, 'AO', 5),     # AO, 5 runs
+    (0, 2719, 'AO', 6),     # AO, 6 runs
+    (0, 3261, 'AO', 7),     # AO, 7 runs
+    (0, 3524, 'AO', 8),     # AO, 8 runs
+    # (0,    7123, 16),  # AO & AV
+    (3524, 3975, 'AV', 1),  # AV, 1 run
+    (3524, 4416, 'AV', 2),  # AV, 2 runs
+    (3524, 4854, 'AV', 3),  # AV, 3 runs
+    (3524, 5342, 'AV', 4),  # AV, 4 runs
+    (3524, 5804, 'AV', 5),  # AV, 5 runs
+    (3524, 6243, 'AV', 6),  # AV, 6 runs
+    (3524, 6785, 'AV', 7),  # AV, 7 runs
+    (3524, 7123, 'AV', 8)  # AV, 8 runs
+]
+
+
 def parse_arguments():
     '''
     '''
@@ -118,70 +146,77 @@ def find_files(pattern):
     return found_files
 
 
-def transform_ind_vis_ppas(subjs):
+def transform_ind_ppas(zmap_fpathes, subjs):
     '''
     '''
     for source_subj in subjs:
-        # name of output fule
-        out_path = (os.path.join(in_dir, 'masks', 'in_mni'))
-        out_file = f'{source_subj}_VIS-PPA.nii.gz'
-        out_fpath = os.path.join(out_path, out_file)
-        os.makedirs(out_path, exist_ok=True)
         # filter pathes for the current subject
         zmap_fpath = [x for x in zmap_fpathes if source_subj in x][0]
+        # name of output fule
+        out_path = (os.path.join(in_dir, 'masks', 'in_mni'))
 
-        if not os.path.exists(out_fpath):
-            print(f'{source_subj}: from bold3Tp to MNI using {zmap_fpath}')
+        if 'studyforrest-data-visualrois' in zmap_fpath:
+            out_file = f'{source_subj}_VIS-PPA.nii.gz'
+            out_fpath = os.path.join(out_path, out_file)
+        elif '2nd-lvl_audio-ppa-ind' in zmap_fpath:
+            out_file = f'{source_subj}_AO-PPA.nii.gz'
+            out_fpath = os.path.join(out_path, out_file)
+        else:
+            print('unkown source for PPA (must be from VIS or AO)')
+            continue
+
+        # create the output path
+        os.makedirs(out_path, exist_ok=True)
 
-            subj2templWarp = os.path.join(TNT_DIR,
-                                        source_subj,
-                                        'bold3Tp2/in_grpbold3Tp2/'
-                                        'subj2tmpl_warp.nii.gz'
-                                        )
+        print(f'{source_subj}: from bold3Tp to MNI using {zmap_fpath}')
+        #
+        subj2templWarp = os.path.join(TNT_DIR,
+                                    source_subj,
+                                    'bold3Tp2/in_grpbold3Tp2/'
+                                    'subj2tmpl_warp.nii.gz'
+                                    )
 
-            # warp the subjec-specific VIS PPA to MNI space
-            warp_subj_to_mni(zmap_fpath, out_fpath,
-                            subj2templWarp, XFM_REF)
+        # warp the subjec-specific VIS PPA to MNI space
+        warp_subj_to_mni(zmap_fpath, out_fpath,
+                        subj2templWarp, XFM_REF)
 
         # warp from MNI to every other subject space
         ppa_in_mni_fpath = out_fpath
-        if not os.path.exists(out_fpath):
-            print(f'{source_subj}: from MNI to other subjs using {ppa_in_mni_fpath}')
-            for target_subj in subjs:
-                # do not transform the current's subject volume back
-                # into its own bold3Tp2
-                if target_subj != source_subj:
-                    # create the output path & filename
-                    out_path = os.path.join(in_dir,
-                                            target_subj,
-                                            'masks',
-                                            'in_bold3Tp2'
-                                            )
-                    os.makedirs(out_path, exist_ok=True)
-                    out_file = os.path.basename(ppa_in_mni_fpath)
-                    out_fpath = os.path.join(out_path, out_file)
-
-                    # the path of the (individual) reference image
-                    subj_ref = os.path.join(TNT_DIR,
-                                            target_subj,
-                                            'bold3Tp2/brain.nii.gz')
-
-                    # the volume providing warp/coefficient
-                    subj_warp = os.path.join(TNT_DIR,
-                                            target_subj,
-                                            'bold3Tp2/in_grpbold3Tp2/'
-                                            'tmpl2subj_warp.nii.gz'
-                                            )
-
-                    # do the warping from MNI to bold3Tp2 by calling the function
-                    if not os.path.exists(out_fpath):
-                        print(out_fpath)
-                        warp_mni_to_subj(
-                            ppa_in_mni_fpath,
-                            out_fpath,
-                            subj_ref,
-                            subj_warp
-                        )
+        print(f'{source_subj}: from MNI to other subjs using {ppa_in_mni_fpath}')
+        for target_subj in subjs:
+            # do not transform the current's subject volume back
+            # into its own bold3Tp2
+            if target_subj != source_subj:
+                # create the output path & filename
+                out_path = os.path.join(in_dir,
+                                        target_subj,
+                                        'masks',
+                                        'in_bold3Tp2'
+                                        )
+                os.makedirs(out_path, exist_ok=True)
+                out_file = os.path.basename(ppa_in_mni_fpath)
+                out_fpath = os.path.join(out_path, out_file)
+
+                # the path of the (individual) reference image
+                subj_ref = os.path.join(TNT_DIR,
+                                        target_subj,
+                                        'bold3Tp2/brain.nii.gz')
+
+                # the volume providing warp/coefficient
+                subj_warp = os.path.join(TNT_DIR,
+                                        target_subj,
+                                        'bold3Tp2/in_grpbold3Tp2/'
+                                        'tmpl2subj_warp.nii.gz'
+                                        )
+
+                # do the warping from MNI to bold3Tp2 by calling the function
+                print('warp to', out_fpath)
+                warp_mni_to_subj(
+                    ppa_in_mni_fpath,
+                    out_fpath,
+                    subj_ref,
+                    subj_warp
+                )
 
     return None
 
@@ -247,11 +282,11 @@ def load_mask(subj):
     # open the mask of cortices (at the moment it justs the union of
     # individual PPAs)
     grp_ppa_fpath = GRP_PPA_PTTRN.replace('sub-??', subj)
-    print('PPA GRP mask:\t', grp_ppa_fpath)
+#    print('PPA GRP mask:\t', grp_ppa_fpath)
 
     # subject-specific field of view in audio-description
     ao_fov_mask = AO_FOV_MASK.replace('sub-??', subj)
-    print('ind. FoV mask:\t', ao_fov_mask)
+#    print('ind. FoV mask:\t', ao_fov_mask)
 
     # load the masks
     grp_ppa_img = nib.load(grp_ppa_fpath)
@@ -285,7 +320,48 @@ def load_srm(in_fpath):
     return srm
 
 
-def predict_from_cms(left_out_subj, subjs, zmap_fpathes):
+def transform_in_n_out(masked_zmaps, srm, wmatrix):
+    '''
+    '''
+    # a) solution without brainIAK
+    zmaps_in_ind = []
+    for zmap, left_out_wmatrix in zip(masked_zmaps, srm.w_):
+        intermediate_matrix = wmatrix.dot(left_out_wmatrix.T)
+        zmap_in_ind = intermediate_matrix.dot(zmap)
+        zmap_in_ind = np.transpose(zmap_in_ind)
+        zmaps_in_ind.append(zmap_in_ind)
+
+    # b) solution with brainIAK
+    # aligned zmaps to shared space
+    # k feautures x t time-points
+    # (1 time-point cause it's a zmap no time-series)
+    zmaps_in_cms = srm.transform(masked_zmaps)
+
+#     ### THIS IS TAKING THE MEAN OF 'zmaps' aligned in CMS
+#     # get the mean of features x t time-points
+#     matrix = np.stack(zmaps_in_cms)
+#     zmaps_cms_mean = np.mean(matrix, axis=0)
+#
+#     # transform from CMS into vol
+#     predicted = np.matmul(wmatrix, zmaps_cms_mean)
+#     predicted_data = np.transpose(predicted)
+
+    # transform every zmap into left-out subjects space first
+    # then take the mean
+    zmaps_in_ind = []
+    for zmap_in_cms in zmaps_in_cms:
+        zmap_in_left_out = np.matmul(wmatrix, zmap_in_cms)
+        zmap_in_left_out = np.transpose(zmap_in_left_out)
+        zmaps_in_ind.append(zmap_in_left_out)
+
+    # take the mean
+    matrix = np.stack(zmaps_in_ind)
+    predicted_data = np.mean(matrix, axis=0)
+
+    return predicted_data
+
+
+def predict_from_cms(left_out_subj, subjs, zmap_fpathes, start, end):
     '''
     '''
     # get a list of non-left-out subjects
@@ -316,6 +392,7 @@ def predict_from_cms(left_out_subj, subjs, zmap_fpathes):
         zmap_img = nib.load(zmap_fpath)
 
         # load the mask for current subject
+#        print('masking empirical data of non-left-out subject')
         mask_img = load_mask(other_subj)
         # create instance of NiftiMasker
         other_subj_masker = NiftiMasker(mask_img=mask_img)
@@ -328,33 +405,11 @@ def predict_from_cms(left_out_subj, subjs, zmap_fpathes):
 #         print(f'shape of weight matrix: {srm.w_[other_subjs.index(other_subj)].shape}')
         masked_zmaps.append(masked_data)
 
-    # aligned zmaps to shared space
-    # k feautures x t time-points
-    # (1 time-point cause it's a zmap no time-series)
-    zmaps_in_cms = srm.transform(masked_zmaps)
-
-#     ### THIS IS TAKING THE MEAN OF 'zmaps' aligned in CMS
-#     # get the mean of features x t time-points
-#     matrix = np.stack(zmaps_in_cms)
-#     zmaps_cms_mean = np.mean(matrix, axis=0)
-#
-#     # transform from CMS into vol
-#     predicted = np.matmul(wmatrix, zmaps_cms_mean)
-#     predicted_data = np.transpose(predicted)
-
-    # transform every zmap into left-out subjects space first
-    # then take the mean
-    zmaps_in_ind = []
-    for zmap_in_cms in zmaps_in_cms:
-        zmap_in_left_out = np.matmul(wmatrix, zmap_in_cms)
-        zmap_in_left_out = np.transpose(zmap_in_left_out)
-        zmaps_in_ind.append(zmap_in_left_out)
-
-    # take the mean
-    matrix = np.stack(zmaps_in_ind)
-    predicted_data = np.mean(matrix, axis=0)
+    # transform the data into the target subject's space
+    predicted_data = transform_in_n_out(masked_zmaps, srm, wmatrix)
 
     # get the mask of the left-out subject to perform its inverse transform
+#    print('loading mask to perform inverse transform')
     mask_img = load_mask(left_out_subj)
     # create instance of NiftiMasker
     nifti_masker = NiftiMasker(mask_img=mask_img)
@@ -365,11 +420,13 @@ def predict_from_cms(left_out_subj, subjs, zmap_fpathes):
 
     # adjust the name of the output file according to the input:
     if 'studyforrest-data-visualrois' in zmap_fpathes[0]:
-        out_fpath = os.path.join(in_dir, left_out_subj,
-                                    f'predicted-VIS-PPA_from_{model}_feat{n_feat}_{start}-{end}.nii.gz')
-    elif 'studyforrest-ppa-analysis' in zmap_fpathes[0]:
-        out_fpath = os.path.join(in_dir, left_out_subj,
-                                    f'predicted-AO-PPA_from_{model}_feat{n_feat}_{start}-{end}.nii.gz')
+        out_fpath = os.path.join(
+            in_dir, left_out_subj,
+            f'predicted-VIS-PPA_from_{model}_feat{n_feat}_{start}-{end}.nii.gz')
+    elif '2nd-lvl_audio-ppa-ind' in zmap_fpathes[0]:
+        out_fpath = os.path.join(
+            in_dir, left_out_subj,
+            f'predicted-AO-PPA_from_{model}_feat{n_feat}_{start}-{end}.nii.gz')
 
     # save it
     nib.save(predicted_img, out_fpath)
@@ -377,20 +434,29 @@ def predict_from_cms(left_out_subj, subjs, zmap_fpathes):
     return predicted_data
 
 
-def predict_from_ana(left_out_subj, subjs):
+def predict_from_ana(left_out_subj, nifti_masker, subjs, zmap_fpathes):
     '''
     '''
-    # load all others subjects zmaps and stack concat them
+    # filter out the left out subj
     other_subjs = [x for x in subjs if x != left_out_subj]
 
     ppa_arrays = []
     for other_subj in other_subjs:
         # open the other subject's PPA z-map which was transformed in the
         # subject-specific space of the current subject
+        if 'studyforrest-data-visualrois' in zmap_fpathes[0]:
+            which_PPA = 'VIS'
+        elif '2nd-lvl_audio-ppa-ind' in zmap_fpathes[0]:
+            which_PPA = 'AO'
+        else:
+            print('unkown source for PPA (must be from VIS or AO)')
+            break
+
         ppa_img_fpath = os.path.join(
             in_dir, left_out_subj,
             'masks', 'in_bold3Tp2',
-            f'{other_subj}_VIS-PPA.nii.gz')
+            f'{other_subj}_{which_PPA}-PPA.nii.gz')
+
         ppa_img = nib.load(ppa_img_fpath)
         masked_array = nifti_masker.transform(ppa_img)
         masked_array = np.transpose(masked_array)
@@ -407,107 +473,158 @@ def predict_from_ana(left_out_subj, subjs):
 
     out_fpath = os.path.join(in_dir,
                              left_out_subj,
-                             f'predicted-VIS-PPA_from_anatomy.nii.gz')
+                             f'predicted-{which_PPA}-PPA_from_anatomy.nii.gz')
 
     nib.save(mean_anat_img, out_fpath)
 
     return mean_anat_arr
 
 
+def run_the_predictions(zmap_fpathes, subjs):
+    '''
+    '''
+    # just take the second model that includes VIS
+    for model in models[1:]:
+        print(f'model:\t{model}_feat{n_feat}_iter{n_iter}.npz')
+
+        # the list for the dataframe that will build from the list
+        for_dataframe = []
+
+        # compute the anatomical prediction for every subject first
+        empirical_arrays = []
+        anat_pred_arrays = []
+
+        for left_out_subj in subjs[:]:
+            # load the subject's zmap of the PPA contrast
+            zmap_fpath = [x for x in zmap_fpathes if left_out_subj in x][0]
+            zmap_img = nib.load(zmap_fpath)
+
+            # load the mask (combines PPA + gray matter)
+            mask_img = load_mask(left_out_subj)
+            # create instance of NiftiMasker used to mask the 4D time-series
+            nifti_masker = NiftiMasker(mask_img=mask_img)
+            nifti_masker.fit(zmap_img)
+
+            # mask the VIS z-map
+            masked_data = nifti_masker.transform(zmap_img)
+            masked_data = np.transpose(masked_data)
+            masked_data = masked_data.flatten()
+            empirical_arrays.append(masked_data)
+
+            # predict from anatomy
+            anat_pred_array = predict_from_ana(left_out_subj,
+                                               nifti_masker,
+                                               subjs,
+                                               zmap_fpathes)
+            # append to the list of arrays
+            anat_pred_arrays.append(anat_pred_array.flatten())
+
+        # compute the correlations of empirical vs. prediction from anatomy
+        emp_vs_anat = [stats.pearsonr(empirical_arrays[idx],
+                                      anat_pred_arrays[idx]) for idx
+                                      in range(len(empirical_arrays))]
+
+        # compute the functional prediction for increasing number of
+        # fMRI runs used
+        for start, end, stim, runs in starts_ends[:]:
+            print(f'\nTRs:\t{start}-{end}')
+
+            # following list will hold, for every subject, an array
+            # of z-values
+            func_pred_arrays = []
+
+            # for the current number of runs used, loop through the subjects
+            for left_out_subj in subjs[:]:
+                # predict from CMS
+                func_pred_array = predict_from_cms(left_out_subj,
+                                                   subjs,
+                                                   zmap_fpathes,
+                                                   start,
+                                                   end)
+                # append to the list of arrays
+                func_pred_arrays.append(func_pred_array.flatten())
+
+            # compute the correlations of empirical vs. prediction from
+            # functional alignment (with currently used no. of runs)
+            emp_vs_func = [stats.pearsonr(empirical_arrays[idx],
+                                          func_pred_arrays[idx]) for idx
+                                          in range(len(empirical_arrays))]
+
+            # print the result of the currently used runs / stimulus
+            print('subject\temp. vs. anat\temp. vs. cms')
+
+            for idx, subj in enumerate(subjs):
+                print(f'{subj}\t{round(emp_vs_anat[idx][0], 2)}\t{round(emp_vs_func[idx][0], 2)}')
+
+            # get the data for the currently used TRs for aligment in shape
+            # so they can later be stored in the dataframe
+            if stim == 'AO':
+                predictor = 'audio-description'
+            elif stim == 'AV':
+                predictor = 'movie'
+
+            func_lines = [[subj, predictor, runs, corr[0]] for subj, corr in zip(subjs, emp_vs_func)]
+            # list of line for the dataframe
+            for_dataframe.extend(func_lines)
+
+        # add the correlations of prediction from anatomy vs. empirical data
+        anat_lines = [[subj, 'anatomy', 0, corr[0]]
+                      for subj, corr in zip(subjs, emp_vs_anat)]
+
+        # put the correlations per subject into the dataframe
+        for_dataframe.extend(anat_lines)
+
+        # prepare the dataframe for the current model
+        df = pd.DataFrame(for_dataframe, columns = ['sub',
+                                                    'prediction from',
+                                                    'runs',
+                                                    'Pearson\'s r'])
+
+        # adjust the name of the output file according to the input:
+        if 'studyforrest-data-visualrois' in zmap_fpathes[0]:
+            which_PPA = 'VIS'
+
+        elif '2nd-lvl_audio-ppa-ind' in zmap_fpathes[0]:
+            which_PPA = 'AO'
+
+        # save the dataframe for the currently used CMS
+        df.to_csv(f'{in_dir}/corr-emp{which_PPA}-vs-func.csv', index=False)
+
+    return None
+
+
 if __name__ == "__main__":
     # read command line arguments
     in_dir, model, n_feat, n_iter = parse_arguments()
 
-    # loob through the models
-    models = [
-        'srm-ao-av',
-        'srm-ao-av-vis'
-    ]
-    # and vary the amount of TRs used for alignment
-    starts_ends = [
-        (0, 451),
-        (0, 3524),
-        (3524, 3975),
-        (3524, 7123),
-        (0, 7123)
-    ]
-
     # get the subjects for which data are available
     subjs_path_pattern = 'sub-??'
     subjs_pathes = find_files(subjs_path_pattern)
     subjs = [re.search(r'sub-..', string).group() for string in subjs_pathes]
     subjs = sorted(list(set(subjs)))
 
+    # for later prediction from anatomy, we need to transform the
+    # subject-specific z-maps from the localizer into MNI space
+    # (and later transform then into the subject-space of the left-out subject
+
+    ### VIS LOCALIZER PPA
     # create the list of all subjects' VIS zmaps by substituting the
     # subject's string and the correct cope that was used in Sengupta et al.
     zmap_fpathes = [
         (VIS_ZMAP_PATTERN.replace('sub-*', x[0]).replace('cope*', x[1]))
         for x in VIS_VPN_COPES.items()]
 
-    for start, end in starts_ends:
-        for model in models:
-            print(f'\nTRs:\t{start}-{end}')
-            print(f'model:\t{model}_feat{n_feat}_iter{n_iter}.npz')
-
-            ### just in case I wanna predict the AO PPA again
-        #         zmap_fpathes = [
-        #             (AO_ZMAP_PATTERN.replace('sub-*', x[0]).replace('cope*', x[1]))
-        #             for x in VIS_VPN_COPES.items()]
-
-            # for later prediction from anatomy, we need to transform the
-            # subject-specific z-maps from the localizer into MNI space
-            # (and later transform then into the subject-space of the left-out subject
-#             print('\nTransforming VIS z-maps into MNI and into other subjects\' space')
-            transform_ind_vis_ppas(subjs)
-
-            # the containers to store the masked & flattened zmaps from all subjects
-            empirical_arrays = []
-            anat_pred_arrays = []
-            func_pred_arrays = []
+    print('\nTransforming VIS z-maps into MNI and into other subjects\' space')
+    transform_ind_ppas(zmap_fpathes, subjs)
 
-            for left_out_subj in subjs[:]:
-#                 print(f'Doing predictions for {left_out_subj} as left-out subject')
-                # load the subject's zmap of the PPA contrast
-                zmap_fpath = [x for x in zmap_fpathes if left_out_subj in x][0]
-                zmap_img = nib.load(zmap_fpath)
-
-                # load the mask (combines PPA + gray matter)
-                mask_img = load_mask(left_out_subj)
-                # create instance of NiftiMasker used to mask the 4D time-series
-                nifti_masker = NiftiMasker(mask_img=mask_img)
-                nifti_masker.fit(zmap_img)
-
-                # mask the VIS z-map
-                masked_data = nifti_masker.transform(zmap_img)
-                masked_data = np.transpose(masked_data)
-                masked_data = masked_data.flatten()
-                empirical_arrays.append(masked_data)
-
-                # predict from anatomy
-                anat_pred_array = predict_from_ana(left_out_subj,
-                                                   subjs)
-                # append to the list of arrays
-                anat_pred_arrays.append(anat_pred_array.flatten())
+    run_the_predictions(zmap_fpathes, subjs)
 
-                # predict from CMS
-                func_pred_array = predict_from_cms(left_out_subj,
-                                                   subjs,
-                                                   zmap_fpathes)
-                # append to the list of arrays
-                func_pred_arrays.append(func_pred_array.flatten())
-
-            # show results
-            emp_vs_func = [stats.pearsonr(empirical_arrays[idx],
-                                        func_pred_arrays[idx]) for idx
-                                        in range(len(empirical_arrays))]
+    ### AO STIMULUS PPA
+    zmap_fpathes = [
+        (AO_ZMAP_PATTERN.replace('sub-*', x[0]))
+        for x in VIS_VPN_COPES.items()]
 
-            emp_vs_anat = [stats.pearsonr(empirical_arrays[idx],
-                                        anat_pred_arrays[idx]) for idx
-                                        in range(len(empirical_arrays))]
+    print('\nTransforming AO z-maps into MNI and into other subjects\' space')
+    transform_ind_ppas(zmap_fpathes, subjs)
 
-            print('subject\temp. vs. anat\temp. vs. cms')
-            for idx, subj in enumerate(subjs):
-                print(f'{subj}\t{round(emp_vs_anat[idx][0], 2)}\t{round(emp_vs_func[idx][0], 2)}')
-#             # calculate the pearson correlation between mean correlation of
-#             # empirical vs. prediction via anatomy
-#             # empirical vs. prediction via functional alignment
+    run_the_predictions(zmap_fpathes, subjs)

+ 18 - 0
code/predict_ppa.submit

@@ -0,0 +1,18 @@
+universe = vanilla
+getenv   = True
+request_cpus = 1
+request_memory = 4G
+
+# Outputs
+output = condor_logs/$(CLUSTER).$(PROCESS).out
+error = condor_logs/$(CLUSTER).$(PROCESS).err
+log = condor_logs/$(CLUSTER).$(PROCESS).log
+
+# Execution
+initialdir = /data/group/psyinf/studyforrest-srm-movies
+executable = /bin/bash
+transfer_executable = False
+should_transfer_files = NO
+
+arguments = ./code/code/predict_ppa.py -indir 'test' -nfeat '10'
+queue