123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461 |
- #!/usr/bin/env python3
- '''
- created on Fri May 21 2021
- author: Christian Olaf Haeusler
- '''
- from glob import glob
- import argparse
- import matplotlib
- import matplotlib.pyplot as plt
- import numpy as np
- import os
- import pandas as pd
- import re
- import seaborn as sns
- import brainiak.funcalign.srm
- matplotlib.use('Agg')
# 1-based column indices of the audio-description (AO) regressors that are
# actually used from the FSL design files (temporal derivatives excluded).
AO_USED = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 17, 18]
# mapping: AO regressor index -> short regressor name used as column label
AO_NAMES = {1: 'body',
            2: 'bpart',
            3: 'fahead',
            4: 'furn',
            5: 'geo',
            6: 'groom',
            7: 'object',
            8: 'se_new',
            9: 'se_old',
            10: 'sex_f',
            11: 'sex_m',
            12: 'vse_new',
            13: 'vse_old',
            14: 'vlo_ch',
            15: 'vpe_new',
            16: 'vpe_old',
            17: 'fg_ad_lrdiff',
            18: 'fg_ad_rms'
            }
# 1-based indices of the audio-visual movie (AV) regressors that are used
AV_USED = [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14]
# mapping: AV regressor index -> short regressor name used as column label
AV_NAMES = {1: 'vse_new',
            2: 'vse_old',
            3: 'vlo_ch',
            4: 'vpe_new',
            5: 'vpe_old',
            6: 'vno_cut',
            7: 'se_new (ao)',
            8: 'se_old (ao)',
            9: 'fg_av_ger_lr',
            10: 'fg_av_ger_lr_diff',
            11: 'fg_av_ger_ml',
            12: 'fg_av_ger_pd',
            13: 'fg_av_ger_rms',
            14: 'fg_av_ger_ud'
            }
# 1-based indices of the visual localizer (VIS) regressors that are used
VIS_USED = [1, 2, 3, 4, 5, 6]
# mapping: VIS regressor index -> stimulus category name
VIS_NAMES = {1: 'body',
             2: 'face',
             3: 'house',
             4: 'object',
             5: 'scene',
             6: 'scramble'
             }
def parse_arguments():
    '''Parse command line arguments.

    Returns a 5-tuple of (AO design example, AV design example,
    VIS design example, model file, output directory).
    '''
    parser = argparse.ArgumentParser(
        description="creates the correlation of convoluted regressors from a subject's 1st lvl results directories (= all single run dirs ")
    parser.add_argument(
        '-ao',
        default='inputs/studyforrest-ppa-analysis/sub-01/run-1_audio-ppa-grp.feat/design.mat',
        help='pattern of path/file for 1st lvl (AO) design files')
    parser.add_argument(
        '-vis',
        default='inputs/studyforrest-data-visualrois/sub-01/run-1.feat/design.mat',
        help='pattern of path/file for 1st lvl (vis) design files')
    parser.add_argument(
        '-av',
        default='inputs/studyforrest-ppa-analysis/sub-01/run-1_movie-ppa-grp.feat/design.mat',
        help='pattern of path/file for 1st lvl (AV) design files')
    parser.add_argument(
        '-model',
        default='sub-01/srm-ao-av-vis_feat10-iter30.npz',
        help='the model file')
    parser.add_argument(
        '-o',
        default='test',
        help='the output directory for the PDF and SVG file')

    args = parser.parse_args()
    return args.ao, args.av, args.vis, args.model, args.o
def find_files(pattern):
    '''Return the paths matching *pattern* in natural (human) sort order.

    Numeric substrings are compared as integers, so e.g. 'run-2'
    sorts before 'run-10'.
    '''
    def natural_key(path):
        # split into digit / non-digit tokens; digits compare numerically
        return [int(token) if token.isdigit() else token
                for token in re.split("([0-9]+)", path)]

    return sorted(glob(pattern), key=natural_key)
def find_design_files(example):
    '''Derive a glob pattern from one example design file and find all runs.

    Parameters
    ----------
    example : str
        Path of one 1st-level design.mat file containing a 'run-<d>' and a
        'sub-<dd>' component (e.g. 'sub-01/run-1.feat/design.mat').

    Returns
    -------
    list of str
        Sorted paths of the design files of all runs (of sub-01).
    '''
    # raw strings: '\d' in a non-raw literal is an invalid escape sequence
    # (DeprecationWarning today, SyntaxError in future Python versions)
    run = re.search(r'run-\d', example).group()
    # generalize the example's run number to all runs
    designPattern = example.replace(run, 'run-*')
    # substitute whatever subject appears in the example by sub-01
    subj = re.search(r'sub-\d{2}', example).group()
    designPattern = designPattern.replace(subj, 'sub-01')
    designFpathes = sorted(glob(designPattern))

    return designFpathes
def load_srm(in_fpath):
    '''Load a pickled SRM model from *in_fpath*.

    brainiak's loader calls np.load() without allow_pickle=True, so np.load
    is temporarily monkey-patched to force allow_pickle=True.  The patch is
    restored in a finally block so a failing load cannot leave np.load
    permanently replaced (the original code leaked the patch on error).
    '''
    # save the pristine np.load
    np_load_old = np.load
    # force allow_pickle=True for the duration of the load
    np.load = lambda *a, **k: np_load_old(*a, allow_pickle=True, **k)
    try:
        # load the pickled model file
        srm = brainiak.funcalign.srm.load(in_fpath)
    finally:
        # always restore np.load, even if loading raised
        np.load = np_load_old

    return srm
def plot_heatmap(title, matrix, outFpath, usedRegressors=None):
    '''Draw a lower-triangle correlation heatmap and save it to disk.

    Parameters
    ----------
    title : str
        Figure title (currently not rendered; plt.title call is disabled).
    matrix : pandas.DataFrame
        Square correlation matrix to plot.
    outFpath : str
        Output path WITHOUT file extension; '.<ext>' is appended per format.
    usedRegressors : list or None
        One of AO_USED / AV_USED / VIS_USED; selects the tick-label color
        for that stimulus set.  Defaults to no coloring.
        (Was a mutable default argument `[]`; None is the safe equivalent.)
    '''
    if usedRegressors is None:
        usedRegressors = []

    # generate a mask for the upper triangle
    mask = np.zeros_like(matrix, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    # set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))
    # custom diverging colormap
    cmap = sns.diverging_palette(220, 10, sep=1, as_cmap=True)
    # draw the heatmap with the mask and correct aspect ratio
    sns_plot = sns.heatmap(matrix, mask=mask,
                           cmap=cmap,
                           square=True,
                           center=0,
                           vmin=-1.0, vmax=1,
                           annot=True,
                           annot_kws={"size": 8, "color": "k"},
                           fmt='.1f',
                           cbar_kws={"shrink": .6}
                           )
    plt.xticks(rotation=90, fontsize=12)
    plt.yticks(rotation=0, fontsize=12)
    # plt.title(title)

    def _color_ticklabels(ticklabels):
        # color the regressor tick labels of one axis according to the
        # stimulus set they belong to (black remains the default)
        if usedRegressors == AO_USED:
            for idx in range(len(AO_USED)):
                ticklabels[idx].set_color('blue')
            # the extra summed regressor ('geo&groom') appended by create_aoDf
            ticklabels[len(AO_USED)].set_color('cornflowerblue')
        elif usedRegressors == AV_USED:
            for idx in range(len(AV_USED)):
                ticklabels[idx].set_color('red')
        elif usedRegressors == VIS_USED:
            for idx in range(len(VIS_USED)):
                ticklabels[idx].set_color('y')

    # same coloring rule for both axes (the original duplicated this code)
    _color_ticklabels(plt.gca().get_xticklabels())
    _color_ticklabels(plt.gca().get_yticklabels())

    # create the output directory (guard: dirname may be empty for a bare
    # file name, and makedirs('') would raise)
    out_dir = os.path.dirname(outFpath)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    extensions = ['pdf']  # , 'png', 'svg']
    for extension in extensions:
        # BUGFIX: was `out_fpath` (an unrelated global in __main__), not the
        # `outFpath` parameter; also dropped the pointless 1-arg os.path.join
        fpath = f'{outFpath}.{extension}'
        plt.savefig(fpath, bbox_inches='tight')
        plt.close()
def create_aoDf(aofPathes):
    '''Concatenate the AO design files of all runs into one DataFrame.

    TODO: factorize / merge this function and the two functions below
    into one.
    '''
    # every 2nd column only: odd-numbered columns in the design file are
    # temporal derivatives; shift by -1 for 0-based indexing
    usecols = [(idx - 1) * 2 for idx in AO_USED]
    colnames = [AO_NAMES[idx] for idx in AO_USED]

    # read the per-run design files (8 runs) and stack them
    perRun = []
    for fpath in aofPathes:
        perRun.append(pd.read_csv(fpath,
                                  usecols=usecols,
                                  names=colnames,
                                  skiprows=5,
                                  sep='\t'))
    aoDf = pd.concat(perRun, ignore_index=True)

    # append a combined regressor
    aoDf['geo&groom'] = aoDf['geo'] + aoDf['groom']
    # aoDf['geo&groom&furn'] = aoDf['geo'] + aoDf['groom'] + aoDf['furn']

    return aoDf
def create_avDf(avfPathes):
    '''Concatenate the AV design files of all runs into one DataFrame.'''
    # every 2nd column only: odd-numbered columns in the design file are
    # temporal derivatives; shift by -1 for 0-based indexing
    usecols = [(idx - 1) * 2 for idx in AV_USED]
    colnames = [AV_NAMES[idx] for idx in AV_USED]

    # read the per-run design files (8 runs) and stack them
    perRun = []
    for fpath in avfPathes:
        perRun.append(pd.read_csv(fpath,
                                  usecols=usecols,
                                  names=colnames,
                                  skiprows=5,
                                  sep='\t'))
    avDf = pd.concat(perRun, ignore_index=True)

    return avDf
def create_visDf(visfPathes):
    '''Concatenate the VIS design files of all runs into one DataFrame.'''
    # every 2nd column only: odd-numbered columns in the design file are
    # temporal derivatives; shift by -1 for 0-based indexing
    usecols = [(idx - 1) * 2 for idx in VIS_USED]
    colnames = [VIS_NAMES[idx] for idx in VIS_USED]

    # read the per-run design files (4 runs) and stack them
    perRun = []
    for fpath in visfPathes:
        perRun.append(pd.read_csv(fpath,
                                  usecols=usecols,
                                  names=colnames,
                                  skiprows=5,
                                  sep='\t'))
    visDf = pd.concat(perRun, ignore_index=True)

    return visDf
def create_srmDf(modelFile):
    '''Load an SRM model file and return its shared response as a DataFrame.

    Columns are named 'shared feature 1' .. 'shared feature k'.
    '''
    # load the SRM from file
    srm = load_srm(modelFile)
    # shared response matrix: transpose to TRs x features
    shared = srm.s_.T
    # one named column per shared feature
    featureNames = [f'shared feature {col + 1}' for col in range(shared.shape[1])]
    srmDf = pd.DataFrame(data=shared, columns=featureNames)

    return srmDf
def create_corr_matrix(df1, df2, arctanh=False):
    '''Correlate the columns of two DataFrames.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Concatenated column-wise; must share the same (reset) index.
    arctanh : bool
        If True, apply Fisher's z-transform (np.arctanh) to the
        correlations, e.g. for averaging across models.

    Returns
    -------
    pandas.DataFrame
        Correlation matrix over all columns of both inputs.
    '''
    # concat regressors and shared responses column-wise
    regressorsAndModelDf = pd.concat([df1, df2], axis=1)
    # compute once (the original duplicated the .corr() call in both
    # branches); truthiness test instead of brittle `is True`
    regCorrMat = regressorsAndModelDf.corr()
    if arctanh:
        regCorrMat = np.arctanh(regCorrMat)

    return regCorrMat
def handle_one_or_list_of_models(regressorsDf, modelFile, start, end):
    '''Correlate regressors with shared features of one or many SRM models.

    If *modelFile* does not contain 'shuffled', correlate against that one
    model.  Otherwise, treat *modelFile* as one of a family of shuffled
    models, find all of them, and return the mean correlation matrix
    (averaged in Fisher-z space, then transformed back via tanh).

    Parameters
    ----------
    regressorsDf : pandas.DataFrame
        Regressor time courses (rows are TRs).
    modelFile : str
        Path of the .npz model file.
    start, end : int
        TR slice of the shared response to correlate against.
    '''
    if 'shuffled' not in modelFile:
        # single model: read it and slice the requested TRs
        srmDf = create_srmDf(modelFile)
        srm_TRs = srmDf[start:end].reset_index(drop=True)
        regCorrMat = create_corr_matrix(regressorsDf, srm_TRs,
                                        arctanh=False)
    else:
        # BUGFIX: derive the model stem locally; the original read the
        # global `model` defined in __main__
        directory = os.path.dirname(modelFile)
        stem = os.path.basename(modelFile).split('.npz')[0]
        # strip the trailing (shuffle) suffix and glob for all siblings
        modelPattern = os.path.join(directory, stem[:-4] + '*.npz')
        shuffledModelFpathes = find_files(modelPattern)

        # sum the Fisher-z-transformed correlation matrices of all models
        regCorrMat = None
        for idx, shuffledModelFile in enumerate(shuffledModelFpathes, 1):
            print(f'reading model no. {idx}:', shuffledModelFile)
            # BUGFIX: the original only read the FIRST shuffled model and
            # re-used its data for every following iteration
            srmDf = create_srmDf(shuffledModelFile)
            srm_TRs = srmDf[start:end].reset_index(drop=True)
            current = create_corr_matrix(regressorsDf, srm_TRs,
                                         arctanh=True)
            regCorrMat = current if regCorrMat is None else regCorrMat + current

        # mean in z-space, then back to correlation scale
        regCorrMat = regCorrMat / idx
        regCorrMat = np.tanh(regCorrMat)

    return regCorrMat
- if __name__ == "__main__":
- # get the command line inputs
- aoExample, avExample, visExample, modelFile, outDir = parse_arguments()
- # infere subject number form file name
- sub = re.search('sub-\d{2}', modelFile)
- sub = sub.group()
- model = os.path.basename(modelFile).split('.npz')[0]
- # a) plot the correlation of AO regressors and features (only AO TRs)
- # get design.mat files for the 8 runs
- aofPathes = find_design_files(aoExample)
- # create the dataframe
- aoDf = create_aoDf(aofPathes)
- # indices of audio-decription TRs within the model
- start = 0
- end = 3524
- # get the correlation matrix
- regCorrMat = handle_one_or_list_of_models(aoDf, modelFile, start, end)
- # plot it
- title = f'{sub}: AO Regressors vs. Shared Features'\
- ' ({model}; TRs {start}-{end})'
- # create name of path and file (must not include ".{extension}"
- out_fpath = os.path.join(
- outDir, f'corr_ao-regressors-vs-cfs_{sub}_{model}_{start}-{end}')
- plot_heatmap(title, regCorrMat, out_fpath, AO_USED)
- # b) plot the correlation of AV regressors and features (only AV TRs)
- # get design.mat files for the 8 runs
- avfPathes = find_design_files(avExample)
- # create the dataframe
- avDf = create_avDf(avfPathes)
- # indices of movie TRs within the model
- start = 3524
- end = 7123
- # get the correlation matrix
- regCorrMat = handle_one_or_list_of_models(avDf, modelFile, start, end)
- # plot it
- title = f'{sub}: AV Regressors vs. Shared Responses' \
- ' ({model}; TRs {start}-{end})'
- # create name of path and file (must not include file extension)"
- out_fpath = os.path.join(
- outDir, f'corr_av-regressors-vs-cfs_{sub}_{model}_{start}-{end}'
- )
- plot_heatmap(title, regCorrMat, out_fpath, AV_USED)
- # c) plot the correlation of VIS regressors and features (only VIS TRs)
- # get design.mat files for the 4 runs
- visfPathes = find_design_files(visExample)
- # create the dataframe
- visDf = create_visDf(visfPathes)
- # indices of localizer TRs within the model
- start = 7123
- end = 7747
- # get the correlation matrix
- regCorrMat = handle_one_or_list_of_models(visDf, modelFile, start, end)
- # plot it
- title = f'{sub}: VIS Regressors vs. Shared Responses' \
- ' ({model}; TRs {start}-{end})'
- # create name of path and file (must not include file extension)
- out_fpath = os.path.join(
- outDir, f'corr_vis-regressors-vs-cfs_{sub}_{model}_{start}-{end}'
- )
- plot_heatmap(title, regCorrMat, out_fpath, VIS_USED)
|