Browse Source

add scripts for t-test of correlations

Christian O. Häusler 2 years ago
parent
commit
537ec90433

BIN
code/__pycache__/corrstats.cpython-37.pyc


+ 112 - 0
code/corrstats.py

@@ -0,0 +1,112 @@
+"""
+Functions for calculating the statistically significant difference between two dependent or independent correlation
+coefficients.
+The Fisher and Steiger method is adopted from the R package http://personality-project.org/r/html/paired.r.html
+and is described in detail in the book 'Statistical Methods for Psychology'.
+The Zou method is adopted from http://seriousstats.wordpress.com/2012/02/05/comparing-correlations/
+Credit goes to the authors of the above-mentioned packages!
+
+Author: Philipp Singer (www.philippsinger.info)
+"""
+
+from __future__ import division
+
+__author__ = 'psinger'
+
+import numpy as np
+from scipy.stats import t, norm
+from math import atanh, pow
+from numpy import tanh
+
+def rz_ci(r, n, conf_level = 0.95):
+    zr_se = pow(1/(n - 3), .5)
+    moe = norm.ppf(1 - (1 - conf_level)/float(2)) * zr_se
+    zu = atanh(r) + moe
+    zl = atanh(r) - moe
+    return tanh((zl, zu))
+
+def rho_rxy_rxz(rxy, rxz, ryz):
+    num = (ryz-1/2.*rxy*rxz)*(1-pow(rxy,2)-pow(rxz,2)-pow(ryz,2))+pow(ryz,3)
+    den = (1 - pow(rxy,2)) * (1 - pow(rxz,2))
+    return num/float(den)
+
+def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'):
+    """
+    Calculates the statistical significance of the difference between two dependent correlation coefficients
+    @param xy: correlation coefficient between x and y
+    @param xz: correlation coefficient between x and z
+    @param yz: correlation coefficient between y and z
+    @param n: number of elements in x, y and z
+    @param twotailed: whether to calculate a one- or two-tailed test; only used by the 'steiger' method
+    @param conf_level: confidence level; only used by the 'zou' method
+    @param method: the method to use, 'steiger' or 'zou'
+    @return: t statistic and p-value ('steiger') or lower and upper confidence bound ('zou')
+    """
+    if method == 'steiger':
+        d = xy - xz
+        determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
+        av = (xy + xz)/2
+        cube = (1 - yz) * (1 - yz) * (1 - yz)
+
+        t2 = d * np.sqrt((n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + av * av * cube)))
+        p = 1 - t.cdf(abs(t2), n - 3)
+
+        if twotailed:
+            p *= 2
+
+        return t2, p
+    elif method == 'zou':
+        L1 = rz_ci(xy, n, conf_level=conf_level)[0]
+        U1 = rz_ci(xy, n, conf_level=conf_level)[1]
+        L2 = rz_ci(xz, n, conf_level=conf_level)[0]
+        U2 = rz_ci(xz, n, conf_level=conf_level)[1]
+        rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
+        lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
+        upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
+        return lower, upper
+    else:
+        raise Exception('Wrong method!')
+
+def independent_corr(xy, ab, n, n2 = None, twotailed=True, conf_level=0.95, method='fisher'):
+    """
+    Calculates the statistical significance of the difference between two independent correlation coefficients
+    @param xy: correlation coefficient between x and y
+    @param ab: correlation coefficient between a and b
+    @param n: number of elements in xy
+    @param n2: number of elements in ab (if distinct from n)
+    @param twotailed: whether to calculate a one- or two-tailed test; only used by the 'fisher' method
+    @param conf_level: confidence level; only used by the 'zou' method
+    @param method: the method to use, 'fisher' or 'zou'
+    @return: z statistic and p-value ('fisher') or lower and upper confidence bound ('zou')
+    """
+
+    # default to equal sample sizes so both the 'fisher' and 'zou' branches work
+    if n2 is None:
+        n2 = n
+
+    if method == 'fisher':
+        xy_z = 0.5 * np.log((1 + xy)/(1 - xy))
+        ab_z = 0.5 * np.log((1 + ab)/(1 - ab))
+
+        se_diff_r = np.sqrt(1/(n - 3) + 1/(n2 - 3))
+        diff = xy_z - ab_z
+        z = abs(diff / se_diff_r)
+        p = (1 - norm.cdf(z))
+        if twotailed:
+            p *= 2
+
+        return z, p
+    elif method == 'zou':
+        L1 = rz_ci(xy, n, conf_level=conf_level)[0]
+        U1 = rz_ci(xy, n, conf_level=conf_level)[1]
+        L2 = rz_ci(ab, n2, conf_level=conf_level)[0]
+        U2 = rz_ci(ab, n2, conf_level=conf_level)[1]
+        lower = xy - ab - pow((pow((xy - L1), 2) + pow((U2 - ab), 2)), 0.5)
+        upper = xy - ab + pow((pow((U1 - xy), 2) + pow((ab - L2), 2)), 0.5)
+        return lower, upper
+    else:
+        raise Exception('Wrong method!')
+
+print(dependent_corr(.40, .50, .10, 103, method='steiger'))
+print(independent_corr(0.5 , 0.6, 103, 103, method='fisher'))
+
+# print(dependent_corr(.396, .179, .088, 200, method='zou'))
+# print(independent_corr(.560, .588, 100, 353, method='zou'))
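
Since the 'zou' branches return a confidence interval for the difference rather than a test statistic, here is a minimal usage sketch (not part of the committed file; it assumes corrstats.py is importable, e.g. when running from the code/ directory, and reuses the values from the commented-out examples above):

from corrstats import dependent_corr, independent_corr

# 'zou' returns the lower and upper bound of a confidence interval for the
# difference between the two correlations; an interval that excludes 0
# indicates a significant difference at the chosen confidence level
lower, upper = dependent_corr(.396, .179, .088, 200, method='zou')
print(f'dependent, 95% CI of the difference: [{lower:.3f}, {upper:.3f}]')

lower, upper = independent_corr(.560, .588, 100, 353, method='zou')
print(f'independent, 95% CI of the difference: [{lower:.3f}, {upper:.3f}]')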

+ 20 - 3
code/predict_ppa.py

@@ -489,6 +489,7 @@ def run_the_predictions(zmap_fpathes, subjs):
 
         # the list for the dataframe that will build from the list
         for_dataframe = []
+        for_dataframe_func_vs_anat = []
 
         # compute the anatomical prediction for every subject first
         empirical_arrays = []
@@ -550,6 +551,7 @@ def run_the_predictions(zmap_fpathes, subjs):
                                           func_pred_arrays[idx]) for idx
                                           in range(len(empirical_arrays))]
 
+
             # print the result of the currently used runs / stimulus
             print('subject\temp. vs. anat\temp. vs. cms')
 
@@ -563,16 +565,31 @@ def run_the_predictions(zmap_fpathes, subjs):
             elif stim == 'AV':
                 predictor = 'movie'
 
-            func_lines = [[subj, predictor, runs, corr[0]] for subj, corr in zip(subjs, emp_vs_func)]
+            func_lines = [[subj, predictor, runs, corr[0]]
+                          for subj, corr in zip(subjs, emp_vs_func)]
             # list of line for the dataframe
             for_dataframe.extend(func_lines)
 
+            # compute the correlations of the prediction from anatomy vs.
+            # the prediction from functional alignment (with the currently
+            # used no. of runs)
+            func_vs_anat = [stats.pearsonr(func_pred_arrays[idx],
+                                           anat_pred_arrays[idx]) for idx
+                                           in range(len(func_pred_arrays))]
+
+            func_vs_anat_lines = [[subj, f'{stim}-vs-anat', runs, corr[0]]
+                                  for subj, corr in zip(subjs, func_vs_anat)]
+
+            for_dataframe_func_vs_anat.extend(func_vs_anat_lines)
+
+
         # add the correlations of prediction from anatomy vs. empirical data
         anat_lines = [[subj, 'anatomy', 0, corr[0]]
                       for subj, corr in zip(subjs, emp_vs_anat)]
 
         # put the correlations per subject into the dataframe
         for_dataframe.extend(anat_lines)
+        for_dataframe.extend(for_dataframe_func_vs_anat)
 
         # prepare the dataframe for the current model
         df = pd.DataFrame(for_dataframe, columns = ['sub',
@@ -615,7 +632,7 @@ if __name__ == "__main__":
         for x in VIS_VPN_COPES.items()]
 
     print('\nTransforming VIS z-maps into MNI and into other subjects\' space')
-    transform_ind_ppas(zmap_fpathes, subjs)
+###    transform_ind_ppas(zmap_fpathes, subjs)
 
     run_the_predictions(zmap_fpathes, subjs)
 
@@ -625,6 +642,6 @@ if __name__ == "__main__":
         for x in VIS_VPN_COPES.items()]
 
     print('\nTransforming AO z-maps into MNI and into other subjects\' space')
-    transform_ind_ppas(zmap_fpathes, subjs)
+###    transform_ind_ppas(zmap_fpathes, subjs)
 
     run_the_predictions(zmap_fpathes, subjs)
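
For orientation, the new f'{stim}-vs-anat' rows end up in the same per-model CSV that the statistics script below reads. A small sketch (not part of the commit; column names inferred from code/statistics_t-test-correlations.py rather than from the truncated columns list above) of how those rows can be pulled back out:

import pandas as pd

# 'AV-vs-anat' corresponds to stim == 'AV' (movie) in run_the_predictions()
df = pd.read_csv('test/corr-empVIS-vs-func.csv')
func_vs_anat = df.loc[df['prediction from'] == 'AV-vs-anat',
                      ['sub', 'runs', "Pearson's r"]]
print(func_vs_anat.head())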

+ 135 - 0
code/statistics_t-test-correlations.py

@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+'''
+created on Mon March 29th 2021
+author: Christian Olaf Haeusler
+'''
+
+from collections import OrderedDict
+from glob import glob
+from nilearn.input_data import NiftiMasker, MultiNiftiMasker
+from scipy import stats
+
+import argparse
+import copy
+import ipdb
+import matplotlib.pyplot as plt
+import nibabel as nib
+import numpy as np
+import os
+import pandas as pd
+import re
+import subprocess
+import sys
+sys.path.append('code')
+import corrstats
+
+def parse_arguments():
+    '''Parses the command line arguments
+    '''
+    parser = argparse.ArgumentParser(
+        description='performs t-tests on correlations between empirical and predicted maps'
+    )
+
+    parser.add_argument('-indir',
+                        required=False,
+                        default='test',
+                        help='input directory')
+
+    parser.add_argument('-run',
+                        required=False,
+                        default='1',
+                        help='run to be tested')
+
+    args = parser.parse_args()
+
+    indir = args.indir
+    run = args.run
+
+    return indir, run
+
+
+def find_files(pattern):
+    '''Finds files matching a glob pattern and sorts them naturally
+    '''
+    def sort_nicely(l):
+        '''Sorts a given list in the way that humans expect
+        '''
+        convert = lambda text: int(text) if text.isdigit() else text
+        alphanum_key = lambda key: [convert(c) for c in re.split("([0-9]+)", key)]
+        l.sort(key=alphanum_key)
+
+        return l
+
+    found_files = glob(pattern)
+    found_files = sort_nicely(found_files)
+
+    return found_files
+
+
+if __name__ == "__main__":
+    # read command line arguments
+    in_dir, run = parse_arguments()
+
+    stimulus, stim = 'movie', 'AV'  # vs. 'audio-description', 'AO'
+
+    df = pd.read_csv(os.path.join(in_dir, 'corr-empVIS-vs-func.csv'))
+
+    corrsAna = df.loc[df['prediction from'] == 'anatomy', 'Pearson\'s r']
+    corrsAna = corrsAna.values
+    # perform Fisher's z-transformation
+    corrsAnaZ = np.arctanh(corrsAna)
+
+    # get correlations between empirical data and predictions from CMS
+    allRuns = df.loc[df['prediction from'] == stimulus, ['runs', 'Pearson\'s r']]
+
+    for runA in list(range(1, 8)):
+        runB = runA + 1
+
+        # get correlations functional vs. empirical for run A
+        corrsRunA = allRuns.loc[allRuns['runs'] == runA, 'Pearson\'s r']
+        corrsRunA = corrsRunA.values
+        # perform Fisher's z-transformation
+        corrsRunAZ = np.arctanh(corrsRunA)
+
+        # compare run 1 (and only run 1) to the correlations of anatomy vs. empirical data
+        if runA == 1:
+            print(f'Run {runA} vs Anatomy:')
+            # independent t-test
+            tValue, pValue = stats.ttest_ind(corrsRunAZ, corrsAnaZ)
+            tValue = round(tValue, 2)
+            pValue = round(pValue, 4)
+            print(f'independent:\tt={tValue}, p={pValue}')
+            # dependent t-test
+            tValue, pValue = stats.ttest_rel(corrsRunAZ, corrsAnaZ)
+            tValue = round(tValue, 2)
+            pValue = round(pValue, 4)
+            print(f'dependent:\tt={tValue}, p={pValue}\n')
+
+        # get correlations functional vs. empirical for run B
+        corrsRunB = allRuns.loc[allRuns['runs'] == runB, 'Pearson\'s r']
+        corrsRunB = corrsRunB.values
+        # perform Fisher's z-transformation
+        corrsRunBZ = np.arctanh(corrsRunB)
+
+        print(f'Run {runA} vs {runB}:')
+        # independent t-test
+        tValue, pValue = stats.ttest_ind(corrsRunAZ, corrsRunBZ)
+        tValue = round(tValue, 2)
+        pValue = round(pValue, 5)
+        print(f'independent:\tt={tValue}, p={pValue}')
+        # dependent t-test
+        tValue, pValue = stats.ttest_rel(corrsRunAZ, corrsRunBZ)
+        tValue = round(tValue, 2)
+        pValue = round(pValue, 5)
+        print(f'dependent:\tt={tValue}, p={pValue}\n')
+
+        # get correlations between predictions from CMS and anatomy
+#         allRuns = df.loc[df['prediction from'] == f'{stim}-vs-anat', ['runs', 'Pearson\'s r']]
+#         corrsFunAnaRunA = allRuns.loc[allRuns['runs'] == runA, 'Pearson\'s r']
+#         corrsFunAnaRunA = corrsFunAnaRunA.values
+#
+#         allRuns = df.loc[df['prediction from'] == f'{stim}-vs-anat', ['runs', 'Pearson\'s r']]
+#         corrsFunAnaRunB = allRuns.loc[allRuns['runs'] == runB, 'Pearson\'s r']
+#         corrsFunAnaRunB = corrsFunAnaRunB.values
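
The corrstats import at the top of the script is not used yet; together with the commented-out block above, it suggests a per-subject Steiger test of r(empirical, functional prediction) against r(empirical, anatomical prediction). A hypothetical sketch, assuming the f'{stim}-vs-anat' rows written by predict_ppa.py are present in the CSV; N_VOXELS (the sample size underlying each correlation) is not stored in the data frame and is only a placeholder here:

# hypothetical sketch, not part of the commit
run = 1           # e.g. compare the single-run functional prediction
N_VOXELS = 1000   # placeholder; number of voxels each correlation was computed over

for subj in df['sub'].unique():
    xy = df.loc[(df['sub'] == subj) & (df['prediction from'] == stimulus)
                & (df['runs'] == run), "Pearson's r"].item()
    xz = df.loc[(df['sub'] == subj) & (df['prediction from'] == 'anatomy'),
                "Pearson's r"].item()
    yz = df.loc[(df['sub'] == subj) & (df['prediction from'] == f'{stim}-vs-anat')
                & (df['runs'] == run), "Pearson's r"].item()
    tVal, pVal = corrstats.dependent_corr(xy, xz, yz, N_VOXELS, method='steiger')
    print(f'{subj}:\tt={tVal:.2f}, p={pVal:.4f}')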