plot_bland-altman.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. #!/usr/bin/env python2
  2. # -*- coding: utf-8 -*-
  3. '''
  4. author: Christian Olaf Häusler
  5. created on Wednesday September 15 2021
  6. start ipython2.7 via
  7. 'python2 -m IPython'
  8. '''
  9. from __future__ import print_function
  10. from collections import OrderedDict
  11. from glob import glob
  12. from mvpa2.datasets.mri import fmri_dataset
  13. from nilearn.input_data import NiftiMasker, MultiNiftiMasker
  14. import argparse
  15. import numpy as np
  16. import os
  17. import pandas as pd
  18. import matplotlib.pyplot as plt
  19. import nibabel as nib
  20. import re
  21. import subprocess
  22. # from mvpa2.datasets.mri import fmri_dataset
  23. from scipy.stats import gaussian_kde
  24. # constants
  25. GRP_PPA_PTTRN = 'sub-??/masks/in_bold3Tp2/grp_PPA_bin.nii.gz'
  26. AO_FOV_MASK = 'sub-??/masks/in_bold3Tp2/audio_fov.nii.gz'
  27. GM_MASK = 'sub-??/masks/in_bold3Tp2/gm_bin_dil_fov.nii.gz'
  28. # binary mask(s) of individual visual localizer (in subject space)
  29. PPA_MASK_PATTERN = 'inputs/studyforrest-data-visualrois/'\
  30. 'sub-*/rois/?PPA_?_mask.nii.gz'
  31. # individual 2nd level results (primary cope in subject space)
  32. PREDICTED_PREFIX = 'predicted-VIS-PPA_from_'
  33. PREDICTIONS = [
  34. 'anatomy.nii.gz',
  35. 'srm-ao-av-vis_feat10_0-451.nii.gz',
  36. 'srm-ao-av-vis_feat10_0-892.nii.gz',
  37. 'srm-ao-av-vis_feat10_0-1330.nii.gz',
  38. 'srm-ao-av-vis_feat10_0-1818.nii.gz',
  39. 'srm-ao-av-vis_feat10_0-2280.nii.gz',
  40. 'srm-ao-av-vis_feat10_0-2719.nii.gz',
  41. 'srm-ao-av-vis_feat10_0-3261.nii.gz',
  42. 'srm-ao-av-vis_feat10_0-3524.nii.gz',
  43. 'srm-ao-av-vis_feat10_3524-3975.nii.gz',
  44. 'srm-ao-av-vis_feat10_3524-4416.nii.gz',
  45. 'srm-ao-av-vis_feat10_3524-4854.nii.gz',
  46. 'srm-ao-av-vis_feat10_3524-5342.nii.gz',
  47. 'srm-ao-av-vis_feat10_3524-5804.nii.gz',
  48. 'srm-ao-av-vis_feat10_3524-6243.nii.gz',
  49. 'srm-ao-av-vis_feat10_3524-6785.nii.gz',
  50. 'srm-ao-av-vis_feat10_3524-7123.nii.gz',
  51. 'srm-ao-av-vis_feat10_0-7123.nii.gz'
  52. ]
  53. VIS_ZMAP_PATTERN = 'inputs/studyforrest-data-visualrois/'\
  54. 'sub-??/2ndlvl.gfeat/cope*.feat/stats/zstat1.nii.gz'
  55. # contrast used by Sengupta et al. (2016) to create the PPA mask
  56. VIS_VPN_COPES = OrderedDict({ # dicts are ordered from Python 3.7
  57. 'sub-01': 'cope8',
  58. 'sub-02': 'cope3',
  59. 'sub-03': 'cope3',
  60. 'sub-04': 'cope3',
  61. 'sub-05': 'cope3',
  62. 'sub-06': 'cope3',
  63. 'sub-09': 'cope3',
  64. 'sub-14': 'cope3',
  65. 'sub-15': 'cope3',
  66. 'sub-16': 'cope3',
  67. 'sub-17': 'cope3',
  68. 'sub-18': 'cope8',
  69. 'sub-19': 'cope3',
  70. 'sub-20': 'cope3'
  71. })
  72. def parse_arguments():
  73. '''
  74. '''
  75. parser = argparse.ArgumentParser(
  76. description='Creates mosaic of individual Bland-Altman-Plots'
  77. )
  78. parser.add_argument('-i',
  79. default='./',
  80. help='input directory')
  81. parser.add_argument('-o',
  82. default='test',
  83. help='output directory')
  84. args = parser.parse_args()
  85. inDir = args.i
  86. outDir = args.o
  87. return inDir, outDir
  88. def find_files(pattern):
  89. '''
  90. '''
  91. found_files = glob(pattern)
  92. found_files = sort_nicely(found_files)
  93. return found_files
  94. def sort_nicely(l):
  95. '''Sorts a given list in the way that humans expect
  96. '''
  97. convert = lambda text: int(text) if text.isdigit() else text
  98. alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
  99. l.sort(key=alphanum_key)
  100. return l
  101. def compute_means(data1, data2, log='n'):
  102. '''
  103. '''
  104. if len(data1) != len(data2):
  105. raise ValueError('data1 does not have the same length as data2.')
  106. data1 = np.asarray(data1)
  107. data2 = np.asarray(data2)
  108. if log == 'n':
  109. means = np.mean([data1, data2], axis=0)
  110. elif log == 'y':
  111. # what ever computation
  112. pass
  113. return means
  114. def compute_diffs(data1, data2, log='n'):
  115. '''
  116. '''
  117. if len(data1) != len(data2):
  118. raise ValueError('data1 does not have the same length as data2.')
  119. data1 = np.asarray(data1)
  120. data2 = np.asarray(data2)
  121. if log == 'n':
  122. diffs = data1 - data2 # Difference between data1 and data2
  123. elif log == 'y':
  124. # what ever computation
  125. pass
  126. return diffs
  127. def process_df(subj, zmaps_df, out_path, prediction):
  128. '''
  129. '''
  130. # get the contrasts' names from the column names
  131. zmap1 = zmaps_df.iloc[:, 1]
  132. zmap2 = zmaps_df.iloc[:, 0]
  133. # mask all voxels not contained in the PPA group mask
  134. ppa_grp_masked1 = zmap1.as_matrix()[zmaps_df['ppa_grp'].as_matrix() > 0]
  135. ppa_grp_masked2 = zmap2.as_matrix()[zmaps_df['ppa_grp'].as_matrix() > 0]
  136. # mask all voxels not contained in the individual PPA mask
  137. ppa_ind_masked1 = zmap1.as_matrix()[zmaps_df['ppa_ind'].as_matrix() > 0]
  138. ppa_ind_masked2 = zmap2.as_matrix()[zmaps_df['ppa_ind'].as_matrix() > 0]
  139. # ao_ind_masked1 = zmap1.as_matrix()[zmaps_df['ao_ind'].as_matrix() > 0]
  140. # ao_ind_masked2 = zmap2.as_matrix()[zmaps_df['ao_ind'].as_matrix() > 0]
  141. datasets = [
  142. [zmap1, zmap2],
  143. [ppa_grp_masked1, ppa_grp_masked2],
  144. [ppa_ind_masked1, ppa_ind_masked2],
  145. # [ao_ind_masked1, ao_ind_masked2]
  146. ]
  147. means_list = [compute_means(data1, data2) for data1, data2 in datasets]
  148. diffs_list = [compute_diffs(data1, data2) for data1, data2 in datasets]
  149. # Set up the axes with gridspec
  150. fig = plt.figure(figsize=(6, 6))
  151. grid = plt.GridSpec(6, 6, hspace=0.0, wspace=0.0)
  152. # add three subplots
  153. ax_scatter = fig.add_subplot(grid[1:, :-1])
  154. ax_xhist = fig.add_subplot(grid[0:1, 0:-1],
  155. yticklabels=[],
  156. sharex=ax_scatter)
  157. ax_yhist = fig.add_subplot(grid[1:, -1],
  158. xticklabels=[],
  159. sharey=ax_scatter)
  160. ax_scatter.text(5.1, 5.8, subj, fontsize=16, fontweight='bold')
  161. # plot voxel within occipitotemporal cortex
  162. plot_blandaltman(ax_scatter,
  163. means_list[0],
  164. diffs_list[0],
  165. alpha=0.6,
  166. c='darkgrey',
  167. s=2)
  168. # plot voxels within PPA group overlap
  169. plot_blandaltman(ax_scatter,
  170. means_list[1],
  171. diffs_list[1],
  172. alpha=1,
  173. c='royalblue',
  174. s=2)
  175. # plot voxels within individual PPA ROI
  176. plot_blandaltman(ax_scatter,
  177. means_list[2],
  178. diffs_list[2],
  179. alpha=1,
  180. c='r',
  181. s=2)
  182. # # plot voxels within (thresholded) individual AO zmap
  183. # plot_blandaltman(ax_scatter,
  184. # means_list[3],
  185. # diffs_list[3],
  186. # alpha=0.5,
  187. # c='y',
  188. # s=2)
  189. plot_histogram(ax_xhist, ax_yhist,
  190. means_list[0], diffs_list[0],
  191. alpha=1,
  192. color='darkgrey')
  193. plot_histogram(ax_xhist, ax_yhist,
  194. means_list[1], diffs_list[1],
  195. alpha=1,
  196. color='royalblue')
  197. plot_histogram(ax_xhist, ax_yhist,
  198. means_list[2], diffs_list[2],
  199. alpha=1,
  200. color='r')
  201. # try:
  202. # plot_histogram(ax_xhist, ax_yhist,
  203. # means_list[3], diffs_list[3],
  204. # alpha=1,
  205. # color='y')
  206. #
  207. # except ValueError:
  208. # print(subj, 'has no significant cluster in primary AO contrast')
  209. # save it
  210. suffix = 'from_' + prediction.split('.nii')[0]
  211. out_file = ('%s_bland-altman_%s.png' % (subj, suffix))
  212. out_fpath = os.path.join(out_path, out_file)
  213. if not os.path.exists(out_path):
  214. os.makedirs(out_path)
  215. plt.savefig(out_fpath,
  216. bbox_inches='tight',
  217. dpi=80)
  218. plt.close()
  219. def plot_blandaltman(ax, means, diffs, *args, **kwargs):
  220. '''
  221. '''
  222. if len(means) != len(diffs):
  223. raise ValueError('means do not have the same length as diffs.')
  224. # annotation
  225. # variable subj is still a global here
  226. if subj in ['sub-01', 'sub-04', 'sub-09', 'sub-16', 'sub-19']:
  227. ax.set_ylabel('Difference between 2 measures', fontsize=16)
  228. if subj in ['sub-19', 'sub-20']:
  229. ax.set_xlabel('Average of 2 measures', fontsize=16)
  230. # draw the scattergram
  231. ax.scatter(means, diffs, *args, **kwargs)
  232. # set the size of the tick labels
  233. ax.xaxis.set_tick_params(labelsize=16)
  234. ax.yaxis.set_tick_params(labelsize=16)
  235. # limit the range of data shown
  236. ax.set_xlim(-5, 5)
  237. ax.set_ylim(-5, 5)
  238. # draw horizontal and vertical line at 0
  239. ax.axhline(0, color='k', linewidth=0.5, linestyle='--')
  240. ax.axvline(0, color='k', linewidth=0.5, linestyle='--')
  241. return None
  242. def plot_histogram(x_hist, y_hist, means, diffs, *args, **kwargs):
  243. '''
  244. '''
  245. # basic preparation of the axes
  246. x_hist.xaxis.set_tick_params(bottom=True, labelbottom=False)
  247. y_hist.yaxis.set_tick_params(bottom=True, labelbottom=False)
  248. # show vertical/horizontal zero line
  249. x_hist.axvline(0, color='k', linewidth=0.5, linestyle='--')
  250. y_hist.axhline(0, color='k', linewidth=0.5, linestyle='--')
  251. # plot histogram -> take KDE plot, s. below
  252. # x_hist.hist(means, 50, normed=True, histtype='bar',
  253. # orientation='vertical', **kwargs)
  254. # y_hist.hist(diffs, 50, normed=True, histtype='bar',
  255. # orientation='horizontal', **kwargs)
  256. # x_hist.set_yscale('log')
  257. # y_hist.set_xscale('log')
  258. # plot KDE
  259. xvalues = np.arange(-5.1, 5.1, 0.1)
  260. kde_means = gaussian_kde(means)
  261. kde_diffs = gaussian_kde(diffs)
  262. # KDE subplot on the top
  263. x_hist.plot(xvalues, kde_means(xvalues), **kwargs)
  264. # x_hist.fill_between(xvalues, kde_means(xvalues), 0, **kwargs)
  265. x_hist.set_ylim(0.015, 0.8)
  266. x_hist.set_yticks([0.2, 0.4, 0.6]) # , 1])
  267. x_hist.set_yticklabels(['.2', '.4', '.6']) # , '1'])
  268. x_hist.yaxis.set_tick_params(labelsize=16)
  269. # KDE subplot on the right
  270. y_hist.plot(kde_diffs(xvalues), xvalues, **kwargs)
  271. # y_hist.fill_between(xvalues, kde_diffs(xvalues), 0, **kwargs)
  272. y_hist.set_xlim(0.015, .8)
  273. y_hist.set_xticks([0.2, 0.4, 0.6]) # , 1])
  274. y_hist.set_xticklabels(['.2', '.4', '.6']) # , '1'])
  275. y_hist.xaxis.set_tick_params(labelsize=16)
  276. return None
  277. def create_mosaic(in_pattern, dims, out_fpath, dpi):
  278. '''
  279. http://www.imagemagick.org/Usage/montage/
  280. '''
  281. # dimensions in columns*rows
  282. subprocess.call(
  283. ['montage',
  284. '-density', str(dpi),
  285. in_pattern,
  286. '-geometry', '+1+1',
  287. '-tile', dims,
  288. out_fpath])
  289. def load_mask(subj):
  290. '''
  291. '''
  292. # open the mask of cortices (at the moment it justs the union of
  293. # individual PPAs)
  294. grp_ppa_fpath = GRP_PPA_PTTRN.replace('sub-??', subj)
  295. # print('PPA GRP mask:\t', grp_ppa_fpath)
  296. # subject-specific field of view in audio-description
  297. ao_fov_mask = AO_FOV_MASK.replace('sub-??', subj)
  298. # print('ind. FoV mask:\t', ao_fov_mask)
  299. # load the masks
  300. grp_ppa_img = nib.load(grp_ppa_fpath)
  301. ao_fov_img = nib.load(ao_fov_mask)
  302. # (dilated) gray matter mask; see constat at script's top
  303. #gm_fpath = GM_MASK.replace('sub-??', subj)
  304. #gm_img = nib.load(gm_fpath)
  305. #final_mask_data = grp_ppa_img.get_fdata() * gm_img.get_fdata()
  306. final_mask_data = grp_ppa_img.get_fdata() * ao_fov_img.get_fdata()
  307. final_mask_img = nib.Nifti1Image(final_mask_data,
  308. grp_ppa_img.affine,
  309. header=grp_ppa_img.header)
  310. return final_mask_img
  311. if __name__ == "__main__":
  312. # get command line argument
  313. inDir, outDir = parse_arguments()
  314. vis_fpathes = find_files(VIS_ZMAP_PATTERN)
  315. # get pathes & filenames of all available zmaps
  316. subjs_pattern = os.path.join(inDir, 'sub-??')
  317. subjs_pathes = find_files(subjs_pattern)
  318. subjs = [re.search(r'sub-..', string).group() for string in subjs_pathes]
  319. subjs = sorted(list(set(subjs)))
  320. for prediction in PREDICTIONS[:]:
  321. # loop over subjects
  322. for subj in subjs:
  323. print('\nProcessing', subj)
  324. # initialize a dataframe
  325. zmaps_df = pd.DataFrame()
  326. # load predicted z-map of current subject
  327. pred_fpath = os.path.join(inDir, subj, PREDICTED_PREFIX + prediction)
  328. pred_fpath = pred_fpath.replace('sub-??', subj)
  329. pred_data = fmri_dataset(pred_fpath).samples.T
  330. # put the array into the dataframe
  331. zmaps_df['predicted'] = np.ndarray.flatten(pred_data)
  332. # load the visual localizer's z-map
  333. # filter for current subject
  334. fpathes = [x for x in vis_fpathes if subj in x]
  335. # filter for the subject's correct cope
  336. vis_fpath = [x for x in fpathes if VIS_VPN_COPES[subj] in x][0]
  337. vis_data = fmri_dataset(vis_fpath).samples.T
  338. # put the array into the dataframe
  339. zmaps_df['vis'] = np.ndarray.flatten(vis_data)
  340. # load the GRP PPA mask (in subject space)
  341. # (GRP PPA will be masked with individual FoV)
  342. final_mask_img = load_mask(subj)
  343. final_mask_data = final_mask_img.get_fdata()
  344. # put the array into the dataframe
  345. zmaps_df['ppa_grp'] = np.ndarray.flatten(final_mask_data)
  346. # load the subject-specific PPA mask (Sengupta et al., 2016)
  347. ppa_fpathes = find_files(PPA_MASK_PATTERN.replace('sub-*', subj))
  348. ppaIndData = fmri_dataset(ppa_fpathes).samples.sum(axis=0)
  349. # masked it with individual FoV (just to be sure)
  350. ao_fov_fpath = find_files(AO_FOV_MASK.replace('sub-??', subj))
  351. ao_fov_data = fmri_dataset(ao_fov_fpath).samples
  352. ppaIndMasked = ppaIndData * ao_fov_data
  353. zmaps_df['ppa_ind'] = np.ndarray.flatten(ppaIndMasked)
  354. # let the function do the calculation and the plotting
  355. process_df(subj, zmaps_df, outDir, prediction)
  356. # create the mosaic
  357. suffix = 'from_' + prediction.split('.nii')[0]
  358. infile_pattern = os.path.join(outDir, ('sub-??_bland-altman_%s.png' % suffix))
  359. out_fpath = os.path.join(outDir, 'subjs_bland-altman_%s.png' % suffix)
  360. create_mosaic(infile_pattern, '3x5', out_fpath, 80) # columns x rows