# save_feat.py
  1. #%%
  2. import pandas as pd
  3. import os.path as op
  4. import re
  5. from nilearn import masking
  6. from glob import glob
  7. from nilearn.image import load_img, math_img, resample_to_img
  8. import sys
  9. dir_path = sys.argv[1]
  10. # dir_path = "/tmp/data"
  11. mask_path = "/extras/brainmask_12.8.nii"
  12. print("Analyzing data in {}".format(dir_path))
  13. #%%
  14. # extract for resampled at 4 and smoothed 4
  15. resolutions = ['r4s4', 'r4s8', 'r8s4', 'r8s8']
  16. tissues = ['p1', 'p2']
  17. mask = load_img(mask_path)
  18. df_total = pd.DataFrame()
  19. subj_path = glob(op.join(dir_path, '**/r4s4*p1sub*'), recursive=True)[0]
  20. subj_file = op.basename(subj_path)
  21. if not subj_path:
  22. raise ValueError('no subject found')
  23. sess = op.dirname(subj_path)
  24. df = pd.DataFrame()
  25. subj_names = []
  26. session_names = []
  27. sub_paths = []
  28. sub_paths.append(sess)
  29. s_name = 'sub-' + re.findall('sub-(.*?)_T1', subj_file)[0]
  30. subj_names.append(s_name)
  31. if '/ses' in sess:
  32. sess_name = "ses-" + re.findall('ses-(.*?)_T1', subj_file)[0]
  33. session_names.append(sess_name)
  34. else:
  35. sess_name = 'ses-1'
  36. session_names.append(sess_name)
  37. print("Extracting features for {}".format(sess))
  38. for resol in resolutions:
  39. print("resolution & smoothing: {}".format(resol))
  40. for tissue in tissues:
  41. print("Tissue (p1=GM, p2=WM, p3=CSF): {}".format(tissue))
  42. subjects = glob(op.join(sess, "**", '*{}r_{}sub*'.format(
  43. resol, tissue)), recursive=True)
  44. resampled_mask = resample_to_img(mask, subjects[0])
  45. bin_mask = math_img('img >= 0.5', img=resampled_mask)
  46. masked = masking.apply_mask(imgs=subjects, mask_img=bin_mask)
  47. tis_r_s = '_'.join([tissue, resol])
  48. file_name = '_'.join([s_name, sess_name,
  49. '_features']) + '.csv'
  50. cols = list(range(0, len(masked[0])))
  51. df_temp = pd.DataFrame(masked, columns=cols)
  52. df_temp = df_temp.add_prefix(tis_r_s + '_')
  53. df = pd.concat([df, df_temp], axis=1)
  54. df['subj_path'] = sub_paths
  55. df['subject'] = subj_names
  56. df['session'] = session_names
  57. df.to_csv(op.join(sess, file_name))
  58. # %%