ParsingData.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Nov 22 17:28:14 2021
  5. @author: kalantaria
  6. Description: This code will parse through every possible folder after a defined initial path,
  7. looking for MR data files of any type. Then it will extract the wanted files
and eliminate the duplicates.
  9. """
  10. import os
  11. import glob
  12. import pv_parser as par
  13. import pandas as pd
  14. import argparse
  15. import alive_progress as ap
  16. import QC
  17. import shutil
  18. #from openpyxl import Workbook
  19. import FeatureCheck as fc
  20. #%% Command line interface
  21. if __name__ == "__main__":
  22. parser = argparse.ArgumentParser(description="Parser of all MR files: Description:\
  23. This code will parse through every possible folder behind a defined initial path,\
  24. looking for MR data files of any type. Then it will extract the wanted files \
  25. and eliminate any duplicates(example: python ParsingData.py -i C:/raw_data -o C:/raw_data/QCOutput -f raw )")
  26. parser.add_argument('-i','--initial_path',required=True, \
  27. help='initial path to start the parsing')
  28. parser.add_argument('-o','--output_path',required=True,\
  29. help='Set the path where the results should be saved')
  30. parser.add_argument('-f','--format_type',\
  31. help="the format your dataset has:\
  32. nifti or raw Bruker",type=str,required=True,choices=["nifti","raw"])
  33. # parser.add_argument('-t','--sequence_types',\
  34. # help="you need to tell what kind of Sequences should be used in \
  35. # for processing the dataset:\
  36. # T2w, DTI, fmri",type=str,required=False,choices=["T2w","DTI","fMRI"],default=["T2w","DTI","fMRI"])
  37. parser.add_argument('-s','--suffix',\
  38. help="If necessary you can specify what kind of suffix the data to look for should have :\
  39. for example: -s test , this means it will only look for data that have this\
  40. suffix befor the .nii.gz, meaning test.nii.gz",type=str, required=False, default="")
  41. parser.add_argument('-e','--exclude',\
  42. help="If you have a specific sequence which you don't want to include in the analysis you can name it here as a string.\
  43. for example, you have some resting state scans. One of them has the name 'rsfmri_Warmup' as its Protocol or sequence\
  44. name or file name (in the case of Niftis)). The program will automatically categorize it as an fMRI scan. \
  45. You can set this parameter to exclude some sequences. Here you would do: --exclude Warmup, then it will\
  46. exclude those scans",type=str, required=False,nargs='+')
  47. args = parser.parse_args()
  48. initial_path = args.initial_path
  49. saving_path = args.output_path
  50. format_type= args.format_type
  51. exclude_param = args.exclude
  52. print(exclude_param)
  53. #sequence_types = args.sequence_types
  54. suffix = args.suffix
  55. #%% Information for the user
  56. QC.tic()
  57. print("Hello! Are you ready to get rid of bad quality data?")
  58. print('------------------------------------------------------------')
  59. print('Thank you for using our code. Contact:')
  60. print('aref.kalantari-sarcheshmeh@uk-koeln.de / markus.aswendt@uk-koeln.de')
  61. print('Lab: AG Neuroimaging and Neuroengineering of Experimental Stroke, University Hospital Cologne')
  62. print('Web: https://neurologie.uk-koeln.de/forschung/ag-neuroimaging-neuroengineering/')
  63. print('------------------------------------------------------------')
  64. DTI_string = ["DTI","STRUCT","DWI","DIFFUS"]
  65. FMRI_string = ["RESTING","FUN","RSF","FMRI","BOLD","RS-"]#possiblto add "RS-" but had problems when a dti scan was named -64Dirs-(rs-)
  66. T2_string = ["T2W","T1W","ANAT","RARE","TURBO","T1_F","T2_F"]
  67. NotAllowed = ["LOC","PIL","FISP","WOB","NOIS","SINGL","MRS","B0M","FIELD"]
  68. if exclude_param:
  69. NotAllowed = NotAllowed + [e.upper() for e in exclude_param]
  70. #%% Path Construction
  71. if not os.path.exists(saving_path):
  72. os.makedirs(saving_path)
  73. #%% Parsing
  74. #Types = ['Dti','EPI','RARE']
  75. #Types_new = ['DTI','rsfMRI','T2w']
  76. type_strs = ['diff', 'func', 'anat']
  77. if format_type == "raw":
  78. PathALL = os.path.join(initial_path,"**","acqp")
  79. with ap.alive_bar(title='Parsing through folders ...',length=10,stats = False,monitor=False) as bar:
  80. text_files = glob.glob(PathALL, recursive = True)
  81. kall = len(text_files)
  82. print(( 'Total number of '+ str(kall) + ' files were found:'+' parsing finished! '.upper()).upper())
  83. #%% Extrtacting usable data
  84. ABook = {}
  85. ErrorList =[]
  86. CheckDates = []
  87. C = 0
  88. #EPI_flag = ["EPI"]
  89. for i in range(len(type_strs)): #Creation of Adress Book
  90. ABook[type_strs[i]] = []
  91. with ap.alive_bar(kall, title='Extracting T1 or T2 weighted, diff and func sequences:'.upper(),length=10,stats = False,spinner= 'wait') as bar:
  92. for p in text_files: #filling the Address Book with wanted files
  93. try:
  94. NameTemp = par.read_param_file(p)
  95. MN = NameTemp[1]["ACQ_method"].upper() #Here we check what the name of the sequence is
  96. MN2 = NameTemp[1]["ACQ_protocol_name"].upper()
  97. KEY = MN + MN2
  98. DateTemp = NameTemp[0]['Date'] #Here we check the date of the measurement
  99. Ans = []
  100. except KeyError:
  101. print("KeyError")
  102. print(p)
  103. ErrorList.append(p)
  104. except UnicodeDecodeError:
  105. print("UnicodeDecodeError")
  106. print(p)
  107. ErrorList.append(p)
  108. Flag_anat = any([(aa in KEY) for aa in T2_string])
  109. Flag_struct = any([(aa in KEY) for aa in DTI_string])
  110. Flag_func = any([(aa in KEY) for aa in FMRI_string])
  111. Flag_notAllowed = any([(aa in KEY) for aa in NotAllowed])
  112. Flag_epi = "EPI" in KEY
  113. if DateTemp not in CheckDates:
  114. if Flag_struct and not Flag_notAllowed:
  115. ABook["diff"].append(os.path.dirname(p))
  116. C = C+1
  117. elif Flag_func and not Flag_notAllowed:
  118. ABook["func"].append(os.path.dirname(p)) #I know it is totally confusing with EPI as the col name for the ABook but sadly EPI can also be a DTI scan
  119. C = C+1
  120. elif Flag_anat and not Flag_notAllowed and not Flag_epi: #T2Star EPIS are usually rsfmri scans
  121. ABook["anat"].append(os.path.dirname(p))
  122. C = C+1
  123. elif Flag_epi and not Flag_notAllowed:
  124. TP = NameTemp[1]["ACQ_time_points"]
  125. MF = NameTemp[1]["ACQ_n_movie_frames"]
  126. if MF != len(TP):
  127. ABook["func"].append(os.path.dirname(p)) #I know it is totally confusing with EPI as the col name for the ABook but sadly EPI can also be a DTI scan
  128. C = C+1
  129. elif MF == len(TP):
  130. ABook["diff"].append(os.path.dirname(p))
  131. C = C+1
  132. # for i,t in enumerate(type_strs):
  133. #
  134. # if t in MN or t in p:
  135. # if t.upper() in MN.upper():
  136. # ABook[type_strs[i]].append(os.path.dirname(p))
  137. # C = C+1
  138. CheckDates.append(DateTemp)
  139. bar()
  140. M = dict.fromkeys(CheckDates)
  141. print(' '+str(C)+' files were extracted! %%%'.upper())
  142. print((' ' + str(len(CheckDates)-len(M))+ ' duplicates were eliminated! %%%').upper())
  143. #%% Saving parsed files
  144. #saving in csv file
  145. for n,type_str in enumerate(type_strs):
  146. if len(ABook[type_str]) !=0:
  147. addreses= pd.DataFrame(ABook[type_str])
  148. csv_path= "raw_data_addreses_"+type_str+".csv"
  149. csv_path= os.path.join(saving_path,csv_path)
  150. addreses.to_csv(csv_path, sep=',',index=False)
  151. if ErrorList:
  152. dfError = pd.DataFrame()
  153. dfError['ErrorData'] = ErrorList
  154. eror= os.path.join(saving_path,"CanNotOpenTheseFiles.csv")
  155. print("Some data could not be opened by the pipline- Check CanNotOpenTheseFiles.csv for more information")
  156. dfError.to_csv(eror,index=False)
  157. print('\n\ncsv files were created:' + str(saving_path))
  158. print('\n\n%%%%%%%%%%%%% End of stage 1 %%%%%%%%%%%%%%%'.upper())
  159. # to make exel file as an output you can uncomment below lines
  160. # for i,T in enumerate(Types):
  161. # globals()['df'+ str(i)] = pd.DataFrame(ABook[T])
  162. # saving_path2 = saving_path + 'QuiC_Data_Result_raw.xlsx'
  163. # writer = pd.ExcelWriter(saving_path2, engine='xlsxwriter')
  164. # for i,T in enumerate(Types_new):
  165. # globals()['df'+ str(i)].to_excel(writer,sheet_name=T, index = False)
  166. # dfError.to_excel(writer, sheet_name='ErrorData',index = False)
  167. # writer.save()
  168. # print('\n\nExcel file was created:' + str(saving_path2))
  169. # print('\n\n%%%%%%%%%%%%%End of the first stage%%%%%%%%%%%%%%%'.upper())
  170. #%% Parsing nifti format
  171. elif format_type=="nifti":
  172. PathALL = os.path.join(initial_path,"**","*" + suffix + ".nii.gz")
  173. PathALL2 = os.path.join(initial_path,"**","*" + suffix + ".nii")
  174. with ap.alive_bar(title='Parsing through folders ...',length=10,stats = False,monitor=False) as bar:
  175. text_files = glob.glob(PathALL, recursive = True) + glob.glob(PathALL2, recursive = True)
  176. kall = len(text_files)
  177. print(( 'Total number of '+ str(kall) + ' files were found:'+'Parsing finished! '.upper()).upper())
  178. ABook={}
  179. for i in range(len(type_strs)): #Creation of Adress Book
  180. ABook[type_strs[i]] = []
  181. # =============================================================================
  182. # text_files2 = [os.path.split(t)[-1] for t in text_files]
  183. # text_files3,Index = np.unique(text_files2,return_index=True)
  184. # text_files = [text_files[ii] for ii in Index]
  185. # ============================================================================
  186. for i,T in enumerate(type_strs):
  187. globals()['df'+ str(i)] = pd.DataFrame(ABook[T])
  188. for i in text_files :
  189. for rr,Q in enumerate(i.split(os.sep)):
  190. temp = i.split(os.sep)[-rr].upper()
  191. Flag_anat = any([(aa in temp) for aa in T2_string])
  192. Flag_struct = any([(aa in temp) for aa in DTI_string])
  193. Flag_func = any([(aa in temp) for aa in FMRI_string])
  194. Flag_notAllowed = any([(aa in temp) for aa in NotAllowed])
  195. if any([Flag_anat,Flag_struct,Flag_func,Flag_notAllowed]):
  196. break
  197. if not any([Flag_anat,Flag_struct,Flag_func]):
  198. continue
  199. print("The sequence type is ambiguous between registered nifti files.")
  200. print("To solve this problem use the following names to define sequences in ther path name:")
  201. print(DTI_string)
  202. print(FMRI_string)
  203. print(T2_string)
  204. print('Avoid using "EPI"!')
  205. if Flag_struct and not Flag_notAllowed:
  206. ABook["diff"].append(i)
  207. elif Flag_func and not Flag_notAllowed:
  208. ABook["func"].append(i)
  209. elif Flag_anat and not Flag_notAllowed:
  210. ABook["anat"].append(i)
  211. #saving in csv file
  212. for n,type_str in enumerate(type_strs):
  213. if len(ABook[type_str]) !=0:
  214. addreses= pd.DataFrame(ABook[type_str])
  215. csv_path= "nifti_data_addreses_"+type_str+".csv"
  216. csv_path= os.path.join(saving_path,csv_path)
  217. addreses.to_csv(csv_path, sep=',',index=False)
  218. print('\n\ncsv files were created:' + str(saving_path))
  219. print('\n\n%%%%%%%%%%%%% End of the stage 1 %%%%%%%%%%%%%%%'.upper())
  220. print('\nStarting Stage 2 ...'.upper())
  221. #print('\nChosen Sequences are: ')
  222. #print(sequence_types)
  223. print('\nCalculating features...\n'.upper())
  224. print('This will take some time depending on the size of the dataset. See the progress bar below.\n\n')
  225. if format_type=="raw":
  226. fc.CheckingRawFeatures(saving_path)
  227. QC.toc()
  228. elif format_type=="nifti":
  229. fc.CheckingNiftiFeatures(saving_path)
  230. QC.toc()
  231. print('Plotting quality features...\n'.upper())
  232. QC.QCPlot(saving_path)
  233. QC.QCtable(saving_path, format_type)
  234. # remove addressed files
  235. for file in glob.glob(os.path.join(saving_path, '*data_addreses*.csv')) :
  236. os.remove(file)
  237. # relocate calculated fearures
  238. calculated_features = os.path.join(saving_path, "calculated_features")
  239. os.mkdir(calculated_features)
  240. old_files=[]
  241. for old_file in glob.glob(os.path.join(saving_path, '*caculated_features*.csv')) :
  242. old_files.append(old_file)
  243. new_direction= os.path.join(saving_path,"calculated_features")
  244. new_files=[]
  245. for old_file in old_files:
  246. path_split= os.path.split(old_file)
  247. files_name= path_split[1]
  248. join_path= os.path.join(new_direction,files_name)
  249. new_files.append(join_path)
  250. for new_file,old_file in enumerate(old_files) :
  251. shutil.move(old_file , new_files[new_file])
  252. print('\n\n%%%%%%%%%%%%%Quality feature plots were successfully created and saved%%%%%%%%%%%%%%%\n\n'.upper())
  253. print('------------------------------------------------------------')
  254. print('Thank you for using our code. For questions please contact us via:')
  255. print('aref.kalantari-sarcheshmeh@uk-koeln.de or markus.aswendt@uk-koeln.de')
  256. print('Lab: AG Neuroimaging and neuroengineering of experimental stroke University Hospital Cologne')
  257. print('Web:https://neurologie.uk-koeln.de/forschung/ag-neuroimaging-neuroengineering/')
  258. print('------------------------------------------------------------')