ParsingData.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Nov 22 17:28:14 2021
  5. @author: kalantaria
  6. Description: This code will parse through every possible folder after a defined initial path,
  7. looking for MR data files of any type. Then it will extract the wanted files
and eliminate the duplicates.
  9. """
  10. import os
  11. import glob
  12. import pv_parser as par
  13. import pandas as pd
  14. import argparse
  15. import alive_progress as ap
  16. import QC
  17. import shutil
  18. #from openpyxl import Workbook
  19. import FeatureCheck as fc
  20. #%% Command line interface
  21. if __name__ == "__main__":
  22. parser = argparse.ArgumentParser(description="Parser of all MR files: Description:\
  23. This code will parse through every possible folder behind a defined initial path,\
  24. looking for MR data files of any type. Then it will extract the wanted files \
  25. and eliminate any duplicates(example: python ParsingData.py -i C:/raw_data -o C:/raw_data/QCOutput -f raw )")
  26. parser.add_argument('-i','--initial_path',required=True, \
  27. help='initial path to start the parsing')
  28. parser.add_argument('-o','--output_path',required=True,\
  29. help='Set the path where the results should be saved')
  30. parser.add_argument('-f','--format_type',\
  31. help="the format your dataset has:\
  32. nifti or raw Bruker",type=str,required=True,choices=["nifti","raw"])
  33. # parser.add_argument('-t','--sequence_types',\
  34. # help="you need to tell what kind of Sequences should be used in \
  35. # for processing the dataset:\
  36. # T2w, DTI, fmri",type=str,required=False,choices=["T2w","DTI","fMRI"],default=["T2w","DTI","fMRI"])
  37. parser.add_argument('-s','--suffix',\
  38. help="If necessary you can specify what kind of suffix the data to look for should have :\
  39. for example: -s test , this means it will only look for data that have this\
  40. suffix befor the .nii.gz, meaning test.nii.gz",type=str, required=False, default="")
  41. parser.add_argument('-e','--exclude',\
  42. help="If you have a specific sequence which you don't want to include in the analysis you can name it here as a string.\
  43. for example, you have some resting state scans. One of them has the name 'rsfmri_Warmup' as its Protocol or sequence\
  44. name or file name (in the case of Niftis)). The program will automatically categorize it as an fMRI scan. \
  45. You can set this parameter to exclude some sequences. Here you would do: --exclude Warmup, then it will\
  46. exclude those scans",type=str, required=False,nargs='+')
  47. args = parser.parse_args()
  48. initial_path = args.initial_path
  49. saving_path = args.output_path
  50. format_type= args.format_type
  51. exclude_param = args.exclude
  52. print(exclude_param)
  53. #sequence_types = args.sequence_types
  54. suffix = args.suffix
  55. #%% Information for the user
  56. QC.tic()
  57. print("Hello! Are you ready to get rid of bad quality data?")
  58. print('------------------------------------------------------------')
  59. print('Thank you for using our code. Contact:')
  60. print('aref.kalantari-sarcheshmeh@uk-koeln.de / markus.aswendt@uk-koeln.de')
  61. print('Lab: AG Neuroimaging and Neuroengineering of Experimental Stroke, University Hospital Cologne')
  62. print('Web: https://neurologie.uk-koeln.de/forschung/ag-neuroimaging-neuroengineering/')
  63. print('------------------------------------------------------------')
  64. DTI_string = ["DTI","STRUCT","DWI","DIFFUS"]
  65. FMRI_string = ["RESTING","FUN","RSF","FMRI","BOLD","RS-"]#possiblto add "RS-" but had problems when a dti scan was named -64Dirs-(rs-)
  66. T2_string = ["T2W","T1W","ANAT","RARE","TURBO","T1_F","T2_F"]
  67. NotAllowed = ["LOC","PIL","FISP","WOB","NOIS","SINGL","MRS","B0M","FIELD"]
  68. if exclude_param:
  69. NotAllowed = NotAllowed + [e.upper() for e in exclude_param]
  70. #%% Path Construction
  71. if not os.path.exists(saving_path):
  72. os.makedirs(saving_path)
  73. #%% Parsing
  74. #Types = ['Dti','EPI','RARE']
  75. #Types_new = ['DTI','rsfMRI','T2w']
  76. type_strs = ['diff', 'func', 'anat']
  77. if format_type == "raw":
  78. PathALL = os.path.join(initial_path,"**","acqp")
  79. with ap.alive_bar(title='Parsing through folders ...',length=10,stats = False,monitor=False) as bar:
  80. text_files = glob.glob(PathALL, recursive = True)
  81. kall = len(text_files)
  82. print(( 'Total number of '+ str(kall) + ' files were found:'+' parsing finished! '.upper()).upper())
  83. #%% Extrtacting usable data
  84. ABook = {}
  85. ErrorList =[]
  86. CheckDates = []
  87. C = 0
  88. #EPI_flag = ["EPI"]
  89. for i in range(len(type_strs)): #Creation of Adress Book
  90. ABook[type_strs[i]] = []
  91. with ap.alive_bar(kall, title='Extracting T1 or T2 weighted, diff and func sequences:'.upper(),length=10,stats = False,spinner= 'wait') as bar:
  92. for p in text_files: #filling the Address Book with wanted files
  93. try:
  94. NameTemp = par.read_param_file(p)
  95. MN = NameTemp[1]["ACQ_method"].upper() #Here we check what the name of the sequence is
  96. MN2 = NameTemp[1]["ACQ_protocol_name"].upper()
  97. KEY = MN + MN2
  98. DateTemp = NameTemp[0]['Date'] #Here we check the date of the measurement
  99. Ans = []
  100. except KeyError:
  101. print("KeyError")
  102. print(p)
  103. ErrorList.append(p)
  104. except UnicodeDecodeError:
  105. print("UnicodeDecodeError")
  106. print(p)
  107. ErrorList.append(p)
  108. Flag_anat = any([(aa in KEY) for aa in T2_string])
  109. Flag_struct = any([(aa in KEY) for aa in DTI_string])
  110. Flag_func = any([(aa in KEY) for aa in FMRI_string])
  111. Flag_notAllowed = any([(aa in KEY) for aa in NotAllowed])
  112. Flag_epi = "EPI" in KEY
  113. if DateTemp not in CheckDates:
  114. if Flag_struct and not Flag_notAllowed:
  115. ABook["diff"].append(os.path.dirname(p))
  116. C = C+1
  117. elif Flag_func and not Flag_notAllowed:
  118. ABook["func"].append(os.path.dirname(p)) #I know it is totally confusing with EPI as the col name for the ABook but sadly EPI can also be a DTI scan
  119. C = C+1
  120. elif Flag_anat and not Flag_notAllowed and not Flag_epi: #T2Star EPIS are usually rsfmri scans
  121. ABook["anat"].append(os.path.dirname(p))
  122. C = C+1
  123. elif Flag_epi and not Flag_notAllowed:
  124. TP = NameTemp[1]["ACQ_time_points"]
  125. MF = NameTemp[1]["ACQ_n_movie_frames"]
  126. if MF != len(TP):
  127. ABook["func"].append(os.path.dirname(p)) #I know it is totally confusing with EPI as the col name for the ABook but sadly EPI can also be a DTI scan
  128. C = C+1
  129. elif MF == len(TP):
  130. ABook["diff"].append(os.path.dirname(p))
  131. C = C+1
  132. # for i,t in enumerate(type_strs):
  133. #
  134. # if t in MN or t in p:
  135. # if t.upper() in MN.upper():
  136. # ABook[type_strs[i]].append(os.path.dirname(p))
  137. # C = C+1
  138. CheckDates.append(DateTemp)
  139. bar()
  140. M = dict.fromkeys(CheckDates)
  141. print(' '+str(C)+' files were extracted! %%%'.upper())
  142. print((' ' + str(len(CheckDates)-len(M))+ ' duplicates were eliminated! %%%').upper())
  143. #%% Saving parsed files
  144. #saving in csv file
  145. for n,type_str in enumerate(type_strs):
  146. if len(ABook[type_str]) !=0:
  147. addreses= pd.DataFrame(ABook[type_str])
  148. csv_path= "raw_data_addreses_"+type_str+".csv"
  149. csv_path= os.path.join(saving_path,csv_path)
  150. addreses.to_csv(csv_path, sep=',',index=False)
  151. if ErrorList:
  152. dfError = pd.DataFrame()
  153. dfError['ErrorData'] = ErrorList
  154. eror= os.path.join(saving_path,"CanNotOpenTheseFiles.csv")
  155. print("Some data could not be opened by the pipline- Check CanNotOpenTheseFiles.csv for more information")
  156. dfError.to_csv(eror,index=False)
  157. print('\n\ncsv files were created:' + str(saving_path))
  158. print('\n\n%%%%%%%%%%%%% End of stage 1 %%%%%%%%%%%%%%%'.upper())
  159. # to make exel file as an output you can uncomment below lines
  160. # for i,T in enumerate(Types):
  161. # globals()['df'+ str(i)] = pd.DataFrame(ABook[T])
  162. # saving_path2 = saving_path + 'QuiC_Data_Result_raw.xlsx'
  163. # writer = pd.ExcelWriter(saving_path2, engine='xlsxwriter')
  164. # for i,T in enumerate(Types_new):
  165. # globals()['df'+ str(i)].to_excel(writer,sheet_name=T, index = False)
  166. # dfError.to_excel(writer, sheet_name='ErrorData',index = False)
  167. # writer.save()
  168. # print('\n\nExcel file was created:' + str(saving_path2))
  169. # print('\n\n%%%%%%%%%%%%%End of the first stage%%%%%%%%%%%%%%%'.upper())
  170. #%% Parsing nifti format
  171. elif format_type=="nifti":
  172. PathALL = os.path.join(initial_path,"**","*" + suffix + ".nii.gz")
  173. PathALL2 = os.path.join(initial_path,"**","*" + suffix + ".nii")
  174. with ap.alive_bar(title='Parsing through folders ...',length=10,stats = False,monitor=False) as bar:
  175. text_files = glob.glob(PathALL, recursive = True) + glob.glob(PathALL2, recursive = True)
  176. kall = len(text_files)
  177. print(( 'Total number of '+ str(kall) + ' files were found:'+'Parsing finished! '.upper()).upper())
  178. ABook={}
  179. for i in range(len(type_strs)): #Creation of Adress Book
  180. ABook[type_strs[i]] = []
  181. # =============================================================================
  182. # text_files2 = [os.path.split(t)[-1] for t in text_files]
  183. # text_files3,Index = np.unique(text_files2,return_index=True)
  184. # text_files = [text_files[ii] for ii in Index]
  185. # ============================================================================
  186. for i,T in enumerate(type_strs):
  187. globals()['df'+ str(i)] = pd.DataFrame(ABook[T])
  188. for i in text_files :
  189. for rr,Q in enumerate(i.split(os.sep)):
  190. temp = i.split(os.sep)[-rr].upper()
  191. Flag_anat = any([(aa in temp) for aa in T2_string])
  192. Flag_struct = any([(aa in temp) for aa in DTI_string])
  193. Flag_func = any([(aa in temp) for aa in FMRI_string])
  194. Flag_notAllowed = any([(aa in temp) for aa in NotAllowed])
  195. if any([Flag_anat,Flag_struct,Flag_func,Flag_notAllowed]):
  196. break
  197. if not any([Flag_anat,Flag_struct,Flag_func]):
  198. continue
  199. print("The sequence type is ambiguous between registered nifti files.")
  200. print("To solve this problem use the following names to define sequences in ther path name:")
  201. print(DTI_string)
  202. print(FMRI_string)
  203. print(T2_string)
  204. print('Avoid using "EPI"!')
  205. if Flag_struct and not Flag_notAllowed:
  206. ABook["diff"].append(i)
  207. elif Flag_func and not Flag_notAllowed:
  208. ABook["func"].append(i)
  209. elif Flag_anat and not Flag_notAllowed:
  210. ABook["anat"].append(i)
  211. #saving in csv file
  212. for n,type_str in enumerate(type_strs):
  213. if len(ABook[type_str]) !=0:
  214. addreses= pd.DataFrame(ABook[type_str])
  215. csv_path= "nifti_data_addreses_"+type_str+".csv"
  216. csv_path= os.path.join(saving_path,csv_path)
  217. addreses.to_csv(csv_path, sep=',',index=False)
  218. print('\n\ncsv files were created:' + str(saving_path))
  219. print('\n\n%%%%%%%%%%%%% End of the stage 1 %%%%%%%%%%%%%%%'.upper())
  220. print('\nStarting Stage 2 ...'.upper())
  221. #print('\nChosen Sequences are: ')
  222. #print(sequence_types)
  223. print('\nCalculating features...\n'.upper())
  224. print('This will take some time depending on the size of the dataset. See the progress bar below.\n\n')
  225. if format_type=="raw":
  226. fc.CheckingRawFeatures(saving_path)
  227. QC.toc()
  228. elif format_type=="nifti":
  229. fc.CheckingNiftiFeatures(saving_path)
  230. QC.toc()
  231. print('Plotting quality features...\n'.upper())
  232. QC.QCPlot(saving_path)
  233. QC.QCtable(saving_path, format_type)
  234. # remove addressed files
  235. for file in glob.glob(os.path.join(saving_path, '*data_addreses*.csv')) :
  236. os.remove(file)
  237. # relocate calculated fearures
  238. calculated_features = os.path.join(saving_path, "calculated_features")
  239. os.mkdir(calculated_features)
  240. old_files=[]
  241. for old_file in glob.glob(os.path.join(saving_path, '*caculated_features*.csv')) :
  242. old_files.append(old_file)
  243. new_direction= os.path.join(saving_path,"calculated_features")
  244. new_files=[]
  245. for old_file in old_files:
  246. path_split= os.path.split(old_file)
  247. files_name= path_split[1]
  248. join_path= os.path.join(new_direction,files_name)
  249. new_files.append(join_path)
  250. for new_file,old_file in enumerate(old_files) :
  251. shutil.move(old_file , new_files[new_file])
  252. print('\n\n%%%%%%%%%%%%%Quality feature plots were successfully created and saved%%%%%%%%%%%%%%%\n\n'.upper())
  253. print('------------------------------------------------------------')
  254. print('Thank you for using our code. For questions please contact us via:')
  255. print('aref.kalantari-sarcheshmeh@uk-koeln.de or markus.aswendt@uk-koeln.de')
  256. print('Lab: AG Neuroimaging and neuroengineering of experimental stroke University Hospital Cologne')
  257. print('Web:https://neurologie.uk-koeln.de/forschung/ag-neuroimaging-neuroengineering/')
  258. print('------------------------------------------------------------')