doi
/
2023_Kalantari_AIDAqc
forked from Aswendt_Lab/2023_Kalantari_AIDAqc


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
							import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, f_oneway
import glob
import os
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests
import re

#% Function to read CSV files and store them in a dictionary

cm = 1/2.54  # centimeters in inches
def read_csv_files(files):
    data_dict = {}
    for ff,file in enumerate(files):
        df = pd.read_csv(file)    
        # Extract the data type from the file name (assuming the file name contains "anatomical", "structural", or "functional")
        data_type = "anatomical" if "anatomical" in file else ("structural" if "structural" in file else "functional")
        data_dict[ff] = {data_type:df} 
            
    return data_dict


# Function for statistical comparison and plotting
def compare_and_plot(data, column_name, group_column):
    sns.set_palette("Set2")  # Use colors for color-blind people
    plt.figure(figsize=(9*cm, 6*cm))
    sns.boxplot(x=group_column, y=column_name, data=data)
    plt.xlabel(group_column)
    plt.ylabel(column_name)
    plt.title(f"Statistical Comparison of {column_name} by {group_column}")
    plt.tight_layout()
    #plt.savefig(f"{column_name}_by_{group_column}_boxplot.png")
    plt.show()

#% List of CSV files for each data type
Path = r"C:\Users\aswen\Documents\Data\2023_Kalantari_AIDAqc\outputs\validation\QC_SameFSdiffVS" #QC_Plot_VoxelsSize

anatomical_files = glob.glob(os.path.join(Path,"**/*caculated_features_anatomical.csv"), recursive=True)
structural_files = glob.glob(os.path.join(Path,"**/*caculated_features_structural.csv"), recursive=True)
functional_files = glob.glob(os.path.join(Path,"**/*caculated_features_functional.csv"), recursive=True)

All_files = [anatomical_files,structural_files,functional_files]

# Read the CSV files and store them in dictionaries
anatomical_data = read_csv_files(anatomical_files)
structural_data = read_csv_files(structural_files)
functional_data = read_csv_files(functional_files)
All_Data = [anatomical_data]

All_type = ["anatomical"]
#% data statistisc figure 7

#features_to_compare = ["SNR Chang", "SNR Normal", "tSNR (Averaged Brain ROI)", "Displacement factor (std of Mutual information)"]
features_to_compare = ["SNR Chang"]
#features_to_compare = ["Vol"]

for dd,data in enumerate(All_Data):
    for feature in features_to_compare:
        cc = 0
        temp = pd.DataFrame()
        Data_of_selected_feature = pd.DataFrame()
        temp_data = pd.DataFrame()

        for key in data:
            try:
                temp_data[feature] = data[key][All_type[dd]][feature]
                temp_data["SpatRx"] = data[key][All_type[dd]]["SpatRx"]
                temp_data["SpatRy"] = data[key][All_type[dd]]["SpatRy"]
                temp_data["Slicethick"] = data[key][All_type[dd]]["Slicethick"]
                
            except KeyError:
                continue
            temp_data["Dataset"] = All_files[dd][cc].split(os.sep)[-3]
            cc = cc +1
            Data_of_selected_feature = pd.concat([Data_of_selected_feature, temp_data], ignore_index=True)
            #Data_of_selected_feature2 = pd.concat([Data_of_selected_feature2, temp_data], ignore_index=True)
            
            
        if not Data_of_selected_feature.empty:
            Data_of_selected_feature['sort'] = Data_of_selected_feature['Dataset'].str.extract('(\d+)', expand=True).astype(int)
            Data_of_selected_feature = Data_of_selected_feature.sort_values('Dataset')
            
            if feature == "SNR Normal":
                Data_of_selected_feature.rename(columns={"SNR Normal": "SNR-Standard (dB)"}, inplace=True)
                feature = "SNR-Standard (dB)"
            if feature == "SNR Chang":
                Data_of_selected_feature.rename(columns={"SNR Chang": "SNR-Chang (dB)"}, inplace=True)
                feature = "SNR-Chang (dB)"    
            elif feature == "tSNR (Averaged Brain ROI)":
                Data_of_selected_feature.rename(columns={"tSNR (Averaged Brain ROI)": "tSNR (dB)"}, inplace=True)
                feature = "tSNR (dB)"
            elif feature == "Displacement factor (std of Mutual information)":
                Data_of_selected_feature.rename(columns={"Displacement factor (std of Mutual information)": "Motion severity (A.U)"}, inplace=True)
                feature = "Motion severity (A.U)"
            
            
            Data_of_selected_feature["Vol"] = Data_of_selected_feature["SpatRx"]*Data_of_selected_feature["SpatRy"]*Data_of_selected_feature["Slicethick"]
            #Data_of_selected_feature = Data_of_selected_feature.sort_values("Dataset",ascending=False)
            # creating boxplots
            plt.figure(figsize=(4.5*cm,5*cm),dpi=300)
            sns.set_style('ticks')
            sns.set(font='Times New Roman', font_scale=0.8,style=None)  # Set font to Times New Roman and font size to 9
            palette = 'pastel'
            ax = sns.violinplot(x="Dataset", y=feature, data=Data_of_selected_feature, hue="Dataset", dodge=False,
                                palette=palette,
                                scale="width", inner=None,linewidth=1)
            patches = ax.patches
            #legend_colors = [patch.get_facecolor() for patch in patches[:]]

            xlim = ax.get_xlim()
            ylim = ax.get_ylim()
            for violin in ax.collections:
                bbox = violin.get_paths()[0].get_extents()
                x0, y0, width, height = bbox.bounds
                violin.set_clip_path(plt.Rectangle((x0, y0), width / 2, height, transform=ax.transData))
            
            sns.boxplot(x="Dataset", y=feature, data=Data_of_selected_feature, saturation=1, showfliers=False,
                        width=0.3, boxprops={'zorder': 3, 'facecolor': 'none'}, ax=ax, linewidth=1)
            old_len_collections = len(ax.collections)
            sns.stripplot(x="Dataset", y=feature, data=Data_of_selected_feature,size=1.1, hue="Dataset", palette=palette, dodge=False, ax=ax)
            for dots in ax.collections[old_len_collections:]:
                dots.set_offsets(dots.get_offsets() + np.array([0.12, 0]))
            ax.set_xlim(xlim)
            ax.set_ylim(ylim)
            ax.legend_.remove()
            ax
            ax.set_xticklabels(ax.get_xticklabels(), rotation=45,fontsize=8)
            #ax.set_ylabel(ax.get_ylabels(),fontsize=8)
# =============================================================================
#             for label, color in zip(ax.get_xticklabels(), legend_colors):
#                 label.set_color(color)
# =============================================================================
            ax.set_xlabel('')
            ax.set_title("(b) Voxel size effect",weight='bold',fontsize=10)
            y_label = ax.set_ylabel(ax.get_ylabel(),fontsize=8)

# =============================================================================
            ax.xaxis.grid(True, linestyle='-', which='major', color='gray', linewidth=0.5)
            ax.xaxis.grid(True, linestyle='--', which='minor', color='gray', linewidth=0.5)
# 
            ax.yaxis.grid(True, linestyle='-', which='major', color='gray', linewidth=0.5)
            ax.yaxis.grid(True, linestyle='--', which='minor', color='gray', linewidth=0.5)        
# =============================================================================
            ax.spines['top'].set_linewidth(0)  # Top border
            ax.spines['right'].set_linewidth(0)  # Right border
            ax.spines['bottom'].set_linewidth(0.5)  # Bottom border
            ax.spines['left'].set_linewidth(0.5)  # Left border
            ax.tick_params(axis='both', which='both', width=0.5,color='gray',length=2)
            plt.xticks(ha='right')
            #plt.savefig(os.path.join(Path,feature+"_"+All_type[dd]+".svg"), format='svg', bbox_inches='tight',transparent=False)
            plt.show()
                        
            import pandas as pd
            import scikit_posthocs as sp
            from scipy.stats import kruskal

            # reading CSV file
            #dataset= pd.read_csv(r"C:\Users\arefk\OneDrive\Desktop\Projects\iris.csv")
             
            # data which contains sepal width of the three species
            # data = [Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_Rei"]['Vol'],
            #        Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_We"]['Vol'],
            #        Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94c_m_As"]['Vol']]
            
            data = [Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_As"][feature],
                    Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_Ce"][feature],
                    Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_Je"][feature]]
            
            
            # Perform Kruskal-Wallis test
            kruskal_statistic, p_value_kruskal = kruskal(*data,nan_policy="omit")
            print(kruskal_statistic)
            print(p_value_kruskal)
            # using the posthoc_dunn() function
            p_values= sp.posthoc_dunn(data, p_adjust = 'holm')
             
            print(p_values)