SameFSdifferentVS.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. import seaborn as sns
  5. from scipy.stats import ttest_ind, f_oneway
  6. import glob
  7. import os
  8. from scipy.stats import ttest_ind
  9. from statsmodels.stats.multitest import multipletests
  10. import re
  11. #% Function to read CSV files and store them in a dictionary
  12. cm = 1/2.54 # centimeters in inches
  13. def read_csv_files(files):
  14. data_dict = {}
  15. for ff,file in enumerate(files):
  16. df = pd.read_csv(file)
  17. # Extract the data type from the file name (assuming the file name contains "anatomical", "structural", or "functional")
  18. data_type = "anatomical" if "anatomical" in file else ("structural" if "structural" in file else "functional")
  19. data_dict[ff] = {data_type:df}
  20. return data_dict
  21. # Function for statistical comparison and plotting
  22. def compare_and_plot(data, column_name, group_column):
  23. sns.set_palette("Set2") # Use colors for color-blind people
  24. plt.figure(figsize=(9*cm, 6*cm))
  25. sns.boxplot(x=group_column, y=column_name, data=data)
  26. plt.xlabel(group_column)
  27. plt.ylabel(column_name)
  28. plt.title(f"Statistical Comparison of {column_name} by {group_column}")
  29. plt.tight_layout()
  30. #plt.savefig(f"{column_name}_by_{group_column}_boxplot.png")
  31. plt.show()
  32. #% List of CSV files for each data type
  33. Path = r"C:\Users\aswen\Documents\Data\2023_Kalantari_AIDAqc\outputs\validation\QC_SameFSdiffVS" #QC_Plot_VoxelsSize
  34. anatomical_files = glob.glob(os.path.join(Path,"**/*caculated_features_anatomical.csv"), recursive=True)
  35. structural_files = glob.glob(os.path.join(Path,"**/*caculated_features_structural.csv"), recursive=True)
  36. functional_files = glob.glob(os.path.join(Path,"**/*caculated_features_functional.csv"), recursive=True)
  37. All_files = [anatomical_files,structural_files,functional_files]
  38. # Read the CSV files and store them in dictionaries
  39. anatomical_data = read_csv_files(anatomical_files)
  40. structural_data = read_csv_files(structural_files)
  41. functional_data = read_csv_files(functional_files)
  42. All_Data = [anatomical_data]
  43. All_type = ["anatomical"]
  44. #% data statistisc figure 7
  45. #features_to_compare = ["SNR Chang", "SNR Normal", "tSNR (Averaged Brain ROI)", "Displacement factor (std of Mutual information)"]
  46. features_to_compare = ["SNR Chang"]
  47. #features_to_compare = ["Vol"]
  48. for dd,data in enumerate(All_Data):
  49. for feature in features_to_compare:
  50. cc = 0
  51. temp = pd.DataFrame()
  52. Data_of_selected_feature = pd.DataFrame()
  53. temp_data = pd.DataFrame()
  54. for key in data:
  55. try:
  56. temp_data[feature] = data[key][All_type[dd]][feature]
  57. temp_data["SpatRx"] = data[key][All_type[dd]]["SpatRx"]
  58. temp_data["SpatRy"] = data[key][All_type[dd]]["SpatRy"]
  59. temp_data["Slicethick"] = data[key][All_type[dd]]["Slicethick"]
  60. except KeyError:
  61. continue
  62. temp_data["Dataset"] = All_files[dd][cc].split(os.sep)[-3]
  63. cc = cc +1
  64. Data_of_selected_feature = pd.concat([Data_of_selected_feature, temp_data], ignore_index=True)
  65. #Data_of_selected_feature2 = pd.concat([Data_of_selected_feature2, temp_data], ignore_index=True)
  66. if not Data_of_selected_feature.empty:
  67. Data_of_selected_feature['sort'] = Data_of_selected_feature['Dataset'].str.extract('(\d+)', expand=True).astype(int)
  68. Data_of_selected_feature = Data_of_selected_feature.sort_values('Dataset')
  69. if feature == "SNR Normal":
  70. Data_of_selected_feature.rename(columns={"SNR Normal": "SNR-Standard (dB)"}, inplace=True)
  71. feature = "SNR-Standard (dB)"
  72. if feature == "SNR Chang":
  73. Data_of_selected_feature.rename(columns={"SNR Chang": "SNR-Chang (dB)"}, inplace=True)
  74. feature = "SNR-Chang (dB)"
  75. elif feature == "tSNR (Averaged Brain ROI)":
  76. Data_of_selected_feature.rename(columns={"tSNR (Averaged Brain ROI)": "tSNR (dB)"}, inplace=True)
  77. feature = "tSNR (dB)"
  78. elif feature == "Displacement factor (std of Mutual information)":
  79. Data_of_selected_feature.rename(columns={"Displacement factor (std of Mutual information)": "Motion severity (A.U)"}, inplace=True)
  80. feature = "Motion severity (A.U)"
  81. Data_of_selected_feature["Vol"] = Data_of_selected_feature["SpatRx"]*Data_of_selected_feature["SpatRy"]*Data_of_selected_feature["Slicethick"]
  82. #Data_of_selected_feature = Data_of_selected_feature.sort_values("Dataset",ascending=False)
  83. # creating boxplots
  84. plt.figure(figsize=(4.5*cm,5*cm),dpi=300)
  85. sns.set_style('ticks')
  86. sns.set(font='Times New Roman', font_scale=0.8,style=None) # Set font to Times New Roman and font size to 9
  87. palette = 'pastel'
  88. ax = sns.violinplot(x="Dataset", y=feature, data=Data_of_selected_feature, hue="Dataset", dodge=False,
  89. palette=palette,
  90. scale="width", inner=None,linewidth=1)
  91. patches = ax.patches
  92. #legend_colors = [patch.get_facecolor() for patch in patches[:]]
  93. xlim = ax.get_xlim()
  94. ylim = ax.get_ylim()
  95. for violin in ax.collections:
  96. bbox = violin.get_paths()[0].get_extents()
  97. x0, y0, width, height = bbox.bounds
  98. violin.set_clip_path(plt.Rectangle((x0, y0), width / 2, height, transform=ax.transData))
  99. sns.boxplot(x="Dataset", y=feature, data=Data_of_selected_feature, saturation=1, showfliers=False,
  100. width=0.3, boxprops={'zorder': 3, 'facecolor': 'none'}, ax=ax, linewidth=1)
  101. old_len_collections = len(ax.collections)
  102. sns.stripplot(x="Dataset", y=feature, data=Data_of_selected_feature,size=1.1, hue="Dataset", palette=palette, dodge=False, ax=ax)
  103. for dots in ax.collections[old_len_collections:]:
  104. dots.set_offsets(dots.get_offsets() + np.array([0.12, 0]))
  105. ax.set_xlim(xlim)
  106. ax.set_ylim(ylim)
  107. ax.legend_.remove()
  108. ax
  109. ax.set_xticklabels(ax.get_xticklabels(), rotation=45,fontsize=8)
  110. #ax.set_ylabel(ax.get_ylabels(),fontsize=8)
  111. # =============================================================================
  112. # for label, color in zip(ax.get_xticklabels(), legend_colors):
  113. # label.set_color(color)
  114. # =============================================================================
  115. ax.set_xlabel('')
  116. ax.set_title("(b) Voxel size effect",weight='bold',fontsize=10)
  117. y_label = ax.set_ylabel(ax.get_ylabel(),fontsize=8)
  118. # =============================================================================
  119. ax.xaxis.grid(True, linestyle='-', which='major', color='gray', linewidth=0.5)
  120. ax.xaxis.grid(True, linestyle='--', which='minor', color='gray', linewidth=0.5)
  121. #
  122. ax.yaxis.grid(True, linestyle='-', which='major', color='gray', linewidth=0.5)
  123. ax.yaxis.grid(True, linestyle='--', which='minor', color='gray', linewidth=0.5)
  124. # =============================================================================
  125. ax.spines['top'].set_linewidth(0) # Top border
  126. ax.spines['right'].set_linewidth(0) # Right border
  127. ax.spines['bottom'].set_linewidth(0.5) # Bottom border
  128. ax.spines['left'].set_linewidth(0.5) # Left border
  129. ax.tick_params(axis='both', which='both', width=0.5,color='gray',length=2)
  130. plt.xticks(ha='right')
  131. #plt.savefig(os.path.join(Path,feature+"_"+All_type[dd]+".svg"), format='svg', bbox_inches='tight',transparent=False)
  132. plt.show()
  133. import pandas as pd
  134. import scikit_posthocs as sp
  135. from scipy.stats import kruskal
  136. # reading CSV file
  137. #dataset= pd.read_csv(r"C:\Users\arefk\OneDrive\Desktop\Projects\iris.csv")
  138. # data which contains sepal width of the three species
  139. # data = [Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_Rei"]['Vol'],
  140. # Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_We"]['Vol'],
  141. # Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94c_m_As"]['Vol']]
  142. data = [Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_As"][feature],
  143. Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_Ce"][feature],
  144. Data_of_selected_feature[Data_of_selected_feature['Dataset']=="94_m_Je"][feature]]
  145. # Perform Kruskal-Wallis test
  146. kruskal_statistic, p_value_kruskal = kruskal(*data,nan_policy="omit")
  147. print(kruskal_statistic)
  148. print(p_value_kruskal)
  149. # using the posthoc_dunn() function
  150. p_values= sp.posthoc_dunn(data, p_adjust = 'holm')
  151. print(p_values)