Confusionplot.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Fri Sep 1 17:57:59 2023
  4. @author: arefks
  5. """
  6. import glob
  7. import pandas as pd
  8. import numpy as np
  9. import matplotlib.pyplot as plt
  10. import seaborn as sns
  11. from scipy.stats import ttest_ind, f_oneway
  12. import glob
  13. import os
  14. from scipy.stats import ttest_ind
  15. from statsmodels.stats.multitest import multipletests
  16. import re
  17. from statsmodels.stats.inter_rater import fleiss_kappa
  18. from statsmodels.stats.inter_rater import aggregate_raters
  19. #%function
  20. def calculate_and_display_fleiss_kappa(data):
  21. kappa, _ = fleiss_kappa(data)
  22. print("Fleiss' Kappa:", kappa)
  23. return kappa
#%% Expert
# Glob pattern covering every per-dataset votings.csv written by the
# AIDAqc validation step (one subfolder per dataset on the Z: share).
Path_votings = r"Z:\2023_Kalantari_AIDAqc\outputs\QC_Final\validation\*\votings.csv"
All_csv_votings = glob.glob(Path_votings)
  27. def read_csv_files(files):
  28. data_dict = {}
  29. for ff,file in enumerate(files):
  30. df = pd.read_csv(file)
  31. #data_dict[ff] = df[df["Voting outliers (from 5)"]]
  32. data_dict[ff] = df
  33. return data_dict
  34. All_values = read_csv_files(All_csv_votings)
  35. img_values = []
  36. for df_name, df in All_values.items():
  37. if "corresponding_img" in df.columns:
  38. img_values.extend(df["corresponding_img"].tolist())
  39. new_df = pd.DataFrame({"corresponding_img": img_values})
  40. MU_strings = ["adam","joanes"]
  41. Sequence_type = ["anatomical"]
  42. cc=0
  43. cm = 1/2.54 # centimeters in inches
  44. plt.figure(figsize=(18*cm, 9*cm))
  45. fig, ax = plt.subplots(1,2, dpi=300,figsize=(18*cm, 9*cm))#,sharex="row",sharey="row")
  46. #fig.suptitle('Confusion matrix',fontname='Times New Roman')
  47. Matric_sequneces = []
# For each (sequence type, manual rater) pair, build a rate-normalized
# confusion matrix comparing the rater's manual "bad" selection (ground
# truth) against AIDAqc's automatic outlier votes, and plot it as a heatmap.
for ss,S in enumerate(Sequence_type):
    for mu,MU in enumerate(MU_strings):
        # Population: every rendered image of this sequence type.
        Path = r"Z:\2023_Kalantari_AIDAqc\outputs\QC_Final\validation\*\votings.csv"
        p_afs = os.path.join(os.path.dirname(Path),S+"*.png")
        afs_all = glob.glob(p_afs,recursive=True)
        afs_all = [os.path.basename(path) for path in afs_all]
        count_afs_all = len(afs_all)
        # Ground truth: images rater MU manually copied into validation_<MU>.
        Path_gt = r"Z:\2023_Kalantari_AIDAqc\outputs\QC_Final\validation\*\validation_" + MU
        pgt_afs = os.path.join(Path_gt, S+"*.png")
        afsgt_all = glob.glob(pgt_afs,recursive=True)
        afsgt_all = [os.path.basename(path) for path in afsgt_all]
        countgt_afs_bad = len(afsgt_all)
        countgt_afs_good = count_afs_all - countgt_afs_bad
        # Separate the new_df DataFrame based on specific prefixes:
        # images AIDAqc voted as outliers for this sequence type.
        afsqc_all = new_df[new_df['corresponding_img'].str.startswith(S)]['corresponding_img'].tolist()
        countqc_afs_bad = len(afsqc_all)
        countqc_afs_good = count_afs_all - countqc_afs_bad
        # "Bad" is the positive class: flagged by both rater and AIDAqc = TP.
        afs_intersect_qc_gt = set(afsgt_all) & set(afsqc_all)
# =============================================================================
#         afs_TN = len(afs_intersect_qc_gt)
#         afs_FN = countqc_afs_bad - afs_TN
#         afs_FP = countgt_afs_bad - afs_TN
#         afs_TP = countgt_afs_good - afs_FN
# =============================================================================
        afs_TP = len(afs_intersect_qc_gt)
        afs_FN = countgt_afs_bad - afs_TP
        afs_FP = countqc_afs_bad - afs_TP
        afs_TN = countgt_afs_good - afs_FP
# =============================================================================
        # Row-normalized rates (TPR/FNR over ground-truth bad, FPR/TNR over
        # ground-truth good).
        # NOTE(review): divides by countgt_afs_bad / countgt_afs_good — raises
        # ZeroDivisionError if the rater flagged none (or all) of the images;
        # confirm the validation folders are never empty.
        afs_percent_TP = (afs_TP / countgt_afs_bad)
        afs_percent_FN = (1 - afs_percent_TP)
        afs_percent_FP = (afs_FP /countgt_afs_good)
        afs_percent_TN = (1 - afs_percent_FP)
# =============================================================================
        # Calculate precision
        precision = afs_TP / (afs_TP + afs_FP)
        #print("precision"+str(precision))
        # Calculate recall
        recall = afs_TP / (afs_TP + afs_FN)
        #print("recall:"+str(recall))
        # Calculate F1 score
        f1_score = 2 * (precision * recall) / (precision + recall)
        # Print the F1 score
        #print("F1 Score:", f1_score)
        confusion_matrix = [[afs_percent_TP, afs_percent_FN],
                            [afs_percent_FP, afs_percent_TN]]
# =============================================================================
#         Per = afs_TP + afs_FN+afs_FP +afs_TN
#         confusion_matrix = [[afs_TP/Per, afs_FN/Per],
#                             [afs_FP/Per, afs_TN/Per]]
#
# =============================================================================
        # Create a heatmap using Seaborn
        sns.set(font_scale=0.8) # Adjust the font size
        # NOTE(review): ax[mu,ss] needs a 2-D axes array; plt.subplots(1,2)
        # above yields a 1-D one — confirm the subplot grid dimensions.
        heatmap = sns.heatmap(confusion_matrix, annot=True, fmt='.2%', cmap='Greys',
                              annot_kws={"fontname": "Times New Roman"},
                              xticklabels=False, yticklabels=False, cbar=False,ax=ax[mu,ss])
        ax[mu, ss].set_xlabel('AIDAqc', fontname='Times New Roman')
        ax[mu, ss].set_ylabel("Test", fontname='Times New Roman')
        ax[mu, ss].set_title(S.capitalize()+'\n F1-score: %.2f' % f1_score + "", fontname='Times New Roman', weight="bold")
        ax[mu, ss].set_xticks([0.5, 1.5])
        ax[mu, ss].set_xticklabels(['bad', 'good'], fontname='Times New Roman')
        ax[mu, ss].set_yticks([0.5, 1.5])
        ax[mu, ss].set_yticklabels(['bad', 'good'], fontname='Times New Roman',rotation=90)
        # 0/1 vote vectors over the whole population (0 = flagged bad,
        # 1 = good) for the rater and for AIDAqc, used for kappa later.
        # NOTE(review): membership tests against plain lists are O(n) each;
        # pre-building sets would avoid quadratic cost for many images.
        Vec = [0 if item in afsgt_all else 1 for item in afs_all]
        Matric_sequneces.append(Vec)
        Vec_qc = [0 if item in afsqc_all else 1 for item in afs_all]
        Matric_sequneces.append(Vec_qc)
# Show the plot
#plt.subplots_adjust(left=0.00, right=1, top=1, bottom=0.00, wspace=0.3, hspace=1.8)
plt.tight_layout()
plt.show()
# Re-group the collected 0/1 vote vectors by sequence type. Each sequence
# contributed 4 vectors in order: adam-gt, aidaqc, joanes-gt, aidaqc.
# NOTE(review): with Sequence_type = ["anatomical"] only indices 0:4 exist;
# Matric_sequneces[4:8] and [8:12] are empty, so np.stack on them raises
# ValueError — these lines presume functional and structural sequences were
# also processed. Confirm before running.
Stacked_anat = np.stack(Matric_sequneces[0:4],axis=0).transpose()
Stacked_func = np.stack(Matric_sequneces[4:8],axis=0).transpose()
Stacked_struct = np.stack(Matric_sequneces[8:12],axis=0).transpose()
Stacked_anat_bool = Stacked_anat.astype(bool)
Stacked_func_bool = Stacked_func.astype(bool)
Stacked_struct_bool = Stacked_struct.astype(bool)
# Define column labels
# NOTE(review): each stacked array has 4 columns but only 3 labels are
# supplied — pd.DataFrame will raise on the shape mismatch; verify which
# of the 4 vote vectors these three labels are meant to cover.
column_labels = ["experienced", "expert", "aidaqc"]
# Create pandas DataFrames
df_anat = pd.DataFrame(Stacked_anat_bool, columns=column_labels)
df_func = pd.DataFrame(Stacked_func_bool, columns=column_labels)
df_struct = pd.DataFrame(Stacked_struct_bool, columns=column_labels)
  132. from sklearn.metrics import cohen_kappa_score
  133. # Create a list of DataFrames
  134. dataframes = [df_anat, df_func, df_struct]
  135. # Loop through the DataFrames
  136. for i, df in enumerate(dataframes):
  137. print(f"DataFrame {i + 1}:")
  138. for col1 in df.columns:
  139. for col2 in df.columns:
  140. if col1 != col2:
  141. kappa_score = cohen_kappa_score(df[col1], df[col2])
  142. print(f"Cohen's Kappa Score between {col1} and {col2}: {kappa_score:.4f}")
  143. print()