# artifact_comparisons.py
import glob
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_ind

  7. # Initial path
  8. init_path = r"C:\Users\aswen\Desktop\TestingData\Aswendt_qc_rsfmri_plot"
  9. # Define the search paths for noisy files and all files
  10. searchpath = os.path.join(init_path, "QCs*", "votings.csv")
  11. # List of all votings.csv files
  12. file_list = glob.glob(searchpath)
  13. # Create an empty DataFrame to append all data
  14. df_all = pd.DataFrame()
  15. # Read each CSV file and append to df_all
  16. for file_path in file_list:
  17. df = pd.read_csv(file_path)
  18. df_all = pd.concat([df_all, df], ignore_index=True)
  19. # Remove duplicates from df_all
  20. df_all.drop_duplicates(inplace=True)
  21. # Extract paths without artifacts
  22. df_all["path_check"] = df_all["Pathes"].str.replace("_strong_artifact.nii.gz", ".gz", regex=False)
  23. # Identify rows with artifacts
  24. df_artifact = df_all[df_all["Pathes"].str.contains("artifact")]
  25. # Initialize an empty DataFrame for non-artifact paths
  26. df_no_artifact = pd.DataFrame(columns=df_all.columns)
  27. # Process each unique path_check in artifact DataFrame
  28. for pp in df_artifact["path_check"].unique():
  29. # Check if there are matching non-artifact paths in df_all
  30. matching_non_artifact = df_all[df_all["Pathes"] == pp]
  31. if not matching_non_artifact.empty:
  32. df_no_artifact = pd.concat([df_no_artifact, matching_non_artifact])
  33. else:
  34. # If no matching non-artifact path, add a row with Voting outliers set to 0
  35. new_row = pd.DataFrame({"Pathes": [pp], "Voting outliers (from 5)": [0]})
  36. df_no_artifact = pd.concat([df_no_artifact, new_row], ignore_index=True)
  37. # Prepare the data for seaborn boxplot
  38. df_artifact["Artifact"] = "artifact induced"
  39. df_no_artifact["Artifact"] = "original"
  40. plot_data = pd.concat([df_no_artifact, df_artifact])
  41. # Set font to Times New Roman
  42. plt.rcParams["font.family"] = "Times New Roman"
  43. cm = 1 / 2.54
  44. # Create the seaborn boxplot with customized settings
  45. plt.figure(figsize=(4.5 * cm, 3 * cm), dpi=300)
  46. sns.boxplot(x="Artifact", y="Voting outliers (from 5)", data=plot_data, palette="Set2", linewidth=1, width=0.5, showfliers=False)
  47. # Add points representing each data point within each box
  48. sns.stripplot(x="Artifact", y="Voting outliers (from 5)", data=plot_data, jitter=True, color="black", alpha=0.5, size=2)
  49. # Calculate averages for "Original" and "Artifact Induced" groups
  50. avg_original = plot_data[plot_data["Artifact"] == "original"]["Voting outliers (from 5)"].mean()
  51. avg_artifact_induced = plot_data[plot_data["Artifact"] == "artifact induced"]["Voting outliers (from 5)"].mean()
  52. # Connect the averages with a line plot
  53. plt.plot(["original", "artifact induced"], [avg_original, avg_artifact_induced], color="grey", linestyle="--", linewidth=0.4)
  54. plt.ylabel("Majority Votes", fontsize=8) # Set ylabel to "Majority Votes"
  55. plt.xticks(fontsize=8) # Set xtick font size
  56. plt.yticks(fontsize=8) # Set ytick font size
  57. plt.xlabel(None) # Remove x-label
  58. plt.ylim([-1, 7])
  59. # Perform Welch's t-test between the two groups
  60. statistic, p_value = ttest_ind(df_artifact["Voting outliers (from 5)"], df_no_artifact["Voting outliers (from 5)"], equal_var=False)
  61. print(f"Welch's t-test p-value: {p_value}")
  62. output_path = r"C:\Users\aswen\Desktop\Code\AIDAqc_Figures\figures\pythonCreated\VotingProof"
  63. plt.tight_layout()
  64. # Save as SVG
  65. plt.savefig(output_path + ".svg", format='svg')
  66. plt.show()