123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
# Compare the "Voting outliers (from 5)" score of artifact-induced images with
# the score of their original counterparts: gather every QCs*/votings.csv below
# init_path, pair each artifact row with its original, draw a box/strip plot,
# run Welch's t-test and save the figure as SVG.
import glob
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import ttest_ind

# Column names and group labels used throughout the script.
PATH_COL = "Pathes"
VOTES_COL = "Voting outliers (from 5)"
GROUP_COL = "Artifact"
ORIGINAL = "original"
ARTIFACT = "artifact induced"

# Initial path
init_path = r"C:\Users\aswen\Desktop\TestingData\Aswendt_qc_rsfmri_plot"

# Every votings.csv that sits in a "QCs*" sub-folder of init_path
searchpath = os.path.join(init_path, "QCs*", "votings.csv")
file_list = glob.glob(searchpath)
if not file_list:
    # Fail early with a clear message; otherwise the first column access on
    # the empty table below raises an unrelated-looking KeyError.
    raise FileNotFoundError(f"No votings.csv files found for pattern: {searchpath}")

# Read all tables, then concatenate once. Growing a DataFrame inside the loop
# re-copies the accumulated rows on every iteration.
df_all = pd.concat(
    [pd.read_csv(file_path) for file_path in file_list],
    ignore_index=True,
)

# Remove duplicate rows (the same vote may appear in several CSV files)
df_all.drop_duplicates(inplace=True)

# Path under which the artifact-free original of each image is looked up.
# NOTE(review): the artifact suffix is replaced by ".gz", not ".nii.gz" --
# confirm this matches how the artifact files were named.
df_all["path_check"] = df_all[PATH_COL].str.replace(
    "_strong_artifact.nii.gz", ".gz", regex=False
)

# Rows whose path mentions "artifact". na=False treats missing paths as
# non-matching instead of raising; .copy() makes the later column assignment
# act on an independent frame rather than on a slice of df_all.
df_artifact = df_all[df_all[PATH_COL].str.contains("artifact", na=False)].copy()

# For every artifact image collect the row(s) of its original. Originals that
# are absent from the votings tables received no votes, so they enter with 0.
original_parts = []
for pp in df_artifact["path_check"].unique():
    matching_non_artifact = df_all[df_all[PATH_COL] == pp]
    if matching_non_artifact.empty:
        matching_non_artifact = pd.DataFrame({PATH_COL: [pp], VOTES_COL: [0]})
    original_parts.append(matching_non_artifact)

if original_parts:
    # One concat of real pieces keeps the numeric dtype of the votes column;
    # seeding with an empty object-typed frame would make the dtype depend on
    # how pandas treats empty entries in concat.
    df_no_artifact = pd.concat(original_parts, ignore_index=True).reindex(
        columns=df_all.columns
    )
else:
    df_no_artifact = pd.DataFrame(columns=df_all.columns)

# Prepare the data for the seaborn plots
df_artifact[GROUP_COL] = ARTIFACT
df_no_artifact[GROUP_COL] = ORIGINAL
# ignore_index gives every row a unique label (both inputs reuse labels).
plot_data = pd.concat([df_no_artifact, df_artifact], ignore_index=True)

# Set font to Times New Roman
plt.rcParams["font.family"] = "Times New Roman"
cm = 1 / 2.54  # centimetres -> inches, the unit figsize expects

# Fix the category order so boxes, points and the mean line always agree.
group_order = [ORIGINAL, ARTIFACT]

# Box plot without fliers; the individual points are drawn by the strip plot
plt.figure(figsize=(4.5 * cm, 3 * cm), dpi=300)
sns.boxplot(
    x=GROUP_COL,
    y=VOTES_COL,
    data=plot_data,
    order=group_order,
    palette="Set2",
    linewidth=1,
    width=0.5,
    showfliers=False,
)
sns.stripplot(
    x=GROUP_COL,
    y=VOTES_COL,
    data=plot_data,
    order=group_order,
    jitter=True,
    color="black",
    alpha=0.5,
    size=2,
)

# Group means, connected by a dashed line
avg_original = plot_data.loc[plot_data[GROUP_COL] == ORIGINAL, VOTES_COL].mean()
avg_artifact_induced = plot_data.loc[plot_data[GROUP_COL] == ARTIFACT, VOTES_COL].mean()
plt.plot(
    group_order,
    [avg_original, avg_artifact_induced],
    color="grey",
    linestyle="--",
    linewidth=0.4,
)

plt.ylabel("Majority Votes", fontsize=8)
plt.xticks(fontsize=8)
plt.yticks(fontsize=8)
plt.xlabel(None)  # the group names on the ticks are self-explanatory
plt.ylim([-1, 7])

# Welch's t-test (unequal variances) between the two groups
statistic, p_value = ttest_ind(
    df_artifact[VOTES_COL],
    df_no_artifact[VOTES_COL],
    equal_var=False,
)
print(f"Welch's t-test p-value: {p_value}")

output_path = r"C:\Users\aswen\Desktop\Code\AIDAqc_Figures\figures\pythonCreated\VotingProof"
plt.tight_layout()
# Save as SVG
plt.savefig(output_path + ".svg", format='svg')
plt.show()
|