# Aswendt_Lab / 2023_Kalantari_AIDAqc
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind

# Root folder that is searched for QC result folders.
init_path = r"C:\Users\aswen\Desktop\TestingData\Aswendt_qc_rsfmri_plot"

# Glob pattern: the votings.csv inside every sub-folder whose name starts with "QCs".
searchpath = os.path.join(init_path, "QCs*", "votings.csv")

# List of all votings.csv files matching the pattern.
file_list = glob.glob(searchpath)

# Fail early with a clear message; otherwise the script would only stop later
# with an opaque KeyError when the "Pathes" column is accessed on an empty frame.
if not file_list:
    raise FileNotFoundError(f"No votings.csv files found matching: {searchpath}")

# Read every CSV once and concatenate in a single call. Growing a DataFrame
# with pd.concat inside a loop re-copies all previous rows on each iteration.
df_all = pd.concat(
    [pd.read_csv(file_path) for file_path in file_list],
    ignore_index=True,
)

# Remove rows that are exact duplicates across the collected files.
df_all.drop_duplicates(inplace=True)

# Derive, for every row, the path used to look up the matching original image:
# the "_strong_artifact.nii.gz" suffix is swapped for ".gz"; paths without that
# suffix are left unchanged.
# NOTE(review): the replacement is ".gz", not ".nii.gz" - confirm this matches
# how the original (non-artifact) files are named in votings.csv.
df_all["path_check"] = df_all["Pathes"].str.replace("_strong_artifact.nii.gz", ".gz", regex=False)

# Rows whose path mentions "artifact". na=False treats missing paths as
# non-artifact instead of raising on a NaN mask, and .copy() makes this an
# independent frame so adding the "Artifact" column below does not write into
# a slice of df_all (SettingWithCopyWarning).
df_artifact = df_all[df_all["Pathes"].str.contains("artifact", na=False)].copy()

# Collect the original-image rows for every artifact path, then concatenate
# once. This keeps the numeric dtype of the vote column, which seeding the
# result with an empty object-dtype DataFrame does not guarantee.
no_artifact_parts = []
for pp in df_artifact["path_check"].unique():
    # Rows of df_all whose path equals the derived original path.
    matching_non_artifact = df_all[df_all["Pathes"] == pp]
    if matching_non_artifact.empty:
        # No entry for the original image: add a placeholder row with 0 votes.
        # NOTE(review): presumably images that received no votes are not
        # listed in votings.csv - confirm.
        matching_non_artifact = pd.DataFrame({"Pathes": [pp], "Voting outliers (from 5)": [0]})
    no_artifact_parts.append(matching_non_artifact)

if no_artifact_parts:
    df_no_artifact = pd.concat(no_artifact_parts, ignore_index=True)
else:
    # No artifact rows at all: keep an empty frame with the expected columns.
    df_no_artifact = pd.DataFrame(columns=df_all.columns)

# Label both groups and stack them (originals first) for the seaborn plots.
df_artifact["Artifact"] = "artifact induced"
df_no_artifact["Artifact"] = "original"
# ignore_index avoids duplicate index labels in the combined frame.
plot_data = pd.concat([df_no_artifact, df_artifact], ignore_index=True)

# Column holding the number of outlier votes per image.
vote_col = "Voting outliers (from 5)"

# Make sure the vote column is numeric before plotting; after the concatenation
# steps it may have ended up as object dtype. Non-numeric entries raise here
# instead of silently distorting the figure.
plot_data[vote_col] = pd.to_numeric(plot_data[vote_col])

# Set font to Times New Roman
plt.rcParams["font.family"] = "Times New Roman"
cm = 1 / 2.54  # centimetres -> inches, since figsize is given in inches

# Box plot of votes per group (outlier markers hidden, the strip plot shows all points).
plt.figure(figsize=(4.5 * cm, 3 * cm), dpi=300)
sns.boxplot(x="Artifact", y=vote_col, data=plot_data, palette="Set2", linewidth=1, width=0.5, showfliers=False)

# Overlay every individual data point on its box.
sns.stripplot(x="Artifact", y=vote_col, data=plot_data, jitter=True, color="black", alpha=0.5, size=2)

# Mean vote count of each group.
avg_original = plot_data.loc[plot_data["Artifact"] == "original", vote_col].mean()
avg_artifact_induced = plot_data.loc[plot_data["Artifact"] == "artifact induced", vote_col].mean()

# Connect the two group means with a thin dashed line.
plt.plot(["original", "artifact induced"], [avg_original, avg_artifact_induced], color="grey", linestyle="--", linewidth=0.4)

plt.ylabel("Majority Votes", fontsize=8)  # y-axis label
plt.xticks(fontsize=8)  # x tick label size
plt.yticks(fontsize=8)  # y tick label size

plt.xlabel(None)  # Remove x-label
plt.ylim([-1, 7])

# Welch's t-test (unequal variances) between the two groups. Values are
# coerced to numeric and missing entries dropped so that a single NaN does not
# turn the whole result into NaN.
votes_artifact = pd.to_numeric(df_artifact[vote_col]).dropna()
votes_original = pd.to_numeric(df_no_artifact[vote_col]).dropna()
statistic, p_value = ttest_ind(votes_artifact, votes_original, equal_var=False)
print(f"Welch's t-test p-value: {p_value}")

# Output file path without extension; the target folder must already exist.
output_path = r"C:\Users\aswen\Desktop\Code\AIDAqc_Figures\figures\pythonCreated\VotingProof"
plt.tight_layout()
# Save as SVG
plt.savefig(output_path + ".svg", format='svg')
plt.show()