import os import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import statsmodels.formula.api as smf from statsmodels.stats.multicomp import MultiComparison from itertools import combinations import statsmodels import scipy from scipy.stats import dunnett # Set up warning suppression for convergence import warnings warnings.simplefilter(action='ignore') # Define the PlotBoxplot function def PlotBoxplot(filtered_df, qq, mm,dd, output_folder,sig): """ Plot boxplots for filtered dataframe and save as images. Parameters: filtered_df (DataFrame): Filtered dataframe based on "Qtype" and "mask_name". qq (str): Qtype value. mm (str): Mask_name value. output_folder (str): Path to the output folder to save images. """ # Set the color palette to Set2 sns.set_palette("grey") # Set the figure size to 18 cm in width plt.figure(figsize=(18/2.54, 4)) # Convert 18 cm to inches # Create a boxplot sns.boxplot(data=filtered_df, x="Group", y="Value", hue="merged_timepoint", fliersize=3 ,flierprops={"marker": "o"}) # Set title and labels plt.title(f'{qq}_{mm}_{dd}') plt.xlabel('Timepoint') plt.ylabel(qq+"(n.a)") # Show legend without title and frame plt.legend(title=None, frameon=False,bbox_to_anchor=(1.05, 1), loc='best') plt.tight_layout() # Save the plot as an image if sig: output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm_significant.png') else: output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm.png') plt.savefig(output_path, dpi=100) # Close the plot to avoid displaying it plt.close() # Get the directory where the code file is located code_dir = os.path.dirname(os.path.abspath(__file__)) # Get the parent directory of the code directory parent_dir = os.path.dirname(code_dir) # Create a new folder for mixed model analysis mixed_model_dir = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'mixed_model_analysis_dunnett') os.makedirs(mixed_model_dir, exist_ok=True) # Step 4: Save the resulting dataframe to a CSV file input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing.csv') df = pd.read_csv(input_file_path) qc_csv = os.path.join(parent_dir,"input","AIDAqc_ouptut_for_data","Voting_remapped.csv") df_qc = pd.read_csv(qc_csv) df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>3) & (df_qc["sequence_type"]=="diff")] # Filtering the dataframe based on conditions df_f0 = df[~df["merged_timepoint"].isin([42, 56])] # Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint" merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True) # Drop the entries that are present in both df_f1 and df_qc_5 df_f1 = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"]) # Drop unnecessary columns with NaN values df_f1 = df_f1.dropna(axis=1, how="all") # Create lists to store results and terminal output results = [] # Iterate over unique values of "Qtype" and "mask_name" for dd in df["dialation_amount"].unique(): for qq in df["Qtype"].unique(): for mm in df["mask_name"].unique(): # Filter the dataframe based on current "Qtype" and "mask_name" filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm) & (df_f1["dialation_amount"] == dd)] # Fit the mixed-effects model model_name = f"{qq}_{mm}_{dd}" md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"]) mdf = md.fit(method=["lbfgs"]) # Log file for each combination of qq and mm # Check if the p-value is significant if mdf.pvalues['merged_timepoint'] < 0.05: log_file = os.path.join(mixed_model_dir, f"{model_name}_log_significant.txt") sig = True else: log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt") sig = False with open(log_file, 'w') as log: # Print model name to log file log.write(f"Model Name: {model_name}\n") # Print the model summary to log file log.write(str(mdf.summary()) + '\n') # Plot and save boxplots for significant models PlotBoxplot(filtered_df, qq, mm,dd ,mixed_model_dir,sig) # Perform Dunnet test for post hoc analysis log.write("Dunnett Multiple Comparisons for time effect:\n") log.write("Group\tTP1\tTP2\tPValue\tSignificant\n") for gg in filtered_df["Group"].unique(): control_data = filtered_df[(filtered_df["Group"]==gg) & (filtered_df["merged_timepoint"]==0)]["Value"] for tt in filtered_df["merged_timepoint"].unique(): treatment_data = filtered_df[(filtered_df["Group"]==gg) & (filtered_df["merged_timepoint"]==tt)]["Value"] result = dunnett(treatment_data, control=control_data) if result.pvalue < 0.05: # You can adjust the significance level as needed significant = "Yes" else: significant = "No" # Write the results to the log file log.write(f"{gg}\t0\t{tt}\t{result.pvalue}\t{significant}\n") # Perform the Sidak multiple comparisons for the group effect group_combinations = combinations(filtered_df["Group"].unique(), 2) log.write("Sidak Multiple Comparisons for group effect:\n") log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificant\n") for group1, group2 in group_combinations: for time_point in filtered_df["merged_timepoint"].unique(): group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"] group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"] sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak') if sidak_result[0]: log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n") else: log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n") # Append the results to the list results.append({'Qtype': qq, 'mask_name': mm,'dialation': dd,'p_value': mdf.pvalues['merged_timepoint']}) # Create a DataFrame from results results_df = pd.DataFrame(results) # Save results_df as CSV in mixed_model_analysis folder output_file_path = os.path.join(mixed_model_dir, 'mixed_model_results.csv') results_df.to_csv(output_file_path, index=False) # Print the table print(results_df)