123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143 |
- import os
- import pandas as pd
- import seaborn as sns
- import matplotlib.pyplot as plt
- import statsmodels.formula.api as smf
- from statsmodels.stats.multicomp import MultiComparison
- from itertools import combinations
- import statsmodels
- import scipy
- # Set up warning suppression for convergence
- import warnings
- warnings.simplefilter(action='ignore')
- # Define the PlotBoxplot function
- def PlotBoxplot(filtered_df, qq, mm, output_folder):
- """
- Plot boxplots for filtered dataframe and save as images.
- Parameters:
- filtered_df (DataFrame): Filtered dataframe based on "Qtype" and "mask_name".
- qq (str): Qtype value.
- mm (str): Mask_name value.
- output_folder (str): Path to the output folder to save images.
- """
- # Set the color palette to Set2
- sns.set_palette("Set2")
-
- # Set the figure size to 18 cm in width
- plt.figure(figsize=(18/2.54, 4)) # Convert 18 cm to inches
-
- # Create a boxplot
- sns.boxplot(data=filtered_df, x="merged_timepoint", y="Value", hue="Group", fliersize=3 ,flierprops={"marker": "o"})
-
- # Set title and labels
- plt.title(f'{mm}')
- plt.xlabel('Timepoint')
- plt.ylabel(qq)
-
- # Show legend without title and frame
- plt.legend(title=None, frameon=False)
-
- # Save the plot as an image
- output_path = os.path.join(output_folder, f'{qq}_{mm}_mixedlm_significant.png')
- plt.savefig(output_path, dpi=100)
-
- # Close the plot to avoid displaying it
- plt.close()
- # Get the directory where the code file is located
- code_dir = os.path.dirname(os.path.abspath(__file__))
- # Get the parent directory of the code directory
- parent_dir = os.path.dirname(code_dir)
- # Create a new folder for mixed model analysis
- mixed_model_dir = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'mixed_model_analysis')
- os.makedirs(mixed_model_dir, exist_ok=True)
- # Step 4: Save the resulting dataframe to a CSV file
- input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing.csv')
- df = pd.read_csv(input_file_path)
- qc_csv = os.path.join(parent_dir,"input","AIDAqc_ouptut_for_data","Voting_remapped.csv")
- df_qc = pd.read_csv(qc_csv)
- df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>4) & (df_qc["sequence_type"]=="diff")]
- # Filtering the dataframe based on conditions
- df_f0 = df[(df["dialation_amount"] == 2) & (~df["merged_timepoint"].isin([42, 56]))]
- # Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
- merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)
- # Drop the entries that are present in both df_f1 and df_qc_5
- df_f1 = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"])
- # Drop unnecessary columns with NaN values
- df_f1 = df_f1.dropna(axis=1, how="all")
- # Create lists to store results and terminal output
- results = []
- # Iterate over unique values of "Qtype" and "mask_name"
- for qq in df["Qtype"].unique():
- for mm in df["mask_name"].unique():
- # Filter the dataframe based on current "Qtype" and "mask_name"
- filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm)]
-
- # Fit the mixed-effects model
- model_name = f"{qq}_{mm}"
- md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"])
- mdf = md.fit(method=["lbfgs"])
-
- # Log file for each combination of qq and mm
- log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
-
-
- # Check if the p-value is significant
- if mdf.pvalues['merged_timepoint'] < 0.05:
- with open(log_file, 'w') as log:
- # Print model name to log file
- log.write(f"Model Name: {model_name}\n")
-
- # Print the model summary to log file
- log.write(str(mdf.summary()) + '\n')
- # Plot and save boxplots for significant models
- PlotBoxplot(filtered_df, qq, mm, mixed_model_dir)
-
- # Perform Tukey's HSD test for post hoc analysis
- for gg in filtered_df["Group"].unique():
- filtered_df2 = filtered_df[filtered_df["Group"]==gg]
- mc = MultiComparison(filtered_df2["Value"], filtered_df2["merged_timepoint"])
- tukey_result = mc.tukeyhsd()
-
- # Print the summary of the test to log file
- log.write(f"Posthoc Analysis for {gg}:\n")
- log.write(str(tukey_result.summary()) + '\n')
-
- # Perform the Sidak multiple comparisons for the group effect
- group_combinations = combinations(filtered_df["Group"].unique(), 2)
- log.write("Sidak Multiple Comparisons:\n")
- log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificantDifference\n")
- for group1, group2 in group_combinations:
- for time_point in filtered_df["merged_timepoint"].unique():
- group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
- group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
- sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
- if sidak_result[0]:
- log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
- else:
- log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
-
- # Append the results to the list
- results.append({'Qtype': qq, 'mask_name': mm, 'p_value': mdf.pvalues['merged_timepoint']})
- # Create a DataFrame from results
- results_df = pd.DataFrame(results)
- # Save results_df as CSV in mixed_model_analysis folder
- output_file_path = os.path.join(mixed_model_dir, 'mixed_model_results.csv')
- results_df.to_csv(output_file_path, index=False)
- # Print the table
- print(results_df)
|