123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
- import os
- import pandas as pd
- import seaborn as sns
- import matplotlib.pyplot as plt
- import statsmodels.formula.api as smf
- from statsmodels.stats.multicomp import MultiComparison
- from itertools import combinations
- import statsmodels
- import scipy
- from scipy.stats import dunnett
- # Set up warning suppression for convergence
- import warnings
- warnings.simplefilter(action='ignore')
- # Define the PlotBoxplot function
- def PlotBoxplot(filtered_df, qq, mm,dd, output_folder,sig):
- """
- Plot boxplots for filtered dataframe and save as images.
- Parameters:
- filtered_df (DataFrame): Filtered dataframe based on "Qtype" and "mask_name".
- qq (str): Qtype value.
- mm (str): Mask_name value.
- output_folder (str): Path to the output folder to save images.
- """
- # Set the color palette to Set2
- sns.set_palette("grey")
-
- # Set the figure size to 18 cm in width
- plt.figure(figsize=(18/2.54, 4)) # Convert 18 cm to inches
-
- # Create a boxplot
- sns.boxplot(data=filtered_df, x="Group", y="Value", hue="merged_timepoint",
- fliersize=3 ,flierprops={"marker": "o"})
-
- # Set title and labels
- plt.title(f'{qq}_{mm}_{dd}')
- plt.xlabel('Timepoint')
- plt.ylabel(qq+"(n.a)")
-
- # Show legend without title and frame
- plt.legend(title=None, frameon=False,bbox_to_anchor=(1.05, 1), loc='best')
- plt.tight_layout()
- # Save the plot as an image
- if sig:
- output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm_significant.png')
- else:
- output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm.png')
- plt.savefig(output_path, dpi=100)
-
- # Close the plot to avoid displaying it
- plt.close()
- # Get the directory where the code file is located
- code_dir = os.path.dirname(os.path.abspath(__file__))
- # Get the parent directory of the code directory
- parent_dir = os.path.dirname(code_dir)
- # Create a new folder for mixed model analysis
- mixed_model_dir = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'mixed_model_analysis_dunnett')
- os.makedirs(mixed_model_dir, exist_ok=True)
- # Step 4: Save the resulting dataframe to a CSV file
- input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing.csv')
- df = pd.read_csv(input_file_path)
- qc_csv = os.path.join(parent_dir,"input","AIDAqc_ouptut_for_data","Voting_remapped.csv")
- df_qc = pd.read_csv(qc_csv)
- df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>3) & (df_qc["sequence_type"]=="diff")]
- # Filtering the dataframe based on conditions
- df_f0 = df[~df["merged_timepoint"].isin([42, 56])]
- # Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
- merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)
- # Drop the entries that are present in both df_f1 and df_qc_5
- df_f1 = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"])
- # Drop unnecessary columns with NaN values
- df_f1 = df_f1.dropna(axis=1, how="all")
- # Create lists to store results and terminal output
- results = []
- # Iterate over unique values of "Qtype" and "mask_name"
- for dd in df["dialation_amount"].unique():
- for qq in df["Qtype"].unique():
- for mm in df["mask_name"].unique():
- # Filter the dataframe based on current "Qtype" and "mask_name"
- filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm) & (df_f1["dialation_amount"] == dd)]
-
- # Fit the mixed-effects model
- model_name = f"{qq}_{mm}_{dd}"
- md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"])
- mdf = md.fit(method=["lbfgs"])
-
- # Log file for each combination of qq and mm
-
-
- # Check if the p-value is significant
- if mdf.pvalues['merged_timepoint'] < 0.05:
- log_file = os.path.join(mixed_model_dir, f"{model_name}_log_significant.txt")
- sig = True
- else:
- log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
- sig = False
-
- with open(log_file, 'w') as log:
- # Print model name to log file
- log.write(f"Model Name: {model_name}\n")
-
- # Print the model summary to log file
- log.write(str(mdf.summary()) + '\n')
- # Plot and save boxplots for significant models
- PlotBoxplot(filtered_df, qq, mm,dd ,mixed_model_dir,sig)
-
- # Perform Dunnet test for post hoc analysis
- log.write("Dunnett Multiple Comparisons for time effect:\n")
- log.write("Group\tTP1\tTP2\tPValue\tSignificant\n")
- for gg in filtered_df["Group"].unique():
- control_data = filtered_df[(filtered_df["Group"]==gg) & (filtered_df["merged_timepoint"]==0)]["Value"]
- for tt in filtered_df["merged_timepoint"].unique():
- treatment_data = filtered_df[(filtered_df["Group"]==gg) & (filtered_df["merged_timepoint"]==tt)]["Value"]
- result = dunnett(treatment_data, control=control_data)
- if result.pvalue < 0.05: # You can adjust the significance level as needed
- significant = "Yes"
- else:
- significant = "No"
-
- # Write the results to the log file
- log.write(f"{gg}\t0\t{tt}\t{result.pvalue}\t{significant}\n")
-
- # Perform the Sidak multiple comparisons for the group effect
- group_combinations = combinations(filtered_df["Group"].unique(), 2)
- log.write("Sidak Multiple Comparisons for group effect:\n")
- log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificant\n")
- for group1, group2 in group_combinations:
- for time_point in filtered_df["merged_timepoint"].unique():
- group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
- group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
- sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
- if sidak_result[0]:
- log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
- else:
- log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
-
- # Append the results to the list
- results.append({'Qtype': qq, 'mask_name': mm,'dialation': dd,'p_value': mdf.pvalues['merged_timepoint']})
- # Create a DataFrame from results
- results_df = pd.DataFrame(results)
- # Save results_df as CSV in mixed_model_analysis folder
- output_file_path = os.path.join(mixed_model_dir, 'mixed_model_results.csv')
- results_df.to_csv(output_file_path, index=False)
- # Print the table
- print(results_df)
|