Aswendt_Lab
/
2024_Ruthe_SND


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
							import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from statsmodels.stats.multicomp import MultiComparison
from itertools import combinations
import statsmodels
import scipy

# Set up warning suppression for convergence
import warnings
warnings.simplefilter(action='ignore')

# Define the PlotBoxplot function
def PlotBoxplot(filtered_df, qq, mm, output_folder):
    """
    Plot boxplots for filtered dataframe and save as images.

    Parameters:
    filtered_df (DataFrame): Filtered dataframe based on "Qtype" and "mask_name".
    qq (str): Qtype value.
    mm (str): Mask_name value.
    output_folder (str): Path to the output folder to save images.
    """
    # Set the color palette to Set2
    sns.set_palette("Set2")
    
    # Set the figure size to 18 cm in width
    plt.figure(figsize=(18/2.54, 4))  # Convert 18 cm to inches
    
    # Create a boxplot
    sns.boxplot(data=filtered_df, x="merged_timepoint", y="Value", hue="Group", fliersize=3 ,flierprops={"marker": "o"})
    
    # Set title and labels
    plt.title(f'{mm}')
    plt.xlabel('Timepoint')
    plt.ylabel(qq)
    
    # Show legend without title and frame
    plt.legend(title=None, frameon=False)
    
    # Save the plot as an image
    output_path = os.path.join(output_folder, f'{qq}_{mm}_mixedlm_significant.png')
    plt.savefig(output_path, dpi=100)
    
    # Close the plot to avoid displaying it
    plt.close()

# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))

# Get the parent directory of the code directory
parent_dir = os.path.dirname(code_dir)

# Create a new folder for mixed model analysis
mixed_model_dir = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'mixed_model_analysis')
os.makedirs(mixed_model_dir, exist_ok=True)

# Step 4: Save the resulting dataframe to a CSV file
input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing.csv')
df = pd.read_csv(input_file_path)
qc_csv = os.path.join(parent_dir,"input","AIDAqc_ouptut_for_data","Voting_remapped.csv")
df_qc = pd.read_csv(qc_csv)
df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>4) & (df_qc["sequence_type"]=="diff")]


# Filtering the dataframe based on conditions
df_f0 = df[(df["dialation_amount"] == 2) & (~df["merged_timepoint"].isin([42, 56]))]
# Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)

# Drop the entries that are present in both df_f1 and df_qc_5
df_f1 = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"])
# Drop unnecessary columns with NaN values
df_f1 = df_f1.dropna(axis=1, how="all")


# Create lists to store results and terminal output
results = []

# Iterate over unique values of "Qtype" and "mask_name"
for qq in df["Qtype"].unique():
    for mm in df["mask_name"].unique():
        # Filter the dataframe based on current "Qtype" and "mask_name"
        filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm)]
        
        # Fit the mixed-effects model
        model_name = f"{qq}_{mm}"
        md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"])
        mdf = md.fit(method=["lbfgs"])
        
        # Log file for each combination of qq and mm
        log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
       
        
        # Check if the p-value is significant
        if mdf.pvalues['merged_timepoint'] < 0.05:
            with open(log_file, 'w') as log:
                # Print model name to log file
                log.write(f"Model Name: {model_name}\n")
                
                # Print the model summary to log file
                log.write(str(mdf.summary()) + '\n')
                # Plot and save boxplots for significant models
                PlotBoxplot(filtered_df, qq, mm, mixed_model_dir)
                
                # Perform Tukey's HSD test for post hoc analysis
                for gg in filtered_df["Group"].unique():
                    filtered_df2 = filtered_df[filtered_df["Group"]==gg]
                    mc = MultiComparison(filtered_df2["Value"], filtered_df2["merged_timepoint"])
                    tukey_result = mc.tukeyhsd()
                    
                    # Print the summary of the test to log file
                    log.write(f"Posthoc Analysis for {gg}:\n")
                    log.write(str(tukey_result.summary()) + '\n')
                
                # Perform the Sidak multiple comparisons for the group effect
                group_combinations = combinations(filtered_df["Group"].unique(), 2)
                log.write("Sidak Multiple Comparisons:\n")
                log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificantDifference\n")
                for group1, group2 in group_combinations:
                    for time_point in filtered_df["merged_timepoint"].unique():
                        group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
                        group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
                        sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
                        if sidak_result[0]:
                            log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
                        else:
                            log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
        
        # Append the results to the list
        results.append({'Qtype': qq, 'mask_name': mm, 'p_value': mdf.pvalues['merged_timepoint']})

# Create a DataFrame from results
results_df = pd.DataFrame(results)

# Save results_df as CSV in mixed_model_analysis folder
output_file_path = os.path.join(mixed_model_dir, 'mixed_model_results.csv')
results_df.to_csv(output_file_path, index=False)

# Print the table
print(results_df)