Aswendt_Lab
/
2024_Ruthe_SND


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
							import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from statsmodels.stats.multicomp import MultiComparison
from itertools import combinations
import statsmodels
import scipy
from scipy.stats import dunnett

# Set up warning suppression for convergence
import warnings
warnings.simplefilter(action='ignore')

# Define the PlotBoxplot function
def PlotBoxplot(filtered_df, qq, mm,dd, output_folder,sig):
    """
    Plot boxplots for filtered dataframe and save as images.

    Parameters:
    filtered_df (DataFrame): Filtered dataframe based on "Qtype" and "mask_name".
    qq (str): Qtype value.
    mm (str): Mask_name value.
    output_folder (str): Path to the output folder to save images.
    """
    # Set the color palette to Set2
    sns.set_palette("grey")
    
    # Set the figure size to 18 cm in width
    plt.figure(figsize=(18/2.54, 4))  # Convert 18 cm to inches
    
    # Create a boxplot
    sns.boxplot(data=filtered_df, x="Group", y="Value", hue="merged_timepoint",
                fliersize=3 ,flierprops={"marker": "o"})
    
    # Set title and labels
    plt.title(f'{qq}_{mm}_{dd}')
    plt.xlabel('Timepoint')
    plt.ylabel(qq+"(n.a)")
    
    # Show legend without title and frame
    plt.legend(title=None, frameon=False,bbox_to_anchor=(1.05, 1), loc='best')
    plt.tight_layout()
    # Save the plot as an image
    if sig:
        output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm_significant.png')
    else:
        output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm.png')
    plt.savefig(output_path, dpi=100)
    
    # Close the plot to avoid displaying it
    plt.close()

# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))

# Get the parent directory of the code directory
parent_dir = os.path.dirname(code_dir)

# Create a new folder for mixed model analysis
mixed_model_dir = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'mixed_model_analysis_dunnett')
os.makedirs(mixed_model_dir, exist_ok=True)

# Step 4: Save the resulting dataframe to a CSV file
input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing.csv')
df = pd.read_csv(input_file_path)
qc_csv = os.path.join(parent_dir,"input","AIDAqc_ouptut_for_data","Voting_remapped.csv")
df_qc = pd.read_csv(qc_csv)
df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>3) & (df_qc["sequence_type"]=="diff")]


# Filtering the dataframe based on conditions
df_f0 = df[~df["merged_timepoint"].isin([42, 56])]
# Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)

# Drop the entries that are present in both df_f1 and df_qc_5
df_f1 = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"])
# Drop unnecessary columns with NaN values
df_f1 = df_f1.dropna(axis=1, how="all")


# Create lists to store results and terminal output
results = []

# Iterate over unique values of "Qtype" and "mask_name"
for dd in df["dialation_amount"].unique():
    for qq in df["Qtype"].unique():
        for mm in df["mask_name"].unique():
            # Filter the dataframe based on current "Qtype" and "mask_name"
            filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm) & (df_f1["dialation_amount"] == dd)]
            
            # Fit the mixed-effects model
            model_name = f"{qq}_{mm}_{dd}"
            md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"])
            mdf = md.fit(method=["lbfgs"])
            
            # Log file for each combination of qq and mm
            
            
            # Check if the p-value is significant
            if mdf.pvalues['merged_timepoint'] < 0.05:
                log_file = os.path.join(mixed_model_dir, f"{model_name}_log_significant.txt")
                sig = True
            else:
                log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")  
                sig = False
                
            with open(log_file, 'w') as log:
                # Print model name to log file
                log.write(f"Model Name: {model_name}\n")
                
                # Print the model summary to log file
                log.write(str(mdf.summary()) + '\n')
                # Plot and save boxplots for significant models
                PlotBoxplot(filtered_df, qq, mm,dd ,mixed_model_dir,sig)
                
                # Perform Dunnet test for post hoc analysis
                log.write("Dunnett Multiple Comparisons for time effect:\n")
                log.write("Group\tTP1\tTP2\tPValue\tSignificant\n")
                for gg in filtered_df["Group"].unique():
                    control_data = filtered_df[(filtered_df["Group"]==gg) & (filtered_df["merged_timepoint"]==0)]["Value"]
                    for tt in filtered_df["merged_timepoint"].unique():
                        treatment_data = filtered_df[(filtered_df["Group"]==gg) & (filtered_df["merged_timepoint"]==tt)]["Value"]
                        result = dunnett(treatment_data, control=control_data)
                        if result.pvalue < 0.05:  # You can adjust the significance level as needed
                            significant = "Yes"
                        else:
                            significant = "No"
                        
                        # Write the results to the log file
                        log.write(f"{gg}\t0\t{tt}\t{result.pvalue}\t{significant}\n")
                                    
                # Perform the Sidak multiple comparisons for the group effect
                group_combinations = combinations(filtered_df["Group"].unique(), 2)
                log.write("Sidak Multiple Comparisons for group effect:\n")
                log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificant\n")
                for group1, group2 in group_combinations:
                    for time_point in filtered_df["merged_timepoint"].unique():
                        group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
                        group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
                        sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
                        if sidak_result[0]:
                            log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
                        else:
                            log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
        
            # Append the results to the list
            results.append({'Qtype': qq, 'mask_name': mm,'dialation': dd,'p_value': mdf.pvalues['merged_timepoint']})

# Create a DataFrame from results
results_df = pd.DataFrame(results)

# Save results_df as CSV in mixed_model_analysis folder
output_file_path = os.path.join(mixed_model_dir, 'mixed_model_results.csv')
results_df.to_csv(output_file_path, index=False)

# Print the table
print(results_df)