import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from statsmodels.stats.multicomp import MultiComparison
from itertools import combinations
import statsmodels
import scipy

# Set up warning suppression for convergence
import warnings
warnings.simplefilter(action='ignore')

# Define the PlotBoxplot function
def PlotBoxplot(filtered_df, qq, mm, output_folder):
    """
    Plot boxplots for filtered dataframe and save as images.

    Parameters:
    filtered_df (DataFrame): Filtered dataframe based on "Qtype" and "mask_name".
    qq (str): Qtype value.
    mm (str): Mask_name value.
    output_folder (str): Path to the output folder to save images.
    """
    # Set the color palette to Set2
    sns.set_palette("Set2")
    
    # Set the figure size to 18 cm in width
    plt.figure(figsize=(18/2.54, 4))  # Convert 18 cm to inches
    
    # Create a boxplot
    sns.boxplot(data=filtered_df, x="merged_timepoint", y="Value", hue="Group", fliersize=3 ,flierprops={"marker": "o"})
    
    # Set title and labels
    plt.title(f'{mm}')
    plt.xlabel('Timepoint')
    plt.ylabel(qq)
    
    # Show legend without title and frame
    plt.legend(title=None, frameon=False)
    
    # Save the plot as an image
    output_path = os.path.join(output_folder, f'{qq}_{mm}_mixedlm_significant.png')
    plt.savefig(output_path, dpi=100)
    
    # Close the plot to avoid displaying it
    plt.close()

# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))

# Get the parent directory of the code directory
parent_dir = os.path.dirname(code_dir)

# Create a new folder for mixed model analysis
mixed_model_dir = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'mixed_model_analysis')
os.makedirs(mixed_model_dir, exist_ok=True)

# Step 4: Save the resulting dataframe to a CSV file
input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing.csv')
df = pd.read_csv(input_file_path)
qc_csv = os.path.join(parent_dir,"input","AIDAqc_ouptut_for_data","Voting_remapped.csv")
df_qc = pd.read_csv(qc_csv)
df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>3) & (df_qc["sequence_type"]=="diff")]


# Filtering the dataframe based on conditions
df_f0 = df[(df["dialation_amount"] == 3) & (~df["merged_timepoint"].isin([42, 56]))]
# Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)

# Drop the entries that are present in both df_f1 and df_qc_5
df_f1 = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"])
# Drop unnecessary columns with NaN values
df_f1 = df_f1.dropna(axis=1, how="all")


# Create lists to store results and terminal output
results = []

# Iterate over unique values of "Qtype" and "mask_name"
for qq in df["Qtype"].unique():
    for mm in df["mask_name"].unique():
        # Filter the dataframe based on current "Qtype" and "mask_name"
        filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm)]
        
        # Fit the mixed-effects model
        model_name = f"{qq}_{mm}"
        md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"])
        mdf = md.fit(method=["lbfgs"])
        
        # Log file for each combination of qq and mm
        log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
       
        
        # Check if the p-value is significant
        if mdf.pvalues['merged_timepoint'] < 0.05:
            with open(log_file, 'w') as log:
                # Print model name to log file
                log.write(f"Model Name: {model_name}\n")
                
                # Print the model summary to log file
                log.write(str(mdf.summary()) + '\n')
                # Plot and save boxplots for significant models
                PlotBoxplot(filtered_df, qq, mm, mixed_model_dir)
                
                # Perform Tukey's HSD test for post hoc analysis
                for gg in filtered_df["Group"].unique():
                    filtered_df2 = filtered_df[filtered_df["Group"]==gg]
                    mc = MultiComparison(filtered_df2["Value"], filtered_df2["merged_timepoint"])
                    tukey_result = mc.tukeyhsd()
                    
                    # Print the summary of the test to log file
                    log.write(f"Posthoc Analysis for {gg}:\n")
                    log.write(str(tukey_result.summary()) + '\n')
                
                # Perform the Sidak multiple comparisons for the group effect
                group_combinations = combinations(filtered_df["Group"].unique(), 2)
                log.write("Sidak Multiple Comparisons:\n")
                log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificantDifference\n")
                for group1, group2 in group_combinations:
                    for time_point in filtered_df["merged_timepoint"].unique():
                        group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
                        group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
                        sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
                        if sidak_result[0]:
                            log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
                        else:
                            log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
        
        # Append the results to the list
        results.append({'Qtype': qq, 'mask_name': mm, 'p_value': mdf.pvalues['merged_timepoint']})

# Create a DataFrame from results
results_df = pd.DataFrame(results)

# Save results_df as CSV in mixed_model_analysis folder
output_file_path = os.path.join(mixed_model_dir, 'mixed_model_results.csv')
results_df.to_csv(output_file_path, index=False)

# Print the table
print(results_df)