plotting_quantitative_dti_values.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143
  1. import os
  2. import pandas as pd
  3. import seaborn as sns
  4. import matplotlib.pyplot as plt
  5. import statsmodels.formula.api as smf
  6. from statsmodels.stats.multicomp import MultiComparison
  7. from itertools import combinations
  8. import statsmodels
  9. import scipy
  10. # Set up warning suppression for convergence
  11. import warnings
  12. warnings.simplefilter(action='ignore')
  13. # Define the PlotBoxplot function
  14. def PlotBoxplot(filtered_df, qq, mm, output_folder):
  15. """
  16. Plot boxplots for filtered dataframe and save as images.
  17. Parameters:
  18. filtered_df (DataFrame): Filtered dataframe based on "Qtype" and "mask_name".
  19. qq (str): Qtype value.
  20. mm (str): Mask_name value.
  21. output_folder (str): Path to the output folder to save images.
  22. """
  23. # Set the color palette to Set2
  24. sns.set_palette("Set2")
  25. # Set the figure size to 18 cm in width
  26. plt.figure(figsize=(18/2.54, 4)) # Convert 18 cm to inches
  27. # Create a boxplot
  28. sns.boxplot(data=filtered_df, x="merged_timepoint", y="Value", hue="Group", fliersize=3 ,flierprops={"marker": "o"})
  29. # Set title and labels
  30. plt.title(f'{mm}')
  31. plt.xlabel('Timepoint')
  32. plt.ylabel(qq)
  33. # Show legend without title and frame
  34. plt.legend(title=None, frameon=False)
  35. # Save the plot as an image
  36. output_path = os.path.join(output_folder, f'{qq}_{mm}_mixedlm_significant.png')
  37. plt.savefig(output_path, dpi=100)
  38. # Close the plot to avoid displaying it
  39. plt.close()
  40. # Get the directory where the code file is located
  41. code_dir = os.path.dirname(os.path.abspath(__file__))
  42. # Get the parent directory of the code directory
  43. parent_dir = os.path.dirname(code_dir)
  44. # Create a new folder for mixed model analysis
  45. mixed_model_dir = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'mixed_model_analysis')
  46. os.makedirs(mixed_model_dir, exist_ok=True)
  47. # Step 4: Save the resulting dataframe to a CSV file
  48. input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing.csv')
  49. df = pd.read_csv(input_file_path)
  50. qc_csv = os.path.join(parent_dir,"input","AIDAqc_ouptut_for_data","Voting_remapped.csv")
  51. df_qc = pd.read_csv(qc_csv)
  52. df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>3) & (df_qc["sequence_type"]=="diff")]
  53. # Filtering the dataframe based on conditions
  54. df_f0 = df[(df["dialation_amount"] == 3) & (~df["merged_timepoint"].isin([42, 56]))]
  55. # Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
  56. merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)
  57. # Drop the entries that are present in both df_f1 and df_qc_5
  58. df_f1 = merged_df[merged_df["_merge"] == "left_only"].drop(columns=["_merge"])
  59. # Drop unnecessary columns with NaN values
  60. df_f1 = df_f1.dropna(axis=1, how="all")
  61. # Create lists to store results and terminal output
  62. results = []
  63. # Iterate over unique values of "Qtype" and "mask_name"
  64. for qq in df["Qtype"].unique():
  65. for mm in df["mask_name"].unique():
  66. # Filter the dataframe based on current "Qtype" and "mask_name"
  67. filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm)]
  68. # Fit the mixed-effects model
  69. model_name = f"{qq}_{mm}"
  70. md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"])
  71. mdf = md.fit(method=["lbfgs"])
  72. # Log file for each combination of qq and mm
  73. log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
  74. # Check if the p-value is significant
  75. if mdf.pvalues['merged_timepoint'] < 0.05:
  76. with open(log_file, 'w') as log:
  77. # Print model name to log file
  78. log.write(f"Model Name: {model_name}\n")
  79. # Print the model summary to log file
  80. log.write(str(mdf.summary()) + '\n')
  81. # Plot and save boxplots for significant models
  82. PlotBoxplot(filtered_df, qq, mm, mixed_model_dir)
  83. # Perform Tukey's HSD test for post hoc analysis
  84. for gg in filtered_df["Group"].unique():
  85. filtered_df2 = filtered_df[filtered_df["Group"]==gg]
  86. mc = MultiComparison(filtered_df2["Value"], filtered_df2["merged_timepoint"])
  87. tukey_result = mc.tukeyhsd()
  88. # Print the summary of the test to log file
  89. log.write(f"Posthoc Analysis for {gg}:\n")
  90. log.write(str(tukey_result.summary()) + '\n')
  91. # Perform the Sidak multiple comparisons for the group effect
  92. group_combinations = combinations(filtered_df["Group"].unique(), 2)
  93. log.write("Sidak Multiple Comparisons:\n")
  94. log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificantDifference\n")
  95. for group1, group2 in group_combinations:
  96. for time_point in filtered_df["merged_timepoint"].unique():
  97. group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
  98. group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
  99. sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
  100. if sidak_result[0]:
  101. log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
  102. else:
  103. log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
  104. # Append the results to the list
  105. results.append({'Qtype': qq, 'mask_name': mm, 'p_value': mdf.pvalues['merged_timepoint']})
  106. # Create a DataFrame from results
  107. results_df = pd.DataFrame(results)
  108. # Save results_df as CSV in mixed_model_analysis folder
  109. output_file_path = os.path.join(mixed_model_dir, 'mixed_model_results.csv')
  110. results_df.to_csv(output_file_path, index=False)
  111. # Print the table
  112. print(results_df)