|
@@ -13,7 +13,7 @@ import warnings
|
|
|
warnings.simplefilter(action='ignore')
|
|
|
|
|
|
# Define the PlotBoxplot function
|
|
|
-def PlotBoxplot(filtered_df, qq, mm, output_folder):
|
|
|
+def PlotBoxplot(filtered_df, qq, mm,dd, output_folder):
|
|
|
"""
|
|
|
Plot boxplots for filtered dataframe and save as images.
|
|
|
|
|
@@ -41,7 +41,7 @@ def PlotBoxplot(filtered_df, qq, mm, output_folder):
|
|
|
plt.legend(title=None, frameon=False)
|
|
|
|
|
|
# Save the plot as an image
|
|
|
- output_path = os.path.join(output_folder, f'{qq}_{mm}_mixedlm_significant.png')
|
|
|
+ output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm_significant.png')
|
|
|
plt.savefig(output_path, dpi=100)
|
|
|
|
|
|
# Close the plot to avoid displaying it
|
|
@@ -66,7 +66,7 @@ df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>3) & (df_qc["sequence_type"]=
|
|
|
|
|
|
|
|
|
# Filtering the dataframe based on conditions
|
|
|
-df_f0 = df[(df["dialation_amount"] == 3) & (~df["merged_timepoint"].isin([42, 56]))]
|
|
|
+df_f0 = df[(~df["merged_timepoint"].isin([42, 56]))]
|
|
|
# Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
|
|
|
merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)
|
|
|
|
|
@@ -80,57 +80,59 @@ df_f1 = df_f1.dropna(axis=1, how="all")
|
|
|
results = []
|
|
|
|
|
|
# Iterate over unique values of "Qtype" and "mask_name"
|
|
|
-for qq in df["Qtype"].unique():
|
|
|
- for mm in df["mask_name"].unique():
|
|
|
- # Filter the dataframe based on current "Qtype" and "mask_name"
|
|
|
- filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm)]
|
|
|
-
|
|
|
- # Fit the mixed-effects model
|
|
|
- model_name = f"{qq}_{mm}"
|
|
|
- md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"])
|
|
|
- mdf = md.fit(method=["lbfgs"])
|
|
|
-
|
|
|
- # Log file for each combination of qq and mm
|
|
|
- log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
|
|
|
-
|
|
|
-
|
|
|
- # Check if the p-value is significant
|
|
|
- if mdf.pvalues['merged_timepoint'] < 0.05:
|
|
|
- with open(log_file, 'w') as log:
|
|
|
- # Print model name to log file
|
|
|
- log.write(f"Model Name: {model_name}\n")
|
|
|
-
|
|
|
- # Print the model summary to log file
|
|
|
- log.write(str(mdf.summary()) + '\n')
|
|
|
- # Plot and save boxplots for significant models
|
|
|
- PlotBoxplot(filtered_df, qq, mm, mixed_model_dir)
|
|
|
-
|
|
|
- # Perform Tukey's HSD test for post hoc analysis
|
|
|
- for gg in filtered_df["Group"].unique():
|
|
|
- filtered_df2 = filtered_df[filtered_df["Group"]==gg]
|
|
|
- mc = MultiComparison(filtered_df2["Value"], filtered_df2["merged_timepoint"])
|
|
|
- tukey_result = mc.tukeyhsd()
|
|
|
+for dd in df["dialation_amount"].unique():
|
|
|
+ for qq in df["Qtype"].unique():
|
|
|
+ for mm in df["mask_name"].unique():
|
|
|
+ # Filter the dataframe based on current "Qtype" and "mask_name"
|
|
|
+ filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm)]
|
|
|
+
|
|
|
+ # Fit the mixed-effects model
|
|
|
+ model_name = f"{qq}_{mm}_{dd}"
|
|
|
+ md = smf.mixedlm("Value ~ merged_timepoint", filtered_df, groups=filtered_df["Group"])
|
|
|
+ mdf = md.fit(method=["lbfgs"])
|
|
|
+
|
|
|
+ # Log file for each combination of qq and mm
|
|
|
+ log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
|
|
|
+
|
|
|
+
|
|
|
+ # Check if the p-value is significant
|
|
|
+ if mdf.pvalues['merged_timepoint'] < 0.05:
|
|
|
+ with open(log_file, 'w') as log:
|
|
|
+ # Print model name to log file
|
|
|
+ log.write(f"Model Name: {model_name}\n")
|
|
|
|
|
|
- # Print the summary of the test to log file
|
|
|
- log.write(f"Posthoc Analysis for {gg}:\n")
|
|
|
- log.write(str(tukey_result.summary()) + '\n')
|
|
|
-
|
|
|
- # Perform the Sidak multiple comparisons for the group effect
|
|
|
- group_combinations = combinations(filtered_df["Group"].unique(), 2)
|
|
|
- log.write("Sidak Multiple Comparisons:\n")
|
|
|
- log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificantDifference\n")
|
|
|
- for group1, group2 in group_combinations:
|
|
|
- for time_point in filtered_df["merged_timepoint"].unique():
|
|
|
- group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
|
|
|
- group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
|
|
|
- sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
|
|
|
- if sidak_result[0]:
|
|
|
- log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
|
|
|
- else:
|
|
|
- log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
|
|
|
-
|
|
|
- # Append the results to the list
|
|
|
- results.append({'Qtype': qq, 'mask_name': mm, 'p_value': mdf.pvalues['merged_timepoint']})
|
|
|
+ # Print the model summary to log file
|
|
|
+ log.write(str(mdf.summary()) + '\n')
|
|
|
+ # Plot and save boxplots for significant models
|
|
|
+ PlotBoxplot(filtered_df, qq, mm,dd ,mixed_model_dir)
|
|
|
+
|
|
|
+ # Perform Tukey's HSD test for post hoc analysis
|
|
|
+ for gg in filtered_df["Group"].unique():
|
|
|
+
|
|
|
+ filtered_df2 = filtered_df[filtered_df["Group"]==gg]
|
|
|
+ mc = MultiComparison(filtered_df2["Value"], filtered_df2["merged_timepoint"])
|
|
|
+ tukey_result = mc.tukeyhsd()
|
|
|
+
|
|
|
+ # Print the summary of the test to log file
|
|
|
+ log.write(f"Posthoc Analysis for {gg}:\n")
|
|
|
+ log.write(str(tukey_result.summary()) + '\n')
|
|
|
+
|
|
|
+ # Perform the Sidak multiple comparisons for the group effect
|
|
|
+ group_combinations = combinations(filtered_df["Group"].unique(), 2)
|
|
|
+ log.write("Sidak Multiple Comparisons:\n")
|
|
|
+ log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificantDifference\n")
|
|
|
+ for group1, group2 in group_combinations:
|
|
|
+ for time_point in filtered_df["merged_timepoint"].unique():
|
|
|
+ group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
|
|
|
+ group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
|
|
|
+ sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
|
|
|
+ if sidak_result[0]:
|
|
|
+ log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
|
|
|
+ else:
|
|
|
+ log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
|
|
|
+
|
|
|
+ # Append the results to the list
|
|
|
+ results.append({'Qtype': qq, 'mask_name': mm,'dialation': dd,'p_value': mdf.pvalues['merged_timepoint']})
|
|
|
|
|
|
# Create a DataFrame from results
|
|
|
results_df = pd.DataFrame(results)
|