|
@@ -13,7 +13,7 @@ import warnings
|
|
warnings.simplefilter(action='ignore')
|
|
warnings.simplefilter(action='ignore')
|
|
|
|
|
|
# Define the PlotBoxplot function
|
|
# Define the PlotBoxplot function
|
|
-def PlotBoxplot(filtered_df, qq, mm,dd, output_folder):
|
|
|
|
|
|
+def PlotBoxplot(filtered_df, qq, mm,dd, output_folder,sig):
|
|
"""
|
|
"""
|
|
Plot boxplots for filtered dataframe and save as images.
|
|
Plot boxplots for filtered dataframe and save as images.
|
|
|
|
|
|
@@ -24,24 +24,28 @@ def PlotBoxplot(filtered_df, qq, mm,dd, output_folder):
|
|
output_folder (str): Path to the output folder to save images.
|
|
output_folder (str): Path to the output folder to save images.
|
|
"""
|
|
"""
|
|
# Set the color palette to Set2
|
|
# Set the color palette to Set2
|
|
- sns.set_palette("Set2")
|
|
|
|
|
|
+ sns.set_palette("grey")
|
|
|
|
|
|
# Set the figure size to 18 cm in width
|
|
# Set the figure size to 18 cm in width
|
|
plt.figure(figsize=(18/2.54, 4)) # Convert 18 cm to inches
|
|
plt.figure(figsize=(18/2.54, 4)) # Convert 18 cm to inches
|
|
|
|
|
|
# Create a boxplot
|
|
# Create a boxplot
|
|
- sns.boxplot(data=filtered_df, x="merged_timepoint", y="Value", hue="Group", fliersize=3 ,flierprops={"marker": "o"})
|
|
|
|
|
|
+ sns.boxplot(data=filtered_df, x="Group", y="Value", hue="merged_timepoint",
|
|
|
|
+ fliersize=3 ,flierprops={"marker": "o"})
|
|
|
|
|
|
# Set title and labels
|
|
# Set title and labels
|
|
- plt.title(f'{mm}')
|
|
|
|
|
|
+ plt.title(f'{qq}_{mm}_{dd}')
|
|
plt.xlabel('Timepoint')
|
|
plt.xlabel('Timepoint')
|
|
- plt.ylabel(qq)
|
|
|
|
|
|
+ plt.ylabel(qq+"(n.a)")
|
|
|
|
|
|
# Show legend without title and frame
|
|
# Show legend without title and frame
|
|
- plt.legend(title=None, frameon=False)
|
|
|
|
-
|
|
|
|
|
|
+ plt.legend(title=None, frameon=False,bbox_to_anchor=(1.05, 1), loc='best')
|
|
|
|
+ plt.tight_layout()
|
|
# Save the plot as an image
|
|
# Save the plot as an image
|
|
- output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm_significant.png')
|
|
|
|
|
|
+ if sig:
|
|
|
|
+ output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm_significant.png')
|
|
|
|
+ else:
|
|
|
|
+ output_path = os.path.join(output_folder, f'{qq}_{mm}_{dd}_mixedlm.png')
|
|
plt.savefig(output_path, dpi=100)
|
|
plt.savefig(output_path, dpi=100)
|
|
|
|
|
|
# Close the plot to avoid displaying it
|
|
# Close the plot to avoid displaying it
|
|
@@ -66,7 +70,7 @@ df_qc_5 = df_qc[(df_qc["Voting outliers (from 5)"]>3) & (df_qc["sequence_type"]=
|
|
|
|
|
|
|
|
|
|
# Filtering the dataframe based on conditions
|
|
# Filtering the dataframe based on conditions
|
|
-df_f0 = df[(~df["merged_timepoint"].isin([42, 56]))]
|
|
|
|
|
|
+df_f0 = df[~df["merged_timepoint"].isin([42, 56])]
|
|
# Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
|
|
# Merge df_f1 and df_qc_5 on "subjectID" and "merged_timepoint"
|
|
merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)
|
|
merged_df = pd.merge(df_f0, df_qc_5, on=["subjectID", "merged_timepoint"], how="left", indicator=True)
|
|
|
|
|
|
@@ -84,7 +88,7 @@ for dd in df["dialation_amount"].unique():
|
|
for qq in df["Qtype"].unique():
|
|
for qq in df["Qtype"].unique():
|
|
for mm in df["mask_name"].unique():
|
|
for mm in df["mask_name"].unique():
|
|
# Filter the dataframe based on current "Qtype" and "mask_name"
|
|
# Filter the dataframe based on current "Qtype" and "mask_name"
|
|
- filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm)]
|
|
|
|
|
|
+ filtered_df = df_f1[(df_f1["Qtype"] == qq) & (df_f1["mask_name"] == mm) & (df_f1["dialation_amount"] == dd)]
|
|
|
|
|
|
# Fit the mixed-effects model
|
|
# Fit the mixed-effects model
|
|
model_name = f"{qq}_{mm}_{dd}"
|
|
model_name = f"{qq}_{mm}_{dd}"
|
|
@@ -92,45 +96,50 @@ for dd in df["dialation_amount"].unique():
|
|
mdf = md.fit(method=["lbfgs"])
|
|
mdf = md.fit(method=["lbfgs"])
|
|
|
|
|
|
# Log file for each combination of qq and mm
|
|
# Log file for each combination of qq and mm
|
|
- log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
|
|
|
|
-
|
|
|
|
|
|
+
|
|
|
|
|
|
# Check if the p-value is significant
|
|
# Check if the p-value is significant
|
|
if mdf.pvalues['merged_timepoint'] < 0.05:
|
|
if mdf.pvalues['merged_timepoint'] < 0.05:
|
|
- with open(log_file, 'w') as log:
|
|
|
|
- # Print model name to log file
|
|
|
|
- log.write(f"Model Name: {model_name}\n")
|
|
|
|
-
|
|
|
|
- # Print the model summary to log file
|
|
|
|
- log.write(str(mdf.summary()) + '\n')
|
|
|
|
- # Plot and save boxplots for significant models
|
|
|
|
- PlotBoxplot(filtered_df, qq, mm,dd ,mixed_model_dir)
|
|
|
|
|
|
+ log_file = os.path.join(mixed_model_dir, f"{model_name}_log_significant.txt")
|
|
|
|
+ sig = True
|
|
|
|
+ else:
|
|
|
|
+ log_file = os.path.join(mixed_model_dir, f"{model_name}_log.txt")
|
|
|
|
+ sig = False
|
|
|
|
+
|
|
|
|
+ with open(log_file, 'w') as log:
|
|
|
|
+ # Print model name to log file
|
|
|
|
+ log.write(f"Model Name: {model_name}\n")
|
|
|
|
+
|
|
|
|
+ # Print the model summary to log file
|
|
|
|
+ log.write(str(mdf.summary()) + '\n')
|
|
|
|
+ # Plot and save boxplots for significant models
|
|
|
|
+ PlotBoxplot(filtered_df, qq, mm,dd ,mixed_model_dir,sig)
|
|
|
|
+
|
|
|
|
+ # Perform Tukey's HSD test for post hoc analysis
|
|
|
|
+ for gg in filtered_df["Group"].unique():
|
|
|
|
+
|
|
|
|
+ filtered_df2 = filtered_df[filtered_df["Group"]==gg]
|
|
|
|
+ mc = MultiComparison(filtered_df2["Value"], filtered_df2["merged_timepoint"])
|
|
|
|
+ tukey_result = mc.tukeyhsd()
|
|
|
|
|
|
- # Perform Tukey's HSD test for post hoc analysis
|
|
|
|
- for gg in filtered_df["Group"].unique():
|
|
|
|
-
|
|
|
|
- filtered_df2 = filtered_df[filtered_df["Group"]==gg]
|
|
|
|
- mc = MultiComparison(filtered_df2["Value"], filtered_df2["merged_timepoint"])
|
|
|
|
- tukey_result = mc.tukeyhsd()
|
|
|
|
-
|
|
|
|
- # Print the summary of the test to log file
|
|
|
|
- log.write(f"Posthoc Analysis for {gg}:\n")
|
|
|
|
- log.write(str(tukey_result.summary()) + '\n')
|
|
|
|
-
|
|
|
|
- # Perform the Sidak multiple comparisons for the group effect
|
|
|
|
- group_combinations = combinations(filtered_df["Group"].unique(), 2)
|
|
|
|
- log.write("Sidak Multiple Comparisons:\n")
|
|
|
|
- log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificantDifference\n")
|
|
|
|
- for group1, group2 in group_combinations:
|
|
|
|
- for time_point in filtered_df["merged_timepoint"].unique():
|
|
|
|
- group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
|
|
|
|
- group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
|
|
|
|
- sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
|
|
|
|
- if sidak_result[0]:
|
|
|
|
- log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
|
|
|
|
- else:
|
|
|
|
- log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
|
|
|
|
-
|
|
|
|
|
|
+ # Print the summary of the test to log file
|
|
|
|
+ log.write(f"Posthoc Analysis for {gg}:\n")
|
|
|
|
+ log.write(str(tukey_result.summary()) + '\n')
|
|
|
|
+
|
|
|
|
+ # Perform the Sidak multiple comparisons for the group effect
|
|
|
|
+ group_combinations = combinations(filtered_df["Group"].unique(), 2)
|
|
|
|
+ log.write("Sidak Multiple Comparisons:\n")
|
|
|
|
+ log.write("Group1\tGroup2\tTimePoint\tPValue\tSignificantDifference\n")
|
|
|
|
+ for group1, group2 in group_combinations:
|
|
|
|
+ for time_point in filtered_df["merged_timepoint"].unique():
|
|
|
|
+ group1_data = filtered_df[(filtered_df["Group"] == group1) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
|
|
|
|
+ group2_data = filtered_df[(filtered_df["Group"] == group2) & (filtered_df["merged_timepoint"] == time_point)]["Value"]
|
|
|
|
+ sidak_result = statsmodels.stats.multitest.multipletests(scipy.stats.ttest_ind(group1_data, group2_data)[1], method='sidak')
|
|
|
|
+ if sidak_result[0]:
|
|
|
|
+ log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tYes\n")
|
|
|
|
+ else:
|
|
|
|
+ log.write(f"{group1}\t{group2}\t{time_point}\t{sidak_result[1]}\tNo\n")
|
|
|
|
+
|
|
# Append the results to the list
|
|
# Append the results to the list
|
|
results.append({'Qtype': qq, 'mask_name': mm,'dialation': dd,'p_value': mdf.pvalues['merged_timepoint']})
|
|
results.append({'Qtype': qq, 'mask_name': mm,'dialation': dd,'p_value': mdf.pvalues['merged_timepoint']})
|
|
|
|
|