import os
import pandas as pd
from scipy.stats import spearmanr, shapiro, pearsonr
import numpy as np
from statsmodels.stats.multitest import multipletests

# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory of the code directory
parent_dir = os.path.dirname(code_dir)

# Load the merged quantitative DWI / behavior data and keep only timepoints < 40
input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output",
                               'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv')
df = pd.read_csv(input_file_path)
df = df[df["merged_timepoint"] < 40]

# Behavioral readouts to correlate against the quantitative DWI values
tests = ['paw_dragZ-score', 'hindlimb_dropZ-score', 'foot_faultsZ-score',
         'paw_drag', 'hindlimb_drop', 'foot_faults', "averageScore"]

#%%
# Empty list to store the results
results = []

for tt in tests:
    for dd in df["dialation_amount"].unique():
        # Iterate over unique values of 'Group', 'merged_timepoint', 'Qtype', and 'mask_name'
        for ss in df["Group"].unique():
            for time_point in df["merged_timepoint"].unique():
                for qq in df["Qtype"].unique():
                    for mm in df["mask_name"].unique():
                        # Filter the DataFrame for the current combination of
                        # 'Group', 'merged_timepoint', 'Qtype', 'mask_name' and 'dialation_amount'
                        df_f2 = df[(df["Group"] == ss) &
                                   (df["merged_timepoint"] == time_point) &
                                   (df["Qtype"] == qq) &
                                   (df["mask_name"] == mm) &
                                   (df["dialation_amount"] == dd)]

                        # Remove rows with NaN values in the 'Value' or current behavior-test column
                        df_f2 = df_f2.dropna(subset=['Value', tt])

                        if not df_f2.empty:
                            # Test both variables for normality
                            shapiro_statValue, shapiro_pvalueQValue = shapiro(df_f2["Value"])
                            shapiro_statScore, shapiro_pvalueBehavior = shapiro(df_f2[tt])

                            if shapiro_pvalueQValue < 0.05 or shapiro_pvalueBehavior < 0.05:
                                # At least one variable is non-normal: use the rank-based Spearman correlation
                                correlation_coefficient, p_value = spearmanr(df_f2["Value"], df_f2[tt])
                            else:
                                # Both variables look normal: use Pearson correlation
                                correlation_coefficient, p_value = pearsonr(df_f2["Value"], df_f2[tt])

                            # Store the results in a dictionary
                            result = {'Group': ss, 'merged_timepoint': time_point, 'Qtype': qq,
                                      'mask_name': mm, 'Pval': p_value, 'R': correlation_coefficient,
                                      "Behavior_test": tt, "dialation_amount": dd}

                            # Append the dictionary to the results list
                            results.append(result)
                        else:
                            print(f"No valid data found for Group: {ss}, merged_timepoint: {time_point}, "
                                  f"Qtype: {qq}, and mask_name: {mm}. Skipping.")

# Create a DataFrame from the results list
correlation_results_df = pd.DataFrame(results)

unique_groups = df["Group"].unique()
unique_masks = df["mask_name"].unique()
unique_tp = df["merged_timepoint"].unique()
unique_dd = df["dialation_amount"].unique()

# =============================================================================
# for dd in unique_dd:
#     for tt in unique_tp:
#         for mask in unique_masks:
#             for group in unique_groups:
#                 time_mask = correlation_results_df['merged_timepoint'] == tt
#                 mask_mask = correlation_results_df['mask_name'] == mask
#                 group_mask = correlation_results_df['Group'] == group
#                 dd_mask = correlation_results_df["dialation_amount"] == dd
#                 combined_mask = time_mask & mask_mask & group_mask & dd_mask
#
#                 p_values = correlation_results_df[combined_mask]['Pval']
#                 rejected, p_values_corrected, _, _ = multipletests(p_values, method='fdr_bh')
#
#                 # Assign the corrected p-values to the DataFrame
#                 correlation_results_df.loc[combined_mask, 'Pval_corrected'] = p_values_corrected
# =============================================================================

# Define the output file path
output_file_path = os.path.join(parent_dir, 'output', "Correlation_with_behavior",
                                'correlation_dti_with_behavior.csv')

# Save the correlation results DataFrame to a CSV file
correlation_results_df.to_csv(output_file_path, index=False)

print("Correlation results saved successfully to 'correlation_dti_with_behavior.csv' in the output folder.")
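
# =============================================================================
# Optional sketch (assumption, not part of the original pipeline): the commented-out
# loop above applies Benjamini-Hochberg FDR correction within each
# (dialation_amount, merged_timepoint, mask_name, Group) family of tests. Assuming the
# 'Pval' column produced above, the same correction can be written more compactly with
# a groupby/transform; if corrected p-values should be saved, re-enable this and move
# it above the to_csv call.
#
# def fdr_correct(pvals):
#     # multipletests returns (reject, pvals_corrected, alphacSidak, alphacBonf);
#     # keep only the corrected p-values
#     return multipletests(pvals, method='fdr_bh')[1]
#
# correlation_results_df['Pval_corrected'] = (
#     correlation_results_df
#     .groupby(['dialation_amount', 'merged_timepoint', 'mask_name', 'Group'])['Pval']
#     .transform(fdr_correct)
# )
# =============================================================================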