# Aswendt_Lab / 2024_Ruthe_SND
							import os
import pandas as pd
from scipy.stats import spearmanr, shapiro, pearsonr
import numpy as np
from statsmodels.stats.multitest import multipletests

# Resolve the project root: the folder one level above this script's folder.
code_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(code_dir)

# Input table: DWI-derived quantitative values already merged with the
# behavior data (going by the file name).
input_file_path = os.path.join(
    parent_dir,
    'output',
    "Final_Quantitative_output",
    'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv',
)

df = pd.read_csv(input_file_path)

# Keep only the rows whose merged_timepoint is below 40.
df = df.loc[df["merged_timepoint"] < 40]


#%%
# scipy's Shapiro-Wilk test needs at least three observations, so cells with
# fewer paired values cannot be tested and are skipped.
MIN_OBSERVATIONS = 3
# Significance level below which a sample is treated as non-normal.
NORMALITY_ALPHA = 0.05


def _correlate(values, scores):
    """Correlate two paired samples, choosing the test from their normality.

    Both samples are checked with the Shapiro-Wilk test. If either one
    deviates from normality (p < NORMALITY_ALPHA) the rank-based Spearman
    correlation is used; otherwise the Pearson correlation is used.

    Args:
        values: Quantitative values of one cell (no NaNs).
        scores: Behavior scores paired row-by-row with ``values`` (no NaNs).

    Returns:
        Tuple ``(coefficient, p_value, values_normality_p, scores_normality_p)``.
    """
    _, values_normality_p = shapiro(values)
    _, scores_normality_p = shapiro(scores)

    non_normal = (values_normality_p < NORMALITY_ALPHA
                  or scores_normality_p < NORMALITY_ALPHA)
    # Previously both branches called pearsonr, so the normality check had no
    # effect on the statistic that was reported.
    if non_normal:
        coefficient, p_value = spearmanr(values, scores)
    else:
        coefficient, p_value = pearsonr(values, scores)
    return coefficient, p_value, values_normality_p, scores_normality_p


# The factor levels do not change while looping, so compute them once.
dilation_levels = df["dialation_amount"].unique()
group_levels = df["Group"].unique()
timepoint_levels = df["merged_timepoint"].unique()
qtype_levels = df["Qtype"].unique()
mask_levels = df["mask_name"].unique()

# Empty list to store the results
results = []
for dd in dilation_levels:
    # Iterate over unique values of 'Group', 'merged_timepoint', 'Qtype', and 'mask_name'
    for ss in group_levels:
        for time_point in timepoint_levels:
            for qq in qtype_levels:
                for mm in mask_levels:
                    # Select the rows of the current cell
                    df_f2 = df[(df["Group"] == ss) & (df["merged_timepoint"] == time_point) & (df["Qtype"] == qq) & (df["mask_name"] == mm) & (df["dialation_amount"] == dd)]

                    # Remove rows with NaN values in 'Value' or 'hindlimb_dropZ-score' columns
                    df_f2 = df_f2.dropna(subset=['Value', 'hindlimb_dropZ-score'])

                    if df_f2.empty:
                        print(
                            f"No valid data found for Group: {ss}, merged_timepoint: {time_point}, Qtype: {qq}, and mask_name: {mm}. Skipping.")
                        continue

                    if len(df_f2) < MIN_OBSERVATIONS:
                        # Too few pairs for the normality test (and for a
                        # meaningful correlation); skip instead of failing.
                        print(
                            f"Only {len(df_f2)} valid observation(s) for Group: {ss}, merged_timepoint: {time_point}, Qtype: {qq}, mask_name: {mm}, and dialation_amount: {dd}; at least {MIN_OBSERVATIONS} are required. Skipping.")
                        continue

                    (correlation_coefficient, p_value,
                     shapiro_pvalueQValue, shapiro_pvalueBehavior) = _correlate(
                        df_f2["Value"], df_f2["hindlimb_dropZ-score"])

                    # Store the results in a dictionary. Which test produced
                    # 'R'/'Pval' follows from the two stored Shapiro-Wilk
                    # p-values (Spearman if either is below NORMALITY_ALPHA).
                    result = {'Group': ss, 'merged_timepoint': time_point, 'Qtype': qq, 'mask_name': mm,
                              'Pval': p_value, 'R': correlation_coefficient,
                              'shapiro-wilk_pvalue_qtype': shapiro_pvalueQValue, 'shapiro-wilk_pvalue_behavior': shapiro_pvalueBehavior, "dialation_amount": dd}

                    # Append the dictionary to the results list
                    results.append(result)

# Create a DataFrame from the results list
correlation_results_df = pd.DataFrame(results)
unique_groups = df["Group"].unique()
unique_masks = df["mask_name"].unique()
unique_tp = df["merged_timepoint"].unique()
unique_dd = df["dialation_amount"].unique()

# Benjamini-Hochberg (FDR) correction of the correlation p-values. One family
# of tests is every row sharing the same dilation amount, timepoint, mask and
# group, i.e. the correction runs across the remaining factor (Qtype).
if correlation_results_df.empty:
    # An empty results list yields a frame without columns; indexing it by
    # column name below would raise a KeyError.
    print("No correlations were computed; skipping the multiple-comparison correction.")
else:
    # Create the column up front so it exists even if no family is corrected.
    correlation_results_df['Pval_corrected'] = np.nan

    # A NaN p-value (e.g. from a constant sample) would propagate to every
    # corrected value of its family, so such rows are left out of the
    # correction and keep NaN in 'Pval_corrected'.
    has_pval = correlation_results_df['Pval'].notna()

    for dd in unique_dd:
        dd_mask = correlation_results_df["dialation_amount"] == dd
        for tt in unique_tp:
            time_mask = correlation_results_df['merged_timepoint'] == tt
            for mask in unique_masks:
                mask_mask = correlation_results_df['mask_name'] == mask
                for group in unique_groups:
                    group_mask = correlation_results_df['Group'] == group
                    combined_mask = time_mask & mask_mask & group_mask & dd_mask & has_pval

                    # Combinations without any computed correlation would hand
                    # multipletests an empty array; skip them.
                    if not combined_mask.any():
                        continue

                    p_values = correlation_results_df.loc[combined_mask, 'Pval']
                    _, p_values_corrected, _, _ = multipletests(p_values, method='fdr_bh')

                    # Assign the corrected p-values to the DataFrame
                    correlation_results_df.loc[combined_mask, 'Pval_corrected'] = p_values_corrected
# Define the output file path
output_file_path = os.path.join(parent_dir, 'output', "Correlation_with_behavior", 'correlation_dti_with_behavior.csv')

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

# Save the correlation results DataFrame to a CSV file
correlation_results_df.to_csv(output_file_path, index=False)

# Report the path that was actually written (the old message named a
# different file, 'correlation_results.csv').
print(f"Correlation results with corrected p-values saved successfully to '{output_file_path}'.")