Browse Source

intermidiate step

arefks 1 month ago
parent
commit
164f8d841f

+ 33 - 24
code/correlation_between_bahavior_and_DTI.py

@@ -1,7 +1,8 @@
 import os
 import pandas as pd
-from scipy.stats import pearsonr
+from scipy.stats import spearmanr, shapiro
 import numpy as np
+from statsmodels.stats.multitest import multipletests
 
 # Get the directory where the code file is located
 code_dir = os.path.dirname(os.path.abspath(__file__))
@@ -10,11 +11,12 @@ code_dir = os.path.dirname(os.path.abspath(__file__))
 parent_dir = os.path.dirname(code_dir)
 
 # Step 4: Save the resulting dataframe to a CSV file
-input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output",'Final_Quantitative_output&behavior_data.csv')
+input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv')
 
 df = pd.read_csv(input_file_path)
 
-df_f1 =  (df["dialation_amount"] == 0)
+
+df_f1 = (df["dialation_amount"] == 2)
 
 # Empty list to store the results
 results = []
@@ -26,42 +28,49 @@ for ss in df["Group"].unique():
             for mm in df["mask_name"].unique():
                 # Filter the DataFrame for the current combination of 'Group', 'merged_timepoint', 'Qtype', and 'mask_name'
                 df_f2 = df[df_f1 & (df["Group"] == ss) & (df["merged_timepoint"] == time_point) & (df["Qtype"] == qq) & (df["mask_name"] == mm)]
-                
+
                 # Remove rows with NaN values in 'Value' or 'averageScore' columns
                 df_f2 = df_f2.dropna(subset=['Value', 'averageScore'])
-                
+
                 if not df_f2.empty:
-                    # Calculate Pearson correlation coefficient between 'Value' and 'averageScore' columns
-                    correlation_coefficient, p_value = pearsonr(df_f2["Value"], df_f2["averageScore"])
+
+                    correlation_coefficient, p_value = spearmanr(df_f2["Value"], df_f2["averageScore"])
+                    # Calculate Shapiro-Wilk test for normality
+                    shapiro_statValue, shapiro_pvalueQValue = shapiro(df_f2["Value"])
+                    shapiro_statScore, shapiro_pvalueBehavior = shapiro(df_f2["averageScore"])
                     
+
                     # Store the results in a dictionary
-                    result = {'Group': ss, 'merged_timepoint': time_point, 'Qtype': qq, 'mask_name': mm, 'Pval_corr': p_value, 'R_corr': correlation_coefficient}
-                    
+                    result = {'Group': ss, 'merged_timepoint': time_point, 'Qtype': qq, 'mask_name': mm, 
+                              'Pval_corr': p_value, 'R_corr': correlation_coefficient,
+                              'shapiro-wilk_pvalue_qtype': shapiro_pvalueQValue,'shapiro-wilk_pvalue_behavior': shapiro_pvalueBehavior}
+
                     # Append the dictionary to the results list
                     results.append(result)
                 else:
-                    print(f"No valid data found for Group: {ss}, merged_timepoint: {time_point}, Qtype: {qq}, and mask_name: {mm}. Skipping.")
+                    print(
+                        f"No valid data found for Group: {ss}, merged_timepoint: {time_point}, Qtype: {qq}, and mask_name: {mm}. Skipping.")
 
 # Create a DataFrame from the results list
 correlation_results_df = pd.DataFrame(results)
 
-# Apply Holm-Bonferroni correction to p-values
-p_values = correlation_results_df['Pval_corr']
-num_tests = len(p_values)
-sorted_indices = np.argsort(p_values)
-
-# Initialize an empty array to store the corrected p-values
-p_values_corrected = np.ones(num_tests)
-
-# Loop through the sorted p-values and apply the Holm-Bonferroni correction
-for i, idx in enumerate(sorted_indices):
-    p_values_corrected[idx] = np.min([1, p_values[idx] * (num_tests - i)])
+# Apply FDR correction to p-values within each Qtype and each Group separately
+unique_qtypes = correlation_results_df['Qtype'].unique()
+unique_groups = df["Group"].unique()
+unique_masks = df["mask_name"].unique()
 
-# Assign the corrected p-values to the DataFrame
-correlation_results_df['Pval_corr_corrected'] = p_values_corrected
+for qtype in unique_qtypes:
+    for mask in unique_masks:
+        mask_mask = correlation_results_df['mask_name'] == mask
+        qtype_group_mask = mask_mask & (correlation_results_df['Qtype'] == qtype)
+        p_values = correlation_results_df[qtype_group_mask]['Pval_corr']
+        rejected, p_values_corrected, _, _ = multipletests(p_values, method='fdr_bh')
 
+        # Assign the corrected p-values to the DataFrame
+        correlation_results_df.loc[qtype_group_mask, 'Pval_corr_corrected'] = p_values_corrected
+        
 # Define the output file path
-output_file_path = os.path.join(parent_dir, 'output', "Correlation_with_behavior",'correlation_results.csv')
+output_file_path = os.path.join(parent_dir, 'output', "Correlation_with_behavior", 'correlation_dti_with_behavior5.csv')
 
 # Save the correlation results DataFrame to a CSV file
 correlation_results_df.to_csv(output_file_path, index=False)

+ 2 - 2
code/heat_maps_correlation.py

@@ -6,7 +6,7 @@ import matplotlib.pyplot as plt
 # Set up the directory paths
 code_dir = os.path.dirname(os.path.abspath(__file__))
 parent_dir = os.path.dirname(code_dir)
-csv_file = os.path.join(parent_dir, 'output', "Correlation_with_behavior", 'correlation_dti_with_behavior.csv')
+csv_file = os.path.join(parent_dir, 'output', "Correlation_with_behavior", 'correlation_dti_with_behavior5.csv')
 output_dir = os.path.dirname(csv_file)
 
 # Read the CSV file into a DataFrame
@@ -44,6 +44,6 @@ for qtype in qtypes:
             axes[i, j].tick_params(axis='both', which='major', labelsize=8)
     
     plt.tight_layout()
-    save_path = os.path.join(output_dir, f'heatmap_{qtype}.png')
+    save_path = os.path.join(output_dir, f'heatmap_{qtype}_2dialation_corrected_spearman.png')
     plt.savefig(save_path, dpi=300)
     plt.show()

File diff suppressed because it is too large
+ 1297 - 0
output/Correlation_with_behavior/correlation_dti_with_behavior3.csv


File diff suppressed because it is too large
+ 1297 - 0
output/Correlation_with_behavior/correlation_dti_with_behavior4.csv


File diff suppressed because it is too large
+ 1297 - 0
output/Correlation_with_behavior/correlation_dti_with_behavior5.csv


File diff suppressed because it is too large
+ 1297 - 0
output/Correlation_with_behavior/correlation_dti_with_behavior_2.csv


BIN
output/Correlation_with_behavior/heatmap_ad_2dialation_corrected.png


BIN
output/Correlation_with_behavior/heatmap_ad_2dialation_corrected_spearman.png


BIN
output/Correlation_with_behavior/heatmap_fa_2dialation_corrected.png


BIN
output/Correlation_with_behavior/heatmap_fa_2dialation_corrected_spearman.png


BIN
output/Correlation_with_behavior/heatmap_md_2dialation_corrected.png


BIN
output/Correlation_with_behavior/heatmap_md_2dialation_corrected_spearman.png


BIN
output/Correlation_with_behavior/heatmap_rd_2dialation_corrected.png


BIN
output/Correlation_with_behavior/heatmap_rd_2dialation_corrected_spearman.png