correlation_between_bahavior_and_DTI_anovaSpecific.py

import os
import pandas as pd
from scipy.stats import spearmanr, shapiro, pearsonr
import numpy as np
from statsmodels.stats.multitest import multipletests
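# This script correlates DTI metrics ('Value') with behavioral scores ('averageScore'),
# restricted to the Qtype/mask combinations that reached significance in the mixed-model analysis,
# and applies FDR correction to the resulting p-values per group, timepoint, and mask.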
# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory of the code directory
parent_dir = os.path.dirname(code_dir)
# Input files: DTI quantitative results merged with behavior data, and the mixed-model (ANOVA) results
input_file_path = os.path.join(parent_dir, 'output', "Final_Quantitative_output", 'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv')
input_anova_file = os.path.join(parent_dir, 'output', "Final_Quantitative_output", "mixed_model_analysis", 'mixed_model_results.csv')
df_anova = pd.read_csv(input_anova_file)
# Keep only the Qtype/mask combinations that were significant in the mixed-model analysis
df_anova2 = df_anova[df_anova["p_value"] <= 0.05]
df_temp = pd.read_csv(input_file_path)
# Filter df based on the dilation amount of interest
df_f1 = df_temp[df_temp["dialation_amount"] == 2]
# Merge df_f1 and df_anova2 on common columns "Qtype" and "mask_name"
merged_df = pd.merge(df_f1, df_anova2[['Qtype', 'mask_name', 'p_value']], on=['Qtype', 'mask_name'], how='left')
# Display the merged DataFrame
print(merged_df)
#%%
# Empty list to store the results
results = []
# Keep only significant combinations and timepoints below 40
df = merged_df[(merged_df["p_value"] <= 0.05) & (merged_df["merged_timepoint"] < 40)].copy()
df.dropna(subset=['p_value'], inplace=True)
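# For every Group x merged_timepoint x Qtype x mask_name combination, both variables are checked for
# normality with the Shapiro-Wilk test and then correlated with Pearson (normal) or Spearman (non-normal).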
# Iterate over unique values of 'Group', 'merged_timepoint', 'Qtype', and 'mask_name'
for ss in df["Group"].unique():
    for time_point in df["merged_timepoint"].unique():
        for qq in df["Qtype"].unique():
            for mm in df["mask_name"].unique():
                # Filter the DataFrame for the current combination of 'Group', 'merged_timepoint', 'Qtype', and 'mask_name'
                df_f2 = df[(df["Group"] == ss) & (df["merged_timepoint"] == time_point) & (df["Qtype"] == qq) & (df["mask_name"] == mm)]
                # Remove rows with NaN values in 'Value' or 'averageScore' columns
                df_f2 = df_f2.dropna(subset=['Value', 'averageScore'])
                if not df_f2.empty:
                    # Shapiro-Wilk normality tests for the DTI metric and the behavioral score
                    shapiro_statValue, shapiro_pvalueQValue = shapiro(df_f2["Value"])
                    shapiro_statScore, shapiro_pvalueBehavior = shapiro(df_f2["averageScore"])
                    if shapiro_pvalueQValue < 0.05 or shapiro_pvalueBehavior < 0.05:
                        # At least one variable deviates from normality: use the rank-based Spearman correlation
                        correlation_coefficient, p_value = spearmanr(df_f2["Value"], df_f2["averageScore"])
                    else:
                        # Both variables are consistent with normality: use the Pearson correlation
                        correlation_coefficient, p_value = pearsonr(df_f2["Value"], df_f2["averageScore"])
                    # Store the results in a dictionary
                    result = {'Group': ss, 'merged_timepoint': time_point, 'Qtype': qq, 'mask_name': mm,
                              'Pval': p_value, 'R': correlation_coefficient,
                              'shapiro-wilk_pvalue_qtype': shapiro_pvalueQValue, 'shapiro-wilk_pvalue_behavior': shapiro_pvalueBehavior}
                    # Append the dictionary to the results list
                    results.append(result)
                else:
                    print(
                        f"No valid data found for Group: {ss}, merged_timepoint: {time_point}, Qtype: {qq}, and mask_name: {mm}. Skipping.")
# Create a DataFrame from the results list
correlation_results_df = pd.DataFrame(results)
unique_groups = df["Group"].unique()
unique_masks = df["mask_name"].unique()
unique_tp = df["merged_timepoint"].unique()
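# Apply Benjamini-Hochberg FDR correction to the correlation p-values within each
# timepoint/mask/group combination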
for tt in unique_tp:
    for mask in unique_masks:
        for group in unique_groups:
            time_mask = correlation_results_df['merged_timepoint'] == tt
            mask_mask = correlation_results_df['mask_name'] == mask
            group_mask = correlation_results_df['Group'] == group
            combined_mask = time_mask & mask_mask & group_mask
            p_values = correlation_results_df[combined_mask]['Pval']
            if p_values.empty:
                # Nothing to correct for this combination
                continue
            rejected, p_values_corrected, _, _ = multipletests(p_values, method='fdr_bh')
            # Assign the corrected p-values to the DataFrame
            correlation_results_df.loc[combined_mask, 'Pval_corrected'] = p_values_corrected
# Define the output file path
output_file_path = os.path.join(parent_dir, 'output', "Correlation_with_behavior", 'correlation_dti_with_behavior.csv')
# Create the output folder if it does not exist yet
os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
# Save the correlation results DataFrame to a CSV file
correlation_results_df.to_csv(output_file_path, index=False)
print("Correlation results with corrected p-values saved successfully to 'correlation_dti_with_behavior.csv' in the output folder.")
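#%%
# Optional sanity check (a minimal sketch, not part of the original pipeline): list the
# combinations whose FDR-corrected p-value survives a 0.05 threshold.
significant_df = correlation_results_df[correlation_results_df['Pval_corrected'] < 0.05]
print(significant_df[['Group', 'merged_timepoint', 'Qtype', 'mask_name', 'R', 'Pval_corrected']])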