# incooparate_behavior_data.py (2.3 KB)
  # Merge the DWI quantitative results with behaviour scores and QC votes, then save one CSV.
import os

import pandas as pd

# Every input and the output live under "<parent of this script's folder>/output".
code_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(code_dir)
output_dir = os.path.join(parent_dir, 'output')

# Quantitative results: the left-hand table that both merges below are anchored on.
input_file_quant = os.path.join(
    output_dir, "Quantitative_outputs", 'Quantitative_results_from_dwi_processing.csv'
)
df_q = pd.read_csv(input_file_quant)

# Quality-control voting table.
input_file_qc = os.path.join(output_dir, "quality_control_aidaqc", 'voting_remapped.csv')
df_qc = pd.read_csv(input_file_qc)

# Behaviour table.
input_file_behav = os.path.join(
    output_dir, "behavior_analysis", 'Merged_behaviour_data_ztransform_kmeans_prr_clustered.csv'
)
df_b = pd.read_csv(input_file_behav)

# Bring the subjectID spelling in line across the three tables.
# Quantitative and QC tables: remove the literal text "sub-" wherever it occurs.
for frame in (df_q, df_qc):
    frame['subjectID'] = frame['subjectID'].str.replace("sub-", "", regex=False)
# Behaviour table: IDs that start with "SR" and end in "s" lose that one trailing "s".
df_b['subjectID'] = df_b['subjectID'].str.replace(r'^(SR.*)s$', r'\1', regex=True)

# Only QC rows whose sequence_type is exactly "diff" take part in the merge.
df_qc_filtered = df_qc.loc[df_qc['sequence_type'].eq("diff")]

# Attach the behaviour columns; rows are matched on subject and time point.
behavior_keys = ['subjectID', 'merged_timepoint']
columns_to_merge = behavior_keys + [
    "DeficitScore",
    '2 Cluster', '3 Cluster', '4 Cluster', '5 Cluster', '6 Cluster',
    'linefit_cluster', 'cluster ii', 'cluster co',
]
merged_df = df_q.merge(df_b[columns_to_merge], on=behavior_keys, how="left")

# Attach the QC columns; rows are matched on subject only.
# NOTE(review): if the filtered QC table holds more than one row per subjectID,
# this left merge repeats each matching row once per QC row - confirm that one
# QC row per subject is guaranteed.
qc_keys = ['subjectID']
qc_columns_to_merge = qc_keys + ['Exclude_Aref', 'Voting outliers (from 5)']
merged_df = merged_df.merge(df_qc_filtered[qc_columns_to_merge], on=qc_keys, how="left")

# Drop rows flagged Exclude_Aref == True. Rows without a QC match hold NaN in
# that column after the left merge and are kept, because NaN is not equal to True.
merged_df = merged_df[merged_df['Exclude_Aref'].ne(True)]

# Write the merged table next to the quantitative input, without the index column.
output_file_path = os.path.join(
    output_dir,
    'Quantitative_outputs',
    'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv',
)
merged_df.to_csv(output_file_path, index=False)