12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
"""Merge DWI quantitative results with behaviour data and QC votes.

Reads three CSV tables from the ``output`` folder that sits next to this
script's directory, aligns their ``subjectID`` columns, left-joins selected
behaviour and quality-control columns onto the quantitative table, drops
rows flagged ``Exclude_Aref``, and writes the result as a new CSV in
``Quantitative_outputs``.
"""
import os

import pandas as pd

# Locate "<parent of this script's folder>/output", the root for all inputs
# and for the output file.
code_dir, _ = os.path.split(os.path.abspath(__file__))
parent_dir = os.path.dirname(code_dir)
output_dir = os.path.join(parent_dir, 'output')

# --- Load the three input tables -------------------------------------------
# Quantitative DWI results.
input_file_quant = os.path.join(
    output_dir,
    "Quantitative_outputs",
    'Quantitative_results_from_dwi_processing.csv',
)
df_q = pd.read_csv(input_file_quant)

# Quality-control voting table.
input_file_qc = os.path.join(
    output_dir,
    "quality_control_aidaqc",
    'voting_remapped.csv',
)
df_qc = pd.read_csv(input_file_qc)

# Behaviour table.
input_file_behav = os.path.join(
    output_dir,
    "behavior_analysis",
    'Merged_behaviour_data_ztransform_kmeans_prr_clustered.csv',
)
df_b = pd.read_csv(input_file_behav)

# --- Harmonise subject identifiers -----------------------------------------
# The quantitative and QC tables get the literal "sub-" text removed from
# their IDs so they can be matched against the behaviour table.
for frame in (df_q, df_qc):
    frame['subjectID'] = frame['subjectID'].str.replace("sub-", "", regex=False)

# Behaviour IDs that start with "SR" and end in "s" lose that trailing "s";
# every other ID is left untouched.
df_b['subjectID'] = df_b['subjectID'].str.replace(r'^(SR.*)s$', r'\1', regex=True)

# --- Keep only the QC rows whose sequence type is "diff" --------------------
is_diff = df_qc['sequence_type'].eq("diff")
df_qc_filtered = df_qc.loc[is_diff]

# --- Attach behaviour columns (matched on subject and time point) -----------
columns_to_merge = [
    'subjectID', 'merged_timepoint', "DeficitScore",
    '2 Cluster', '3 Cluster', '4 Cluster', '5 Cluster', '6 Cluster',
    'linefit_cluster', 'cluster ii', 'cluster co',
]
merged_df = df_q.merge(
    df_b[columns_to_merge],
    on=['subjectID', 'merged_timepoint'],
    how="left",
)

# --- Attach QC columns (matched on subject only) ----------------------------
# NOTE(review): this join uses subjectID alone. If the QC table can hold more
# than one "diff" row per subject, the left join will duplicate quantitative
# rows -- confirm against the QC table's schema.
qc_columns_to_merge = ['subjectID', 'Exclude_Aref', 'Voting outliers (from 5)']
merged_df = merged_df.merge(
    df_qc_filtered[qc_columns_to_merge],
    on=['subjectID'],
    how="left",
)

# --- Drop rows explicitly flagged for exclusion ------------------------------
# Rows without a QC match carry NaN here; NaN is "not equal to True", so they
# are kept.
not_excluded = merged_df['Exclude_Aref'].ne(True)
merged_df = merged_df.loc[not_excluded]

# --- Write the merged table --------------------------------------------------
output_file_path = os.path.join(
    output_dir,
    'Quantitative_outputs',
    'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv',
)
merged_df.to_csv(output_file_path, index=False)
|