# -*- coding: utf-8 -*- """ Created on Tue Nov 5 17:28:49 2024 @author: arefks """ import os import pandas as pd import numpy as np import seaborn as sns import matplotlib.pyplot as plt from scipy.stats import pearsonr, spearmanr, shapiro from statsmodels.stats.multitest import multipletests from tqdm import tqdm # Get the directory where the code file is located code_dir = os.path.dirname(os.path.abspath(__file__)) parent_dir = os.path.dirname(code_dir) # Load the CSV data input_file_path = os.path.join(parent_dir, 'output', "Quantitative_outputs", 'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv') df = pd.read_csv(input_file_path, low_memory=False) # Remove duplicate rows df = df.drop_duplicates() # Remove specified columns df = df.drop(columns=['is_it_AMBA', '2 Cluster', '3 Cluster', '4 Cluster', '5 Cluster', '6 Cluster', 'Exclude_Aref', 'Voting outliers (from 5)', 'fullpath']) # Convert columns that should be numeric to numeric, coercing errors to NaN numeric_cols = ['Value', 'DeficitScore'] for col in numeric_cols: if col in df.columns: df[col] = pd.to_numeric(df[col], errors='coerce') # Merge data for timepoint 0 with timepoint 28 for DeficitScore change calculation merged_df_0_28 = pd.merge(df[df['merged_timepoint'] == 0], df[df['merged_timepoint'] == 28], on=['subjectID', 'Group', 'Qtype', 'mask_name', 'dialation_amount'], suffixes=('_0', '_28')) # Store all p-values for multiple test correction all_p_values = [] # Define the settings for selected masks, groups, qtype, and timepoints selected_groups = ["Sham"] selected_qtype = "fa" selected_mask_names = [ "CRuT_MOp-RN_ipsilesional_CCcut", "CRuT_MOp-RN_contralesional_mirrored_CCcut", "CReT_MOp-TRN_ipsilesional_CCcut", "CReT_MOp-TRN_contralesional_CCcut", "CST_MOp-int-py_ipsilesional_selfdrawnROA+CCcut", "CST_MOp-int-py_contralesional_selfdrawnROA+CCcut", "TC_DORsm-SSp_ll+SSp_ul_ipsilesional_END+higherstepsize_", "TC_DORsm-SSp_ll+SSp_ul_contralesional_END+higherstepsize", "CC_MOp-MOp_cut", "OT_och-lgn_lgncut" ] selected_dialation_amount = 0 # Define simple anatomical names for the masks mask_name_mapping = { "CC_MOp-MOp_cut": "Corpus Callosum", "CRuT_MOp-RN_ipsilesional_CCcut": "Rubropsinal (Ipsilesional)", "CRuT_MOp-RN_contralesional_mirrored_CCcut": "Rubropsinal (Contralesional)", "TC_DORsm-SSp_ll+SSp_ul_ipsilesional_END+higherstepsize_": "Thalamocortical (Ipsilesional)", "TC_DORsm-SSp_ll+SSp_ul_contralesional_END+higherstepsize": "Thalamocortical (Contralesional)", "CReT_MOp-TRN_contralesional_CCcut": "Reticulospinal (Contralesional)", "CReT_MOp-TRN_ipsilesional_CCcut": "Reticulospinal (Ipsilesional)", "CST_MOp-int-py_contralesional_selfdrawnROA+CCcut": "Corticospinal (Contralesional)", "CST_MOp-int-py_ipsilesional_selfdrawnROA+CCcut": "Corticospinal (Ipsilesional)", "OT_och-lgn_lgncut": "Optic" } # Create the figure for subplots fig, axes = plt.subplots(5, 2, figsize=(14 / 2.54, 25 / 2.54)) # 2 columns, 5 rows axes = axes.flatten() # Flatten the axes array for easy iteration # Iterate through each mask name for idx, selected_mask_name in enumerate(selected_mask_names): # Filter the merged DataFrame for the current settings df_filtered_28 = merged_df_0_28[(merged_df_0_28["Group"].isin(selected_groups)) & (merged_df_0_28["Qtype"] == selected_qtype) & (merged_df_0_28["mask_name"] == selected_mask_name) & (merged_df_0_28["dialation_amount"] == selected_dialation_amount)] df_filtered_28 = df_filtered_28.drop_duplicates() # Ensure there are enough data points for correlation if len(df_filtered_28) >= 3: # Calculate the change in DeficitScore and Qtype (Value) between timepoint 0 and 28 fa_value_change_28 = df_filtered_28['Value_28'] - df_filtered_28['Value_0'] deficit_score_change_28 = df_filtered_28['DeficitScore_28'] - df_filtered_28['DeficitScore_0'] # Remove rows with NaN or infinite values valid_idx = ~(fa_value_change_28.isin([np.nan, np.inf, -np.inf]) | deficit_score_change_28.isin([np.nan, np.inf, -np.inf])) fa_value_change_28 = fa_value_change_28[valid_idx] deficit_score_change_28 = deficit_score_change_28[valid_idx] # Perform Shapiro-Wilk test for normality if len(fa_value_change_28) >= 3 and len(deficit_score_change_28) >= 3: shapiro_statValue, shapiro_pvalueQValue = shapiro(fa_value_change_28) shapiro_statScore, shapiro_pvalueBehavior = shapiro(deficit_score_change_28) # Use Pearson or Spearman correlation based on normality test if shapiro_pvalueQValue < 0.05 or shapiro_pvalueBehavior < 0.05: # Use Spearman correlation if data is not normally distributed correlation_coefficient, p_value = spearmanr(fa_value_change_28, deficit_score_change_28) else: # Use Pearson correlation if data is normally distributed correlation_coefficient, p_value = pearsonr(fa_value_change_28, deficit_score_change_28) # Store p-value for multiple testing correction all_p_values.append(p_value) # Plot the regression ax = axes[idx] # Select the appropriate subplot axis sns.regplot( x=fa_value_change_28, y=deficit_score_change_28, ci=95, ax=ax, line_kws={'color': 'lightcoral', 'alpha': 0.6}, # Regression line in light red with higher transparency scatter_kws={'color': 'red', 'alpha': 0.6, 's': 10}, # Scatter points in red label=f'Stroke (R={correlation_coefficient:.2f})' ) # Determine significance level for R value pval_text = '' if p_value < 0.001: pval_text = '***' elif p_value < 0.01: pval_text = '**' elif p_value < 0.05: pval_text = '*' # Add text annotation for R and significance level ax.text(0.05, 0.95, f'R={correlation_coefficient:.2f}{pval_text}', transform=ax.transAxes, fontsize=10, verticalalignment='top', bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0)) # Set labels and customize font settings anatomical_name = mask_name_mapping.get(selected_mask_name, selected_mask_name) ax.set_xlabel(f'Δ{selected_qtype}-{anatomical_name}', fontsize=10) ax.set_ylabel('ΔDS[28-0]', fontsize=10) # Remove gridlines ax.grid(False) sns.despine(ax=ax) # Remove the top and right spines for cleaner look else: print(f"Not enough data for combination: Group=Stroke, Qtype={selected_qtype}, Mask={selected_mask_name}, Dilation={selected_dialation_amount}") # Adjust layout plt.tight_layout() # Save the complete figure with all subplots figures_folder = os.path.join(parent_dir, 'output', 'Figures') pythonFigs_folder = os.path.join(figures_folder, 'pythonFigs') os.makedirs(pythonFigs_folder, exist_ok=True) file_suffix = f'timepoint_28-0_dilation_{selected_dialation_amount}_groups_{"-".join(selected_groups)}' fig_filename = f'qtype_{selected_qtype}_behavior_change_correlation_all_masks_{file_suffix}_groups_comparison.svg' plt.savefig(os.path.join(pythonFigs_folder, fig_filename), dpi=300, bbox_inches='tight', format="svg", transparent=True) # Show the figure plt.show()