123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158 |
- import os
- import pandas as pd
- import numpy as np
- import seaborn as sns
- import matplotlib.pyplot as plt
- from scipy.stats import pearsonr, spearmanr, shapiro
- from statsmodels.stats.multitest import multipletests
- from tqdm import tqdm
- # Get the directory where the code file is located
- code_dir = os.path.dirname(os.path.abspath(__file__))
- parent_dir = os.path.dirname(code_dir)
- # Load the CSV data
- input_file_path = os.path.join(parent_dir, 'output', "Quantitative_outputs", 'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv')
- df = pd.read_csv(input_file_path, low_memory=False)
- # Remove duplicate rows
- df = df.drop_duplicates()
- # Remove specified columns
- df = df.drop(columns=['is_it_AMBA', '2 Cluster', '3 Cluster', '4 Cluster', '5 Cluster', '6 Cluster', 'Exclude_Aref', 'Voting outliers (from 5)', 'fullpath'])
- # Convert columns that should be numeric to numeric, coercing errors to NaN
- numeric_cols = ['Value', 'DeficitScore']
- for col in numeric_cols:
- if col in df.columns:
- df[col] = pd.to_numeric(df[col], errors='coerce')
- # Merge data for timepoint 0 with timepoint 28 for DeficitScore change calculation
- merged_df_0_28 = pd.merge(df[df['merged_timepoint'] == 0],
- df[df['merged_timepoint'] == 28],
- on=['subjectID', 'Group', 'Qtype', 'mask_name', 'dialation_amount'],
- suffixes=('_0', '_28'))
- # Store all p-values for multiple test correction
- all_p_values = []
- # Define the settings for selected masks, groups, qtype, and timepoints
- selected_groups = ["Sham"]
- selected_qtype = "fa"
- selected_mask_names = [
- "CRuT_MOp-RN_ipsilesional_CCcut",
- "CRuT_MOp-RN_contralesional_mirrored_CCcut",
- "CReT_MOp-TRN_ipsilesional_CCcut",
- "CReT_MOp-TRN_contralesional_CCcut",
- "CST_MOp-int-py_ipsilesional_selfdrawnROA+CCcut",
- "CST_MOp-int-py_contralesional_selfdrawnROA+CCcut",
- "TC_DORsm-SSp_ll+SSp_ul_ipsilesional_END+higherstepsize_",
- "TC_DORsm-SSp_ll+SSp_ul_contralesional_END+higherstepsize",
- "CC_MOp-MOp_cut",
- "OT_och-lgn_lgncut"
- ]
- #selected_timepoints = [28]
- selected_dialation_amount = 0
- # Define simple anatomical names for the masks
- mask_name_mapping = {
- "CC_MOp-MOp_cut": "Corpus Callosum",
- "CRuT_MOp-RN_ipsilesional_CCcut": "Rubropsinal (Ipsilesional)",
- "CRuT_MOp-RN_contralesional_mirrored_CCcut": "Rubropsinal (Contralesional)",
- "TC_DORsm-SSp_ll+SSp_ul_ipsilesional_END+higherstepsize_": "Thalamocortical (Ipsilesional)",
- "TC_DORsm-SSp_ll+SSp_ul_contralesional_END+higherstepsize": "Thalamocortical (Contralesional)",
- "CReT_MOp-TRN_contralesional_CCcut": "Reticulospinal (Contralesional)",
- "CReT_MOp-TRN_ipsilesional_CCcut": "Reticulospinal (Ipsilesional)",
- "CST_MOp-int-py_contralesional_selfdrawnROA+CCcut": "Corticospinal (Contralesional)",
- "CST_MOp-int-py_ipsilesional_selfdrawnROA+CCcut": "Corticospinal (Ipsilesional)",
- "OT_och-lgn_lgncut": "Optic"
- }
- # Create the figure for subplots
- fig, axes = plt.subplots(5, 2, figsize=(14 / 2.54, 25 / 2.54)) # 2 columns, 5 rows
- axes = axes.flatten() # Flatten the axes array for easy iteration
- # Iterate through each mask name
- for idx, selected_mask_name in enumerate(selected_mask_names):
- # Filter the merged DataFrame for the current settings
- df_filtered_28 = merged_df_0_28[(merged_df_0_28["Group"].isin(selected_groups)) &
- (merged_df_0_28["Qtype"] == selected_qtype) &
- (merged_df_0_28["mask_name"] == selected_mask_name) &
- (merged_df_0_28["dialation_amount"] == selected_dialation_amount)]
- df_filtered_28 = df_filtered_28.drop_duplicates()
- # Ensure there are enough data points for correlation
- if len(df_filtered_28) >= 3:
- # Calculate the change in DeficitScore between timepoint 0 and 28
- fa_value_28 = df_filtered_28['Value_28']
- deficit_score_change_28 = df_filtered_28['DeficitScore_28'] - df_filtered_28['DeficitScore_0']
- # Remove rows with NaN or infinite values
- valid_idx = ~(fa_value_28.isin([np.nan, np.inf, -np.inf]) | deficit_score_change_28.isin([np.nan, np.inf, -np.inf]))
- fa_value_28 = fa_value_28[valid_idx]
- deficit_score_change_28 = deficit_score_change_28[valid_idx]
- # Perform Shapiro-Wilk test for normality
- if len(fa_value_28) >= 3 and len(deficit_score_change_28) >= 3:
- shapiro_statValue, shapiro_pvalueQValue = shapiro(fa_value_28)
- shapiro_statScore, shapiro_pvalueBehavior = shapiro(deficit_score_change_28)
- # Use Pearson or Spearman correlation based on normality test
- if shapiro_pvalueQValue < 0.05 or shapiro_pvalueBehavior < 0.05:
- # Use Spearman correlation if data is not normally distributed
- correlation_coefficient, p_value = spearmanr(fa_value_28, deficit_score_change_28)
- else:
- # Use Pearson correlation if data is normally distributed
- correlation_coefficient, p_value = pearsonr(fa_value_28, deficit_score_change_28)
- # Store p-value for multiple testing correction
- all_p_values.append(p_value)
- # Plot the regression
- ax = axes[idx] # Select the appropriate subplot axis
- sns.regplot(
- x=fa_value_28, y=deficit_score_change_28, ci=95, ax=ax,
- line_kws={'color': 'lightcoral', 'alpha': 0.6}, # Regression line in light red with higher transparency
- scatter_kws={'color': 'red', 'alpha': 0.6, 's': 10}, # Scatter points in red
- label=f'Stroke (R={correlation_coefficient:.2f})'
- )
-
- # Determine significance level for R value
- pval_text = ''
- if p_value < 0.001:
- pval_text = '***'
- elif p_value < 0.01:
- pval_text = '**'
- elif p_value < 0.05:
- pval_text = '*'
-
- # Add text annotation for R and significance level
- ax.text(0.05, 0.95, f'R={correlation_coefficient:.2f}{pval_text}', transform=ax.transAxes,
- fontsize=10, verticalalignment='top', bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0))
-
- # Set labels and customize font settings
- anatomical_name = mask_name_mapping.get(selected_mask_name, selected_mask_name)
- ax.set_xlabel(f'{selected_qtype} Value (Timepoint 28) - {anatomical_name}', fontsize=10)
- ax.set_ylabel('DS Change [28 - 0]', fontsize=10)
- anatomical_name = mask_name_mapping.get(selected_mask_name, selected_mask_name)
- ax.set_xlabel(f'{selected_qtype}-{anatomical_name}', fontsize=10)
- anatomical_name = mask_name_mapping.get(selected_mask_name, selected_mask_name)
-
- # Remove gridlines
- ax.grid(False)
- sns.despine(ax=ax) # Remove the top and right spines for cleaner look
- else:
- print(f"Not enough data for combination: Group=Stroke, Qtype={selected_qtype}, Mask={selected_mask_name}, Dilation={selected_dialation_amount}")
- # Adjust layout
- plt.tight_layout()
- # Save the complete figure with all subplots
- figures_folder = os.path.join(parent_dir, 'output', 'Figures')
- pythonFigs_folder = os.path.join(figures_folder, 'pythonFigs')
- os.makedirs(pythonFigs_folder, exist_ok=True)
- file_suffix = f'timepoint_{selected_timepoints[0]}_dilation_{selected_dialation_amount}_groups_{"-".join(selected_groups)}'
- fig_filename = f'qtype_{selected_qtype}_behavior_change_correlation_all_masks_{file_suffix}_groups_comparison.svg'
- plt.savefig(os.path.join(pythonFigs_folder, fig_filename), dpi=300, bbox_inches='tight', format="svg", transparent=True)
- # Show the figure
- plt.show()
|