vizualization_fa28subtractedbyBaseline_behavior28subtractedbyBaseline.py

# -*- coding: utf-8 -*-
"""
Created on Tue Nov 5 17:28:49 2024
@author: arefks
"""
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr, shapiro
from statsmodels.stats.multitest import multipletests
from tqdm import tqdm
# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(code_dir)

# Load the CSV data
input_file_path = os.path.join(parent_dir, 'output', "Quantitative_outputs", 'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv')
df = pd.read_csv(input_file_path, low_memory=False)

# Remove duplicate rows
df = df.drop_duplicates()

# Remove specified columns
df = df.drop(columns=['is_it_AMBA', '2 Cluster', '3 Cluster', '4 Cluster', '5 Cluster', '6 Cluster', 'Exclude_Aref', 'Voting outliers (from 5)', 'fullpath'])

# Convert columns that should be numeric to numeric, coercing errors to NaN
numeric_cols = ['Value', 'DeficitScore']
for col in numeric_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Merge data for timepoint 0 with timepoint 28 for the DeficitScore change calculation
merged_df_0_28 = pd.merge(df[df['merged_timepoint'] == 0],
                          df[df['merged_timepoint'] == 28],
                          on=['subjectID', 'Group', 'Qtype', 'mask_name', 'dialation_amount'],
                          suffixes=('_0', '_28'))
# Store all p-values for multiple test correction
all_p_values = []

# Define the settings for selected masks, groups, qtype, and timepoints
selected_groups = ["Sham"]
selected_qtype = "fa"
selected_mask_names = [
    "CRuT_MOp-RN_ipsilesional_CCcut",
    "CRuT_MOp-RN_contralesional_mirrored_CCcut",
    "CReT_MOp-TRN_ipsilesional_CCcut",
    "CReT_MOp-TRN_contralesional_CCcut",
    "CST_MOp-int-py_ipsilesional_selfdrawnROA+CCcut",
    "CST_MOp-int-py_contralesional_selfdrawnROA+CCcut",
    "TC_DORsm-SSp_ll+SSp_ul_ipsilesional_END+higherstepsize_",
    "TC_DORsm-SSp_ll+SSp_ul_contralesional_END+higherstepsize",
    "CC_MOp-MOp_cut",
    "OT_och-lgn_lgncut"
]
selected_dialation_amount = 0

# Define simple anatomical names for the masks
mask_name_mapping = {
    "CC_MOp-MOp_cut": "Corpus Callosum",
    "CRuT_MOp-RN_ipsilesional_CCcut": "Rubrospinal (Ipsilesional)",
    "CRuT_MOp-RN_contralesional_mirrored_CCcut": "Rubrospinal (Contralesional)",
    "TC_DORsm-SSp_ll+SSp_ul_ipsilesional_END+higherstepsize_": "Thalamocortical (Ipsilesional)",
    "TC_DORsm-SSp_ll+SSp_ul_contralesional_END+higherstepsize": "Thalamocortical (Contralesional)",
    "CReT_MOp-TRN_contralesional_CCcut": "Reticulospinal (Contralesional)",
    "CReT_MOp-TRN_ipsilesional_CCcut": "Reticulospinal (Ipsilesional)",
    "CST_MOp-int-py_contralesional_selfdrawnROA+CCcut": "Corticospinal (Contralesional)",
    "CST_MOp-int-py_ipsilesional_selfdrawnROA+CCcut": "Corticospinal (Ipsilesional)",
    "OT_och-lgn_lgncut": "Optic"
}
# Create the figure for subplots
fig, axes = plt.subplots(5, 2, figsize=(14 / 2.54, 25 / 2.54))  # 5 rows, 2 columns (size given in cm, converted to inches)
axes = axes.flatten()  # Flatten the axes array for easy iteration

# Iterate through each mask name
for idx, selected_mask_name in enumerate(selected_mask_names):
    # Filter the merged DataFrame for the current settings
    df_filtered_28 = merged_df_0_28[(merged_df_0_28["Group"].isin(selected_groups)) &
                                    (merged_df_0_28["Qtype"] == selected_qtype) &
                                    (merged_df_0_28["mask_name"] == selected_mask_name) &
                                    (merged_df_0_28["dialation_amount"] == selected_dialation_amount)]
    df_filtered_28 = df_filtered_28.drop_duplicates()

    # Ensure there are enough data points for correlation
    if len(df_filtered_28) >= 3:
        # Calculate the change in DeficitScore and Qtype (Value) between timepoint 0 and timepoint 28
        fa_value_change_28 = df_filtered_28['Value_28'] - df_filtered_28['Value_0']
        deficit_score_change_28 = df_filtered_28['DeficitScore_28'] - df_filtered_28['DeficitScore_0']

        # Remove rows with NaN or infinite values
        valid_idx = ~(fa_value_change_28.isin([np.nan, np.inf, -np.inf]) | deficit_score_change_28.isin([np.nan, np.inf, -np.inf]))
        fa_value_change_28 = fa_value_change_28[valid_idx]
        deficit_score_change_28 = deficit_score_change_28[valid_idx]

        # Perform Shapiro-Wilk test for normality
        if len(fa_value_change_28) >= 3 and len(deficit_score_change_28) >= 3:
            shapiro_statValue, shapiro_pvalueQValue = shapiro(fa_value_change_28)
            shapiro_statScore, shapiro_pvalueBehavior = shapiro(deficit_score_change_28)

            # Use Pearson or Spearman correlation based on the normality test
            if shapiro_pvalueQValue < 0.05 or shapiro_pvalueBehavior < 0.05:
                # Use Spearman correlation if the data are not normally distributed
                correlation_coefficient, p_value = spearmanr(fa_value_change_28, deficit_score_change_28)
            else:
                # Use Pearson correlation if the data are normally distributed
                correlation_coefficient, p_value = pearsonr(fa_value_change_28, deficit_score_change_28)

            # Store p-value for multiple testing correction
            all_p_values.append(p_value)

            # Plot the regression
            ax = axes[idx]  # Select the appropriate subplot axis
            sns.regplot(
                x=fa_value_change_28, y=deficit_score_change_28, ci=95, ax=ax,
                line_kws={'color': 'lightcoral', 'alpha': 0.6},  # Regression line in light red with higher transparency
                scatter_kws={'color': 'red', 'alpha': 0.6, 's': 10},  # Scatter points in red
                label=f'{"-".join(selected_groups)} (R={correlation_coefficient:.2f})'
            )

            # Determine significance level for the R value
            pval_text = ''
            if p_value < 0.001:
                pval_text = '***'
            elif p_value < 0.01:
                pval_text = '**'
            elif p_value < 0.05:
                pval_text = '*'

            # Add text annotation for R and significance level
            ax.text(0.05, 0.95, f'R={correlation_coefficient:.2f}{pval_text}', transform=ax.transAxes,
                    fontsize=10, verticalalignment='top',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0))

            # Set labels and customize font settings
            anatomical_name = mask_name_mapping.get(selected_mask_name, selected_mask_name)
            ax.set_xlabel(f'Δ{selected_qtype}-{anatomical_name}', fontsize=10)
            ax.set_ylabel('ΔDS[28-0]', fontsize=10)

            # Remove gridlines and the top/right spines for a cleaner look
            ax.grid(False)
            sns.despine(ax=ax)
    else:
        print(f"Not enough data for combination: Group={'-'.join(selected_groups)}, Qtype={selected_qtype}, Mask={selected_mask_name}, Dilation={selected_dialation_amount}")
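
# Optional sketch: multipletests is imported above but never applied to the collected p-values.
# One way to correct across the tested tracts is Benjamini-Hochberg FDR; the method ('fdr_bh')
# and alpha (0.05) below are assumptions, not settings taken from this script.
if all_p_values:
    reject, p_values_corrected, _, _ = multipletests(all_p_values, alpha=0.05, method='fdr_bh')
    print("Raw p-values:          ", np.round(all_p_values, 4))
    print("FDR-corrected p-values:", np.round(p_values_corrected, 4))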
# Adjust layout
plt.tight_layout()

# Save the complete figure with all subplots
figures_folder = os.path.join(parent_dir, 'output', 'Figures')
pythonFigs_folder = os.path.join(figures_folder, 'pythonFigs')
os.makedirs(pythonFigs_folder, exist_ok=True)
file_suffix = f'timepoint_28-0_dilation_{selected_dialation_amount}_groups_{"-".join(selected_groups)}'
fig_filename = f'qtype_{selected_qtype}_behavior_change_correlation_all_masks_{file_suffix}_groups_comparison.svg'
plt.savefig(os.path.join(pythonFigs_folder, fig_filename), dpi=300, bbox_inches='tight', format="svg", transparent=True)

# Show the figure
plt.show()