Aswendt_Lab
/
2024_Ruthe_SND


			
			
				
					
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
							# -*- coding: utf-8 -*-
"""
Created on Mon Oct 28 12:21:08 2024
@author: arefk
"""

import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu

# Get the directory where the code file is located
code_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(code_dir)

# Make sure these variables are set according to your selected settings
selected_groups = ["Sham", "Stroke"]  # Example: Stroke group vs. Control group
selected_qtype = "fa"  # Example: fa Qtype
selected_mask_names = [
    "CC_MOp-MOp_cut",
    "CRuT_MOp-RN_ipsilesional_CCcut",
    "CRuT_MOp-RN_contralesional_mirrored_CCcut",
    "CReT_MOp-TRN_ipsilesional_CCcut",
    "CReT_MOp-TRN_contralesional_CCcut",
    "CST_MOp-int-py_ipsilesional_selfdrawnROA+CCcut",
    "CST_MOp-int-py_contralesional_selfdrawnROA+CCcut",
    "TC_DORsm-SSp_ll+SSp_ul_ipsilesional_END+higherstepsize_",
    "TC_DORsm-SSp_ll+SSp_ul_contralesional_END+higherstepsize",
    "OT_och-lgn_lgncut"
]  # Updated list of specific masks
selected_timepoints = [28]  # Example: multiple selected timepoints
selected_dialation_amount = 3  # Example: dilation value for the time point

# Define simple anatomical names for the masks
mask_name_mapping = {
    "CC_MOp-MOp_cut": "Corpus Callosum",
    "CRuT_MOp-RN_ipsilesional_CCcut": "Rubropsinal (Ipsilesional)",
    "CRuT_MOp-RN_contralesional_mirrored_CCcut": "Rubropsinal (Contralesional)",
    "TC_DORsm-SSp_ll+SSp_ul_ipsilesional_END+higherstepsize_": "Thalamocortical (Ipsilesional)",
    "TC_DORsm-SSp_ll+SSp_ul_contralesional_END+higherstepsize": "Thalamocortical (Contralisional)",
    "CReT_MOp-TRN_contralesional_CCcut": "Reticulospinal (Contralesional)",
    "CReT_MOp-TRN_ipsilesional_CCcut": "Reticulospinal (Ipsilesional)",
    "CST_MOp-int-py_contralesional_selfdrawnROA+CCcut": "Corticospinal (Contralesional)",
    "CST_MOp-int-py_ipsilesional_selfdrawnROA+CCcut": "Corticospinal (Ipsilesional)",
    "OT_och-lgn_lgncut": "Optic"
}

# Load the CSV data
input_file_path = os.path.join(parent_dir, 'output', "Quantitative_outputs", 'Quantitative_results_from_dwi_processing_merged_with_behavior_data.csv')
df = pd.read_csv(input_file_path, low_memory=False)

# Define output paths
figures_folder = os.path.join(parent_dir, 'output', 'Figures')
pythonFigs_folder = os.path.join(figures_folder, 'pythonFigs')

# Create directories if they do not exist
os.makedirs(pythonFigs_folder, exist_ok=True)

# Iterate over each timepoint
for timepoint in selected_timepoints:
    # Filter the DataFrame based on selected settings for each group and mask name
    df_filtered_vis_list = []
    for mask_name in selected_mask_names:
        for group in selected_groups:
            df_filtered = df[(df['Group'] == group) &
                             (df['Qtype'] == selected_qtype) &
                             (df['mask_name'] == mask_name) &
                             (df['dialation_amount'] == selected_dialation_amount) &
                             (df['merged_timepoint'] == timepoint)].copy()
            df_filtered['Region'] = mask_name_mapping.get(mask_name, mask_name)  # Add simplified region name for plotting
            df_filtered_vis_list.append(df_filtered)

    # Combine the filtered DataFrames
    df_filtered_vis = pd.concat(df_filtered_vis_list, ignore_index=True)

    # Remove NaNs and ensure we have enough data for analysis
    df_filtered_vis = df_filtered_vis.dropna(subset=['Value'])

    # Plot the data if available
    if not df_filtered_vis.empty:
        # Custom colors for groups (example colors)
        custom_colors = {"Stroke": 'red', "Sham": 'gray'}

        # Create the figure
        fig, ax = plt.subplots(figsize=(18 / 2.54, 9 / 2.54))

        # Set font to Times New Roman
        plt.rcParams['font.family'] = 'Calibri'

        # Perform Mann-Whitney U test for each region to determine if there's a significant difference
        pval_text_list = []
        p_values = []
        simple_names = []

        # Print p-values in the console
        print(f'--- Timepoint {timepoint} ---')
        for mask_name in selected_mask_names:
            simple_name = mask_name_mapping.get(mask_name, mask_name)
            stroke_values = df_filtered_vis[(df_filtered_vis['Group'] == 'Stroke') & (df_filtered_vis['Region'] == simple_name)]['Value']
            sham_values = df_filtered_vis[(df_filtered_vis['Group'] == 'Sham') & (df_filtered_vis['Region'] == simple_name)]['Value']

            if len(stroke_values) > 0 and len(sham_values) > 0:
                stat, p_value = mannwhitneyu(stroke_values, sham_values, alternative='two-sided')
            else:
                p_value = 1.0  # Default p-value when there's insufficient data
                print("insufficent data")
            p_values.append(p_value)
            simple_names.append(simple_name)

        # Apply Bonferroni correction
        num_comparisons = len(p_values)
        corrected_p_values = [p * num_comparisons for p in p_values]

        # Generate p-value text with Bonferroni correction
        for simple_name, corrected_p_value in zip(simple_names, corrected_p_values):
            if corrected_p_value < 0.001:
                pval_text_list.append(f'Region {simple_name}: p_corrected < 0.001 (***)')
            elif corrected_p_value < 0.01:
                pval_text_list.append(f'Region {simple_name}: p_corrected < 0.01 (**)')
            elif corrected_p_value < 0.05:
                pval_text_list.append(f'Region {simple_name}: p_corrected < 0.05 (*)')
            else:
                pval_text_list.append(f'Region {simple_name}: p_corrected = {corrected_p_value:.3f} (n.s.)')
        pval_text_list.append(f"number of comparisons corrected for: {num_comparisons}")
        # Print corrected p-values in a well-formatted manner
        pval_text_output = "\n".join(pval_text_list)
        print(pval_text_output)

        # Save the p-value results to a text file
        pval_filename = f'{selected_qtype}_pvalues_timepoint_{timepoint}_dialation_{selected_dialation_amount}_regions_groups_comparison_chronic.txt'
        pval_file_path = os.path.join(pythonFigs_folder, pval_filename)
        with open(pval_file_path, 'w') as pval_file:
            pval_file.write(pval_text_output)

        # Create boxplot with dots, grouping by Region and Group
        sns.boxplot(x='Region', y='Value', hue='Group', data=df_filtered_vis, ax=ax, palette=custom_colors, showcaps=True,
                    whiskerprops={'color': 'black', 'linewidth': 0.8}, fliersize=0, linewidth=1.0, boxprops={'linewidth': 0.8})
        sns.despine(ax=ax, right=True, top=True)
        sns.stripplot(x='Region', y='Value', hue='Group', data=df_filtered_vis, ax=ax, dodge=True,
                      palette={'Stroke': '#8B0000', 'Sham': '#4F4F4F'}, alpha=1.0, size=2, jitter=True, marker='o', legend=False)

        # Set labels and customize font settings
        ax.set_ylabel('Mean FA Value [a.u]', fontsize=12, fontname='Calibri')

        # Remove duplicate legends
        ax.get_legend().remove()

        # Adjust layout
        plt.xticks(rotation=20, ha='right')  # Tilt x-axis ticks
        plt.tight_layout()  # Adjust layout to make space for the main title

        # Save the figure, including the region name in the filename
        fig_filename = f'{selected_qtype}_boxplot_timepoint_{timepoint}_dialation_{selected_dialation_amount}_regions_groups_comparison_chronic.svg'
        plt.savefig(os.path.join(pythonFigs_folder, fig_filename), dpi=300, bbox_inches='tight', format="svg", transparent=True)

        # Show the figure
        plt.show()

    else:
        print(f"No data available for the selected settings: Groups={selected_groups}, Qtype={selected_qtype}, Timepoint={timepoint}, Dilation value={selected_dialation_amount}, Regions={selected_mask_names}.")