# Aswendt_Lab / 2024_Ruthe_SND
							import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# Conversion factor from centimetres to inches (matplotlib figure sizes are
# given in inches, so `8 * cm` means "8 cm expressed in inches").
cm = 1 / 2.54

# Directory containing this script, and the directory one level above it.
# All input and output paths below are resolved relative to the latter.
code_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(code_dir)

# Input CSV files: the raw quantitative values and the per-mask significance
# results (stroke vs. sham).
quantitative_dir = os.path.join(parent_dir, 'output', 'Quantitative_outputs')
original_file_path = os.path.join(quantitative_dir, 'old', 'Quantitative_results_from_dwi_processing.csv')
results_file_path = os.path.join(quantitative_dir, 'Significance_stroke_vs_sham_difference.csv')

# Output folders for the generated plots; created up front if missing.
plots_output_dir = os.path.join(parent_dir, 'output', 'Figures', 'pythonFigs')
fa_over_time_plots_dir = os.path.join(parent_dir, 'output', 'Figures', 'fa_over_time_plots')
os.makedirs(fa_over_time_plots_dir, exist_ok=True)
os.makedirs(plots_output_dir, exist_ok=True)

# Load the original dataset for analysis. low_memory=False makes pandas read
# the file in one pass instead of chunk-wise type inference.
df = pd.read_csv(original_file_path, low_memory=False)

# Load the results CSV
results_df = pd.read_csv(results_file_path)

# Drop every row whose mask name contains "AMBA".
# The explicit .copy() detaches the filtered frame from the one that was read
# from disk: new columns are assigned to `results_df` further down, and doing
# that on a boolean-indexed slice raises pandas' SettingWithCopyWarning.
results_df = results_df[~results_df['mask_name'].str.contains("AMBA")].copy()

# Define functions to map abbreviations and locations
def map_abbreviation(mask_name):
    """Return the short tract label for a mask name, based on its prefix.

    The prefixes are tested in the order listed below and the first match
    wins; a name that matches none of them yields "Unknown". Matching is
    case-sensitive.
    """
    prefix_to_label = (
        ("CC", "CC"),
        ("CRuT", "RS"),
        ("CReT", "RetS"),
        ("CST", "CST"),
        ("TC", "TC"),
        ("OT", "OT"),
    )
    for prefix, label in prefix_to_label:
        if mask_name.startswith(prefix):
            return label
    return "Unknown"

def map_location(mask_name):
    """Return the hemisphere tag encoded in a mask name.

    "Ips" if the name contains "ipsi", otherwise "Con" if it contains
    "contra", otherwise the string "None" (not the None object).
    """
    # "ipsi" is checked first, so it takes priority when both keywords occur.
    keyword_to_tag = (("ipsi", "Ips"), ("contra", "Con"))
    for keyword, tag in keyword_to_tag:
        if keyword in mask_name:
            return tag
    return "None"

# Derive the tract abbreviation and hemisphere tag of every row from its
# mask name and store them as two additional columns.
mask_names = results_df['mask_name']
results_df['abbreviation'] = mask_names.map(map_abbreviation)
results_df['location'] = mask_names.map(map_location)

# Distinct time points and quantity types present in the results; one plot
# is produced per (time point, Qtype) combination.
timepoints = results_df['merged_timepoint'].unique()
qtypes = results_df['Qtype'].unique()

# Give every abbreviation its own scatter marker. The marker list is cycled
# with a modulo, so shapes repeat if there are more abbreviations than markers.
unique_abbreviations = results_df['abbreviation'].unique()
markers = ['o', 's', '^', 'D', 'v', '<', '>', 'p', '*', 'X', 'h']
marker_count = len(markers)
marker_mapping = {}
for position, abbreviation in enumerate(unique_abbreviations):
    marker_mapping[abbreviation] = markers[position % marker_count]

# Create one volcano plot per (time point, Qtype) combination: x is the mean
# Stroke - Sham difference of the raw values, y is -log10 of the p-value.
for timepoint in timepoints:
    for qtype in qtypes:
        # Explicit copy: two columns are added to this subset below, and
        # assigning to a boolean-indexed slice of `results_df` would raise
        # pandas' SettingWithCopyWarning.
        subset_df = results_df[(results_df['merged_timepoint'] == timepoint) &
                               (results_df['Qtype'] == qtype)].copy()

        # Skip if there is no data for the specific subset
        if subset_df.empty:
            continue

        # The time point / Qtype conditions are identical for every mask in
        # this subset, so filter the raw data once here instead of per row.
        group_df = df[(df['merged_timepoint'] == timepoint) & (df['Qtype'] == qtype)]

        # Mean difference (Stroke - Sham) for every row of the subset, matched
        # on mask name and dilation amount. NaN when either group has no values.
        mean_diff = []
        progress_desc = f"Calculating mean differences for {timepoint}, Qtype: {qtype}"
        for _, row in tqdm(subset_df.iterrows(), total=len(subset_df), desc=progress_desc):
            matching = group_df[(group_df['mask_name'] == row['mask_name']) &
                                (group_df['dialation_amount'] == row['dialation_amount'])]

            stroke_values = matching.loc[matching['Group'] == 'Stroke', 'Value'].dropna()
            sham_values = matching.loc[matching['Group'] == 'Sham', 'Value'].dropna()

            if stroke_values.empty or sham_values.empty:
                mean_diff.append(np.nan)
            else:
                mean_diff.append(stroke_values.mean() - sham_values.mean())

        subset_df['Mean_Difference'] = mean_diff
        subset_df['-log10(Pvalue)'] = -np.log10(subset_df['Pvalue'])

        # 8 cm by 8 cm (converted to inches), with high DPI for better quality.
        # Keep the handle so the figure can be closed explicitly afterwards.
        fig = plt.figure(figsize=(8 * cm, 8 * cm), dpi=300)

        # Plot each mask using its corresponding marker shape and location suffix
        for abbr in unique_abbreviations:
            abbr_subset = subset_df[subset_df['abbreviation'] == abbr]
            for location in abbr_subset['location'].unique():
                loc_subset = abbr_subset[abbr_subset['location'] == location]
                label = f"{abbr} ({location})" if location != "None" else abbr
                plt.scatter(loc_subset['Mean_Difference'], loc_subset['-log10(Pvalue)'],
                            alpha=0.7, s=10, marker=marker_mapping[abbr], label=label)

        # Dashed horizontal line at the p = 0.05 level
        plt.axhline(y=-np.log10(0.05), color='blue', linestyle='--')
        plt.xlabel('Mean Difference (Stroke - Sham)', fontsize=12, fontname='Calibri')
        plt.ylabel('-log10(Pvalue)', fontsize=12, fontname='Calibri')
        plt.title(f'Volcano Plot: {qtype} for {timepoint}', fontsize=12, fontname='Calibri')
        plt.grid(False)

        # Create the legend with marker shapes
        plt.legend(loc='best', fontsize=6, frameon=False)

        # Save the plot as an SVG file
        plot_file_name = f'volcano_plot_{timepoint}_{qtype}.svg'
        plot_file_path = os.path.join(plots_output_dir, plot_file_name)
        plt.savefig(plot_file_path, format='svg', bbox_inches='tight')

        plt.show()

        # Release the figure: pyplot keeps every figure alive until it is
        # closed, so without this they accumulate across loop iterations
        # (notably with non-interactive backends, where show() does not block).
        plt.close(fig)