# -*- coding: utf-8 -*-
# Repository: Aswendt_Lab / 2024_Ruthe_SND
"""
Created on Mon Oct 28 12:21:08 2024

@author: arefk
"""

import os
import pandas as pd
import numpy as np
from scipy.stats import shapiro, ttest_ind, mannwhitneyu
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# Resolve the project root: one level above the folder that holds this script
code_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(code_dir)

# Read the input table of quantitative results
input_file_path = os.path.join(
    parent_dir,
    'output',
    "Quantitative_outputs",
    'Quantitative_results_from_dwi_processing_only_in_stroke_slices.csv',
)
df = pd.read_csv(input_file_path, low_memory=False)

# Accumulator for the per-combination test outcomes
results = []

# Distinct levels of each grouping column present in the table
unique_masks = df['mask_name'].unique()
unique_qtypes = df['Qtype'].unique()
unique_timepoints = df['merged_timepoint'].unique()
unique_dilations = df['dialation_amount'].unique()

# Full cross product of the levels; each tuple is one unit of work for the pool
combinations = [
    (mask, qtype, timepoint, dilation)
    for mask in unique_masks
    for qtype in unique_qtypes
    for timepoint in unique_timepoints
    for dilation in unique_dilations
]

# Function to process each combination
def process_combination(mask, qtype, timepoint, dilation, *, data=None,
                        min_subjects=8, alpha=0.05):
    """Test Stroke against Sham 'Value' distributions for one combination.

    Rows are selected by exact match on the 'mask_name', 'Qtype',
    'merged_timepoint' and 'dialation_amount' columns; rows whose 'Value' is
    NaN are ignored. Both groups are checked with the Shapiro-Wilk test: if
    either group rejects normality at ``alpha`` a two-sided Mann-Whitney U
    test is used, otherwise Welch's t-test (unequal variances).

    Parameters
    ----------
    mask, qtype, timepoint, dilation
        Values identifying the combination to analyse.
    data : pandas.DataFrame, optional
        Table to analyse. Defaults to the module-level ``df``.
    min_subjects : int, optional
        BOTH groups must contain strictly more than this many unique
        'subjectID' values (counted on non-NaN rows). Default 8.
    alpha : float, optional
        Significance level applied to the Shapiro-Wilk p-values. Default 0.05.

    Returns
    -------
    dict or None
        ``{'mask_name', 'Qtype', 'merged_timepoint', 'dialation_amount',
        'Pvalue'}`` for the combination, or ``None`` when a group does not
        have enough subjects or observations to be tested.
    """
    frame = df if data is None else data

    # Build the combination filter once; it is shared by both groups.
    selected = frame[
        (frame['mask_name'] == mask)
        & (frame['Qtype'] == qtype)
        & (frame['merged_timepoint'] == timepoint)
        & (frame['dialation_amount'] == dilation)
        & frame['Value'].notna()
    ]
    stroke_rows = selected[selected['Group'] == 'Stroke']
    sham_rows = selected[selected['Group'] == 'Sham']

    # Both groups (not just one of them) need more than `min_subjects` subjects.
    if (len(stroke_rows['subjectID'].unique()) <= min_subjects
            or len(sham_rows['subjectID'].unique()) <= min_subjects):
        return None

    stroke_values = stroke_rows['Value']
    sham_values = sham_rows['Value']

    # Shapiro-Wilk needs at least three observations per sample. This can only
    # trigger when `min_subjects` is lowered below its default.
    if len(stroke_values) < 3 or len(sham_values) < 3:
        return None

    shapiro_stroke_p = shapiro(stroke_values)[1]
    shapiro_sham_p = shapiro(sham_values)[1]

    if shapiro_stroke_p < alpha or shapiro_sham_p < alpha:
        # At least one group departs from normality: rank-based test.
        _, p_value = mannwhitneyu(stroke_values, sham_values, alternative='two-sided')
    else:
        # Both groups look normal: Welch's t-test (no equal-variance assumption).
        _, p_value = ttest_ind(stroke_values, sham_values, equal_var=False)

    return {
        'mask_name': mask,
        'Qtype': qtype,
        'merged_timepoint': timepoint,
        'dialation_amount': dilation,
        'Pvalue': p_value,
    }

# Run every combination on a pool of 6 worker threads
with ThreadPoolExecutor(max_workers=6) as executor:
    futures = {
        executor.submit(process_combination, mask, qtype, timepoint, dilation): (mask, qtype, timepoint, dilation)
        for mask, qtype, timepoint, dilation in combinations
    }

    # Outcomes are keyed by future so they can be re-ordered after completion
    outcomes = {}

    # Advance the progress bar as tasks finish, in whatever order that happens
    with tqdm(total=len(futures), desc="Processing combinations in parallel") as pbar:
        for future in as_completed(futures):
            combination = futures[future]
            try:
                result = future.result()
            except Exception as e:
                # Best effort: report the failing combination and keep going
                print(f"Error processing combination {combination}: {e}")
            else:
                if result:
                    outcomes[future] = result
            finally:
                pbar.update(1)

# Append in submission order (dict insertion order) so the output rows do not
# depend on thread scheduling and are identical from run to run
results.extend(outcomes[future] for future in futures if future in outcomes)

# Fix the columns explicitly so the CSV still has a header row when no
# combination produced a result
result_columns = ['mask_name', 'Qtype', 'merged_timepoint', 'dialation_amount', 'Pvalue']
results_df = pd.DataFrame(results, columns=result_columns)

# Define output path for the new CSV
output_file_path = os.path.join(parent_dir, 'output', "Quantitative_outputs", 'Significance_stroke_vs_sham_difference_withoutWMmask_only_in_stroke_slices.csv')

# Save results to CSV
results_df.to_csv(output_file_path, index=False)

print(f"Significance analysis results saved to {output_file_path}")