statistical_analysis_of_quantitative_data.py

# -*- coding: utf-8 -*-
"""
Created on Fri Sep 27 13:19:08 2024
@author: arefks
"""
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from statsmodels.stats.multicomp import MultiComparison
from itertools import combinations
import statsmodels
import scipy
from scipy.stats import dunnett  # Dunnett's test (requires SciPy >= 1.11)

# Set up warning suppression (e.g. for model convergence warnings)
import warnings
warnings.simplefilter(action='ignore')

# Get the directory where this script is located
code_dir = os.path.dirname(os.path.abspath(__file__))
# Get the parent directory of the code directory
parent_dir = os.path.dirname(code_dir)

# Create a new folder for the mixed-model / normality analysis
mixed_model_dir = os.path.join(parent_dir, 'output', "Quantitative_outputs", 'normality')
os.makedirs(mixed_model_dir, exist_ok=True)

# Load the quantitative results produced by the DWI processing step
input_file_path = os.path.join(parent_dir, 'output', "Quantitative_outputs", 'Quantitative_results_from_dwi_processing.csv')
df_init = pd.read_csv(input_file_path)

# Filter the dataframe: drop the 42- and 56-day timepoints
df = df_init[~df_init["merged_timepoint"].isin([42, 56])]

# Collect one result row per group; results_df below is built from this list
results = []

# Iterate over unique values of "Qtype", "dialation_amount" and "mask_name"
# for qq in df["Qtype"].unique():
for qq in ["fa"]:
    for dd in df["dialation_amount"].unique():
        for mm in df["mask_name"].unique():
            # Filter the dataframe based on the current "Qtype", "mask_name" and dilation amount
            filtered_df = df[(df["Qtype"] == qq) & (df["mask_name"] == mm) & (df["dialation_amount"] == dd)]
            # Identify duplicates based on 'merged_timepoint' and 'subjectID'
            duplicates = filtered_df[filtered_df.duplicated(subset=['merged_timepoint', 'subjectID'], keep=False)]
            # Print the duplicates
            if not duplicates.empty:
                print("Duplicate subject ID and timepoint entries found:\n", duplicates)
            # Remove duplicates and store in a new DataFrame
            temp_df = filtered_df.drop_duplicates(subset=['merged_timepoint', 'subjectID'])
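            # --- Hypothetical sketch (not in the original snippet) ------------------
            # The statistic appended to `results` is not shown in this excerpt. Given
            # the 'normality' output folder, a plausible step is a per-timepoint
            # Shapiro-Wilk test on each group; the value column name "Value" is an
            # assumption and may differ in the actual CSV.
            for tp in temp_df["merged_timepoint"].unique():
                sample = temp_df.loc[temp_df["merged_timepoint"] == tp, "Value"].dropna()
                if len(sample) >= 3:  # shapiro() requires at least 3 observations
                    w_stat, p_value = scipy.stats.shapiro(sample)
                    results.append({"Qtype": qq, "mask_name": mm,
                                    "dialation_amount": dd, "merged_timepoint": tp,
                                    "shapiro_W": w_stat, "p_value": p_value})
            # -------------------------------------------------------------------------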

# Create a DataFrame from the collected results
results_df = pd.DataFrame(results)

# Save results_df as CSV in the mixed-model analysis folder
output_file_path = os.path.join(mixed_model_dir, 'mixed_model_results.csv')
results_df.to_csv(output_file_path, index=False)

# Print the table
print(results_df)
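
# ---------------------------------------------------------------------------
# Hypothetical sketch (not part of the original file): the output file name
# 'mixed_model_results.csv' and the statsmodels.formula.api import suggest that
# each group may instead (or additionally) be analysed with a mixed-effects
# model over timepoints, with subject as a random effect. The dependent-variable
# column "Value" is an assumed name.
# ---------------------------------------------------------------------------
def fit_mixed_model(group_df, value_col="Value"):
    """Fit value_col ~ merged_timepoint with a random intercept per subjectID."""
    model = smf.mixedlm(f"{value_col} ~ merged_timepoint",
                        data=group_df,
                        groups=group_df["subjectID"])
    fit = model.fit()
    return {"coef_timepoint": fit.params.get("merged_timepoint"),
            "p_timepoint": fit.pvalues.get("merged_timepoint")}

# Example use inside the innermost loop:
#     results.append({"Qtype": qq, "mask_name": mm, "dialation_amount": dd,
#                     **fit_mixed_model(temp_df)})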