# remapping_voiting_qc.py (2.2 KB)
import os

import pandas as pd
  3. # Get the directory where the code file is located
  4. code_dir = os.path.dirname(os.path.abspath(__file__))
  5. # Get the parent directory of the code directory
  6. parent_dir = os.path.dirname(code_dir)
  7. qc_csv = os.path.join(parent_dir, "input", "AIDAqc_ouptut_for_data", "Votings.csv")
  8. df_qc = pd.read_csv(qc_csv)
  9. # Define a function to extract the timepoint information
  10. def extract_timepoint(path):
  11. parts = path.split(os.sep)
  12. for part in parts:
  13. if 'ses' in part:
  14. return part
  15. # Define a function to extract the timepoint number
  16. def extract_timepoint_number(tp):
  17. if 'Baseline' in tp:
  18. return 0
  19. else:
  20. return int(tp.split('ses-')[-1].replace("P",""))
  21. # Define the session mapping dictionary
  22. session_mapping = {
  23. 0: 0,
  24. 1: 3, 2: 3, 3: 3,
  25. 4: 3, 5: 3, 6: 7, 7: 7,
  26. 8: 7, 9: 7, 10: 7, 11: 14, 12: 14,
  27. 13: 14, 14: 14, 15: 14, 16: 14, 17: 14, 18: 14, 19: 21,
  28. 20: 21, 21: 21, 22: 21, 23: 21, 24: 21, 25: 21, 26: 28,
  29. 27: 28, 28: 28, 29: 28, 30: 28 , 42:42, 43:42, 56:56, 57:56
  30. }
  31. # Define a function to extract the subject ID
  32. def extract_subject_id(path):
  33. parts = path.split(os.sep)
  34. for part in parts:
  35. if 'sub-' in part and '.nii.' not in part:
  36. return part
  37. # Remove rows containing "brkraw" or "DN" in the "Pathes" column
  38. df_qc = df_qc[~df_qc['Pathes'].str.contains('brkraw|DN')]
  39. # Create the "tp" column
  40. df_qc['tp'] = df_qc['Pathes'].apply(lambda x: extract_timepoint(x))
  41. # Define a function to map tp_int using session_mapping
  42. def map_merged_timepoint(tp_int):
  43. return session_mapping.get(tp_int, tp_int)
  44. # Create the "tp_int" column
  45. df_qc['tp_int'] = df_qc['tp'].apply(lambda x: extract_timepoint_number(x))
  46. # Create the "merged_timepoint" column
  47. df_qc['merged_timepoint'] = df_qc['tp_int'].apply(map_merged_timepoint)
  48. # Add the "subjectID" column
  49. df_qc['subjectID'] = df_qc['Pathes'].apply(lambda x: extract_subject_id(x))
  50. # Save the DataFrame as a CSV file
  51. output_csv = os.path.join(parent_dir, "input", "AIDAqc_ouptut_for_data", "voting_remapped.csv")
  52. df_qc.to_csv(output_csv, index=False)
  53. print("DataFrame saved as 'voting_remapped.csv'")