# MRI_files_summarizer.py
  1. import os
  2. import argparse
  3. import glob
  4. import pandas as pd
  5. if __name__ == "__main__":
  6. parser = argparse.ArgumentParser(description='This script processes NIfTI files in a directory. It extracts relevant parts of the file name and creates a DataFrame.')
  7. parser.add_argument('-i', '--input', required=True, help='Path to the parent project folder of the dataset, e.g., raw_data', type=str)
  8. parser.add_argument('-o', '--output', required=True, help='Path where the output CSV file should be saved', type=str)
  9. args = parser.parse_args()
  10. # Search for all subject files in the input path
  11. temp_files = glob.glob(os.path.join(args.input, "**", "*.nii.gz"), recursive=True)
  12. data = []
  13. for tt in temp_files:
  14. filename = os.path.basename(tt)
  15. list_split = filename.split("_")
  16. # Initialize the dictionary to collect file details
  17. file_info = {
  18. "FileAddress": tt,
  19. "Modality": None,
  20. "TimePoint": None,
  21. "SubjectID": None,
  22. "RunNumber": None
  23. }
  24. # Parse the split filename for specific identifiers
  25. for element in list_split:
  26. if "sub-" in element:
  27. file_info['SubjectID'] = element.replace("sub-", "")
  28. elif "ses-" in element:
  29. file_info['TimePoint'] = element.replace("ses-", "")
  30. elif "run-" in element:
  31. file_info['RunNumber'] = element.replace("run-", "")
  32. elif ".nii.gz" in element:
  33. file_info['Modality'] = element.replace(".nii.gz", "")
  34. data.append(file_info)
  35. # Create DataFrame
  36. df = pd.DataFrame(data)
  37. # Remove duplicate rows
  38. df = df.drop_duplicates()
  39. # Save the DataFrame to a CSV file
  40. output_file_path = os.path.join(args.output, "MRI_files_overview.csv")
  41. df.to_csv(output_file_path, index=False)
  42. print("Data processing complete. The DataFrame has been saved to", output_file_path)