extract_element_connectome.py

  1. """Convert one elements SQLite feature storage into .tsv files."""
  2. from junifer.storage import SQLiteFeatureStorage
  3. from pathlib import Path
  4. import argparse
  5. from utils import get_marker_names
  6. from ptpython.ipython import embed
  7. def parse_args():
  8. """Parse arguments."""
  9. parser = argparse.ArgumentParser(
  10. description="Convert the SQLite feauture storage into .tsv files."
  11. )
  12. parser.add_argument(
  13. "dataset",
  14. type=str,
  15. help=("Which dataset to convert. {'PIOP1', 'PIOP2', 'ID1000'}"),
  16. )
  17. parser.add_argument(
  18. "subject",
  19. type=str,
  20. help=("Which subject to convert."),
  21. )
  22. parser.add_argument(
  23. "task",
  24. type=str,
  25. help=("Which task."),
  26. )
  27. parser.add_argument("output_folder", type=str, help="Output directory")
  28. return parser.parse_args()
  29. def validate_args(args):
  30. """Validate arguments."""
  31. datasets = ["ID1000", "PIOP1", "PIOP2"]
  32. assert args.dataset in datasets, (
  33. f"{args.dataset} not a valid dataset! Valid datasets are"
  34. f"{datasets}."
  35. )
  36. return args
  37. def main():
  38. """Convert the SQLite feauture storage into .tsv files."""
  39. args = validate_args(parse_args())
  40. dataset = args.dataset
  41. outpath = Path(args.output_folder)
  42. assert outpath.is_dir(), f"{args.output_folder} is not a directory."
  43. # this will get marker names as defined in the corresponding yaml file
  44. markers = get_marker_names(args.dataset)
  45. for marker in markers:
  46. storage_path = (
  47. Path("..")
  48. / ".."
  49. / "junifer_storage"
  50. / dataset
  51. / f"element_{args.subject}_{args.task}_{dataset}"
  52. )
  53. storage = SQLiteFeatureStorage(storage_path, single_output=True)
  54. print("loading dataframe...")
  55. connectome = storage.read_df(feature_name=marker)
  56. connectome = connectome.reset_index().drop(columns="idx")
  57. outfile = (
  58. outpath / f"{dataset}_{args.subject}_{args.task}_{marker}.tsv"
  59. )
  60. columns_in_order = connectome["pair"]
  61. embed()
  62. connectome = (
  63. connectome.pivot(index="subject", columns="pair", values="0")
  64. .reindex(columns_in_order, axis=1)
  65. .to_csv(outfile, sep="\t")
  66. )
  67. print("saved to tsv, continue!")
  68. if __name__ == "__main__":
  69. main()
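
Example invocation for reference; the subject and task labels below are illustrative, not taken from the repository:

    python extract_element_connectome.py PIOP1 sub-0001 restingstate ./connectomes

This writes one wide-format .tsv per marker defined in the dataset's YAML file into the given output directory.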