- """Convert the SQLite feauture storage into .tsv files."""
import argparse
import sys
from pathlib import Path

from junifer.storage import SQLiteFeatureStorage

sys.path.append("helper_scripts")
from utils import get_marker_names
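# get_marker_names is a project-local helper (helper_scripts/utils.py) that
# returns the marker (feature) names stored for the given dataset.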


def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(
        description="Convert the SQLite feature storage into .tsv files."
    )
    parser.add_argument(
        "dataset",
        type=str,
        help="Which dataset to convert. One of {'PIOP1', 'PIOP2', 'ID1000'}.",
    )
    return parser.parse_args()


def validate_args(args):
    """Validate arguments."""
    datasets = ["ID1000", "PIOP1", "PIOP2"]
    assert args.dataset in datasets, (
        f"{args.dataset} is not a valid dataset! Valid datasets are "
        f"{datasets}."
    )
    return args


def main():
    """Convert the SQLite feature storage into .tsv files."""
    # Map each dataset to the sessions (tasks) it contains.
    datasets = {
        "ID1000": ["moviewatching"],
        "PIOP2": ["restingstate", "emomatching", "workingmemory"],
        "PIOP1": ["restingstate", "faces", "emomatching", "workingmemory"],
    }
    args = validate_args(parse_args())
    dataset = args.dataset
    markers = get_marker_names(args.dataset)
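    # The junifer SQLite storage is assumed to live at
    # ../junifer_storage/<dataset>/<dataset>.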
    storage_path = Path("..") / "junifer_storage" / dataset / dataset
    storage = SQLiteFeatureStorage(storage_path, single_output=True)

    for marker in markers:
        print("loading dataframe...")
        connectomes = storage.read_df(feature_name=marker)
        print("...done")
        for session in datasets[dataset]:
            print(dataset, marker, session)
            print("reshaping dataframe!")
            outfile = (
                Path("..")
                / "junifer_storage"
                / "JUNIFER_AOMIC_TSV_CONNECTOMES"
                / dataset
                / f"{dataset}_{marker}_{session}.tsv.gz"
            )
            session_connectomes = connectomes.reset_index().drop(columns="idx")
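            # At this point the dataframe is in long format: one row per
            # subject (and task, for the PIOP datasets) and ROI pair, with
            # the connectivity values in a column named "0".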
            if dataset != "ID1000":
                # PIOP datasets contain multiple tasks; keep only the rows
                # for the current session.
                session_connectomes = session_connectomes.query(
                    f"task == '{session}'"
                ).drop(columns="task")
            subject = session_connectomes["subject"].unique()[0]
            # get one correct ordering of all final columns
            columns_in_order = session_connectomes.query(
                f"subject == '{subject}'"
            )["pair"]
            print(f"{len(columns_in_order)} columns!")
            # pivot sorts the columns, so reindex to restore the original
            # pair ordering
            session_connectomes = session_connectomes.pivot(
                index="subject", columns="pair", values="0"
            ).reindex(columns_in_order, axis=1)

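            # Column labels encode ROI pairs as "<roi_a>~<roi_b>"; keep only
            # the off-diagonal pairs and drop the diagonal (self-pair) entries.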
            columns = session_connectomes.columns
            non_diags = []
            for col in columns:
                a, b = col.split("~")
                if a != b:
                    non_diags.append(col)

            session_connectomes = session_connectomes[non_diags]

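            # Relabel the rows with zero-padded subject IDs (e.g. "sub-0001").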
            new_index = [f"sub-{ind:04}" for ind in session_connectomes.index]
            session_connectomes.index = new_index

            session_connectomes.to_csv(outfile, sep="\t", compression="gzip")
            print("saved to tsv, continue!")


if __name__ == "__main__":
    main()