#!/usr/bin/env python3
# Original file: https://gin.g-node.org/EL1000/metrics
"""Compute ACLEW, LENA, or per-children metrics across the DATASETS/ subdatasets."""

import argparse
import datetime
from functools import reduce
import os
import warnings

import datalad.api
import pandas as pd

from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager
from ChildProject.pipelines.metrics import LenaMetrics, AclewMetrics


def date_is_valid(date, fmt):
    """Return True if `date` parses with the given format string."""
    try:
        datetime.datetime.strptime(date, fmt)
    except (ValueError, TypeError):
        return False
    return True


def compute_metrics(args):
    # run on the requested experiments, or on every subdataset of DATASETS/ by default
    if len(args.experiments):
        experiments = args.experiments
    else:
        datasets = datalad.api.subdatasets(path="DATASETS/")
        experiments = [os.path.basename(dataset["path"]) for dataset in datasets]

    print(
        "pipeline '\033[1m{}\033[0m' will run on experiments '\033[1m{}\033[0m'".format(
            args.pipeline, ",".join(experiments)
        )
    )

    data = []
    columns = []

    for experiment in experiments:
        project = ChildProject(os.path.join("DATASETS", experiment), enforce_dtypes=True)
        am = AnnotationManager(project)

        if args.pipeline == "aclew":
            if "vtc" not in am.annotations["set"].tolist():
                print(f"skipping {experiment} (no VTC annotation)")
                continue

            metrics = AclewMetrics(
                project,
                vtc="vtc",
                alice="alice",
                vcm="vcm",
                by="session_id",
                threads=args.threads,
            ).extract()
        elif args.pipeline == "lena":
            metrics = LenaMetrics(
                project, set="its", types=["OLN"], by="session_id", threads=args.threads
            ).extract()
        elif args.pipeline == "children":
            data.append(project.children.assign(experiment=experiment))
            columns.append(project.children.columns)
            continue
        else:
            raise ValueError("undefined pipeline '{}'".format(args.pipeline))

        metrics = metrics.assign(experiment=experiment)

        if not len(metrics):
            print(
                "warning: experiment '{}' did not return any metrics for pipeline '{}'".format(
                    experiment, args.pipeline
                )
            )
            continue

        # compute ages (in months) from the recording date and the child's date of birth
        metrics = metrics.merge(
            project.recordings[["session_id", "date_iso"]].drop_duplicates(
                "session_id", keep="first"
            ),
            how="left",
            left_on="session_id",
            right_on="session_id",
        )
        metrics = metrics.merge(
            project.children[["child_id", "child_dob"]],
            how="left",
            left_on="child_id",
            right_on="child_id",
        )
        metrics["age"] = (
            metrics[["date_iso", "child_dob"]]
            .apply(
                lambda r: (
                    datetime.datetime.strptime(r["date_iso"], "%Y-%m-%d")
                    - datetime.datetime.strptime(r["child_dob"], "%Y-%m-%d")
                )
                if (
                    date_is_valid(r["child_dob"], "%Y-%m-%d")
                    and date_is_valid(r["date_iso"], "%Y-%m-%d")
                )
                else None,
                axis=1,
            )
            .apply(lambda dt: dt.days / (365.25 / 12) if not pd.isnull(dt) else None)
            .apply(lambda a: int(a) if not pd.isnull(a) else "NA")
        )

        recordings = project.recordings
        if "session_offset" not in recordings.columns:
            recordings = recordings.assign(session_offset=0)

        # compute missing audio: the expected session length (offset + duration of the
        # last recording) minus the total duration actually available
        metrics = metrics.merge(
            recordings[["session_id", "session_offset", "duration"]]
            .sort_values("session_offset")
            .groupby("session_id")
            .agg(
                last_offset=("session_offset", lambda x: x.iloc[-1]),
                last_duration=("duration", lambda x: x.iloc[-1]),
                total=("duration", "sum"),
            )
            .reset_index(),
            how="left",
            left_on="session_id",
            right_on="session_id",
        )
        metrics["missing_audio"] = (
            metrics["last_offset"] + metrics["last_duration"] - metrics["total"]
        )
        metrics.drop(columns=["last_offset", "last_duration", "total"], inplace=True)

        data.append(metrics)

    if args.pipeline != "children":
        pd.concat(data).set_index(["experiment", "session_id", "child_id"]).to_csv(
            args.output
        )
    else:
        data = pd.concat(data)
        # keep only the columns shared by all experiments, plus a few key covariates
        columns = reduce(lambda x, y: x & set(y), columns, set(columns[0])) | {
            "normative",
            "ses",
        }
        # sort for a deterministic column order in the output
        data = data[sorted(columns)]
        data.set_index("child_id").to_csv(args.output)

def main(args):
    compute_metrics(args)


def _parse_args(argv):
    warnings.filterwarnings("ignore")

    parser = argparse.ArgumentParser(description="compute metrics")
    parser.add_argument(
        "pipeline",
        help="pipeline to run",
        choices=["aclew", "lena", "children", "period"],
    )
    parser.add_argument("output", help="output file")
    parser.add_argument("--experiments", nargs="+", default=[])
    parser.add_argument("--threads", default=0, type=int)
    parser.add_argument("--period", default=None, type=str)
    args = parser.parse_args(argv)
    return args


if __name__ == "__main__":
    import sys

    pgrm_name, argv = sys.argv[0], sys.argv[1:]
    args = _parse_args(argv)
    # _parse_args returns an argparse.Namespace; pass it straight through to main()
    main(args)
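
# A possible invocation, as a sketch only: the script name and the dataset names below
# are hypothetical, and it assumes the working directory contains the DATASETS/
# superdataset with the relevant annotation sets installed.
#
#   python metrics.py aclew aclew_metrics.csv --experiments dataset-a dataset-b --threads 4
#   python metrics.py children children_metadata.csv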