#!/usr/bin/env python
# -*- coding: utf8 -*-
#
# -----------------------------------------------------------------------------
#   File: metrics.py (as part of project URUMETRICS)
#   Created: 28/07/2022 13:58
#   Last Modified: 28/07/2022 13:58
# -----------------------------------------------------------------------------
#   Author: William N. Havard
#           Postdoctoral Researcher
#
#   Mail  : william.havard@ens.fr / william.havard@gmail.com
#
#   Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
#
# ------------------------------------------------------------------------------
#   Description:
#       • Extracts the URUMETRICS metrics for the recordings of a ChildProject
#         data set that are not yet listed in <project>/extra/metrics/metrics.csv
#         and appends them to that file.
# -----------------------------------------------------------------------------

import logging
import os
import sys
from typing import List, Union

import numpy as np
import pandas as pd
from ChildProject.annotations import AnnotationManager
from ChildProject.pipelines.metrics import Metrics
from ChildProject.projects import ChildProject

from metrics_functions import avg_pr_pm_speaker, avg_sr_pm_speaker, avg_wr_pm_speaker, \
    mean_mean_pitch_speaker, mean_pitch_range_speaker, \
    mlup_speaker, mlus_speaker, mluw_speaker, \
    chi_adu_turn_transitions

logger = logging.getLogger(__name__)


class UruguayMetrics(Metrics):
    """Metrics pipeline that selects its metrics from the annotation sets found in the index.

    On construction, the VTC and ALICE sets are merged into an ``alice_vtc`` set,
    then a ``callable`` / ``set`` / ``speaker`` table is assembled from the
    annotation sets actually present in the annotation index and handed to the
    parent ``Metrics`` constructor.
    """

    # NOTE(review): presumably the sub-command name under which ChildProject
    # registers this pipeline -- confirm against ChildProject's pipeline docs.
    SUBCOMMAND = "uruguay"

    def __init__(
        self,
        project: ChildProject,
        vtc: str = "vtc",
        alice: str = "alice",
        vcm: str = "vcm",
        acoustic: str = "acoustic",
        recordings: Union[str, List[str], pd.DataFrame] = None,
        from_time: str = None,
        to_time: str = None,
        rec_cols: str = None,
        child_cols: str = None,
        period: str = None,
        by: str = "recording_filename",
        threads: int = 1,
    ):
        """Build the metrics table and initialise the parent pipeline.

        :param project: ChildProject instance the metrics are extracted from.
        :param vtc: name of the VTC annotation set.
        :param alice: name of the ALICE annotation set.
        :param vcm: name of the VCM annotation set.
        :param acoustic: name of the acoustic annotation set.
        :param recordings: forwarded unchanged to ``Metrics.__init__``.
        :param from_time: forwarded unchanged to ``Metrics.__init__``.
        :param to_time: forwarded unchanged to ``Metrics.__init__``.
        :param rec_cols: forwarded unchanged to ``Metrics.__init__``.
        :param child_cols: forwarded unchanged to ``Metrics.__init__``.
        :param period: forwarded unchanged to ``Metrics.__init__``.
        :param by: forwarded unchanged to ``Metrics.__init__``.
        :param threads: forwarded unchanged to ``Metrics.__init__``.
        """
        self.vtc = vtc
        self.alice = alice
        self.vcm = vcm
        self.acoustic = acoustic
        self.alice_vtc = 'alice_vtc'
        self.conversations = 'conversations'

        # Creating merged alice_vtc set. The configured set names are used (not
        # the literal defaults) so that custom ``vtc`` / ``alice`` arguments are
        # honoured by the merge as well.
        am = AnnotationManager(project)
        am.read()
        am.merge_sets(
            left_set=self.vtc,
            right_set=self.alice,
            left_columns=["speaker_type"],
            right_columns=["phonemes", "syllables", "words"],
            output_set=self.alice_vtc,
            full_set_merge=False,
            skip_existing=True,
        )

        available_sets = set(am.annotations["set"].values)

        rows = []
        for expected_set, metrics in self._metrics_by_set():
            if expected_set not in available_sets:
                logger.warning("The expected set '%s' was not found in the index.", expected_set)
                continue
            rows.extend([metric, expected_set, speaker] for metric, speaker in metrics)

        # A single object array keeps callables, strings, ``pd.NA`` and ``None``
        # untouched; ``reshape`` keeps the 3-column shape even when no set matched.
        metrics_table = pd.DataFrame(
            np.array(rows, dtype=object).reshape(-1, 3),
            columns=["callable", "set", "speaker"],
        )

        super().__init__(project, metrics_table, by=by, recordings=recordings,
                         period=period, from_time=from_time, to_time=to_time,
                         rec_cols=rec_cols, child_cols=child_cols,
                         threads=threads)

    def _metrics_by_set(self):
        """Return ``(set_name, [(metric, speaker), ...])`` pairs in extraction order."""
        return [
            # VTC
            (self.vtc, [
                ("voc_speaker_ph", 'FEM'),
                ("voc_speaker_ph", 'CHI'),
                ("voc_dur_speaker_ph", 'FEM'),
                ("voc_dur_speaker_ph", 'CHI'),
                ("avg_voc_dur_speaker", 'FEM'),
                ("avg_voc_dur_speaker", 'CHI'),
            ]),
            # VCM
            (self.vcm, [
                ("cry_voc_speaker_ph", 'CHI'),
                ("cry_voc_dur_speaker_ph", 'CHI'),
                ("avg_cry_voc_dur_speaker", 'CHI'),
                ("can_voc_speaker_ph", 'CHI'),
                ("can_voc_dur_speaker_ph", 'CHI'),
                ("avg_can_voc_dur_speaker", 'CHI'),
                ("non_can_voc_speaker_ph", 'CHI'),
                ("non_can_voc_dur_speaker_ph", 'CHI'),
                ("avg_non_can_voc_dur_speaker", 'CHI'),
                ("lp_n", pd.NA),
                ("lp_dur", pd.NA),
                ("cp_n", pd.NA),
                ("cp_dur", pd.NA),
            ]),
            # ALICE+VTC
            (self.alice_vtc, [
                (avg_pr_pm_speaker, 'FEM'),
                (avg_sr_pm_speaker, 'FEM'),
                (avg_wr_pm_speaker, 'FEM'),
                ("wc_speaker_ph", 'FEM'),
                ("sc_speaker_ph", 'FEM'),
                ("pc_speaker_ph", 'FEM'),
                (mluw_speaker, 'FEM'),
                (mlus_speaker, 'FEM'),
                (mlup_speaker, 'FEM'),
            ]),
            # Acoustic
            (self.acoustic, [
                (mean_mean_pitch_speaker, 'FEM'),
                (mean_mean_pitch_speaker, 'CHI'),
                (mean_pitch_range_speaker, 'FEM'),
                (mean_pitch_range_speaker, 'CHI'),
            ]),
            # Turns
            (self.conversations, [
                (chi_adu_turn_transitions, None),
            ]),
        ]


def get_metrics(project_path, metrics_file):
    """Extract metrics for recordings missing from ``metrics_file`` and save them.

    Recordings whose ``recording_filename`` already appears in ``metrics_file``
    are skipped; newly extracted rows are appended to the existing ones and the
    file is rewritten. Nothing is written when there is no new recording.

    :param project_path: path to the ChildProject data set.
    :param metrics_file: path of the CSV file holding the metrics.
    """
    project = ChildProject(project_path)
    project.read()

    # ``dirname`` is '' for a bare file name; there is nothing to create then.
    metrics_dir = os.path.dirname(metrics_file)
    if metrics_dir and not os.path.exists(metrics_dir):
        os.makedirs(metrics_dir, exist_ok=True)
        logger.info('Created {}'.format(metrics_dir))

    all_recordings = set(project.recordings['recording_filename'])
    if os.path.exists(metrics_file):
        existing_metrics = pd.read_csv(metrics_file)
        new_recordings = all_recordings - set(existing_metrics['recording_filename'])
    else:
        existing_metrics = None
        new_recordings = all_recordings

    if not new_recordings:
        return

    urumetrics = UruguayMetrics(project=project, recordings=list(new_recordings))
    metrics = urumetrics.extract()

    if existing_metrics is not None:
        metrics = pd.concat([existing_metrics, metrics])
    metrics.to_csv(metrics_file, index=False)


def main(project_path):
    """Compute the metrics of the data set located at ``project_path``.

    :param project_path: root of the data set; it must contain an ``extra`` directory.
    :raises ValueError: if ``<project_path>/extra`` does not exist.
    """
    # Check if running the script from the root of the data set
    expected_metrics_path = os.path.join(project_path, 'extra')
    if not os.path.exists(expected_metrics_path):
        # Raised explicitly: an ``assert`` would be stripped under ``python -O``.
        raise ValueError('Expected metrics ({}) path not found. Are you sure to be running this '
                         'command from the root of the data set?'.format(expected_metrics_path))

    metrics_file = os.path.join(project_path, 'extra', 'metrics', 'metrics.csv')
    get_metrics(project_path, metrics_file)
    logger.info('Saved to {}.'.format(metrics_file))


def _parse_args(argv):
    """Parse the command-line arguments in ``argv`` and return them as a dict."""
    import argparse

    parser = argparse.ArgumentParser(description='Extract metrics from a ChildProject data set.')
    parser.add_argument('--project-path', required=False, type=str, default='',
                        help="Path to a ChildProject/datalad project (useful for debugging purposes).")
    args = parser.parse_args(argv)

    return vars(args)


if __name__ == '__main__':
    args = _parse_args(sys.argv[1:])

    logging.basicConfig(level=logging.INFO)

    try:
        main(**args)
    except Exception as e:
        # Top-level boundary: report the error and signal failure to the shell.
        print(e)
        sys.exit(1)
    else:
        sys.exit(0)