#!/usr/bin/env python
# -*- coding: utf8 -*-

# -----------------------------------------------------------------------------
#   File: metrics_functions.py (as part of project URUMETRICS)
#   Created: 03/06/2022 17:13
#   Last Modified: 03/06/2022 17:13
# -----------------------------------------------------------------------------
#   Author: William N. Havard
#           Postdoctoral Researcher
#
#   Mail  : william.havard@ens.fr / william.havard@gmail.com
#
#   Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
#
# ------------------------------------------------------------------------------
#   Description:
#       • Metric functions (speech rates, pitch statistics, mean utterance
#         lengths and turn transitions) computed on annotation data frames
#         and registered through ChildProject's ``metricFunction`` decorator.
# -----------------------------------------------------------------------------

from collections import Counter

import pandas as pd
from ChildProject.pipelines.metricsFunctions import metricFunction

from conversations.toolbox import count_num_turn_transitions


def _speaker_rows(annotations: pd.DataFrame, speaker) -> pd.DataFrame:
    """Return the rows of ``annotations`` whose ``speaker_type`` equals ``speaker``."""
    return annotations[annotations["speaker_type"] == speaker]


def _avg_rate_per_minute(annotations: pd.DataFrame, speaker, unit_column: str):
    """Return the mean of per-segment rates (units per minute) for ``speaker``.

    For every segment of the speaker, the value in ``unit_column`` is divided
    by the segment duration converted to minutes, and the resulting rates are
    averaged (a mean of per-segment rates, not total units / total duration).
    """
    rows = _speaker_rows(annotations, speaker)
    # Assumes the ``duration`` column is in milliseconds: ms -> s -> min.
    minutes = rows["duration"] / 1000 / 60
    return (rows[unit_column] / minutes).mean()


@metricFunction({"speaker"}, {"speaker_type", "phonemes", "duration"})
def avg_pr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average phoneme rate (pr) per minute by speaker

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` to select
    :return: mean of the per-segment phoneme rates
    """
    return _avg_rate_per_minute(annotations, kwargs["speaker"], "phonemes")


@metricFunction({"speaker"}, {"speaker_type", "words", "duration"})
def avg_wr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average word rate (wr) per minute by speaker

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` to select
    :return: mean of the per-segment word rates
    """
    return _avg_rate_per_minute(annotations, kwargs["speaker"], "words")


@metricFunction({"speaker"}, {"speaker_type", "syllables", "duration"})
def avg_sr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average syllable rate (sr) per minute by speaker

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` to select
    :return: mean of the per-segment syllable rates
    """
    return _avg_rate_per_minute(annotations, kwargs["speaker"], "syllables")


@metricFunction({"speaker"}, {"speaker_type", "mean_pitch_semitone"})
def mean_mean_pitch_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean of the ``mean_pitch_semitone`` column for a given speaker

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` to select
    """
    return _speaker_rows(annotations, kwargs["speaker"])["mean_pitch_semitone"].mean()


@metricFunction({"speaker"}, {"speaker_type", "pitch_range_semitone"})
def mean_pitch_range_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean of the ``pitch_range_semitone`` column for a given speaker

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` to select
    """
    return _speaker_rows(annotations, kwargs["speaker"])["pitch_range_semitone"].mean()


@metricFunction({"speaker"}, {"speaker_type", "words"})
def mluw_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean length of utterance in words for a given speaker

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` to select
    """
    return _speaker_rows(annotations, kwargs["speaker"])["words"].mean()


@metricFunction({"speaker"}, {"speaker_type", "syllables"})
def mlus_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean length of utterance in syllables for a given speaker

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` to select
    """
    return _speaker_rows(annotations, kwargs["speaker"])["syllables"].mean()


@metricFunction({"speaker"}, {"speaker_type", "phonemes"})
def mlup_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean length of utterance in phonemes for a given speaker

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` to select
    """
    return _speaker_rows(annotations, kwargs["speaker"])["phonemes"].mean()


@metricFunction(set(), {"speaker_type", "is_response_to", "is_prompt_to", "unit_index"})
def chi_adu_turn_transitions(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Number of turn transitions between a child (CHI) and the adult (FEM or MAL)
    who has the most segments in ``annotations``

    The adult is chosen by counting rows per ``speaker_type`` (segment count,
    not speaking time); FEM is selected when both adults have the same count.

    :param annotations: annotation segments
    :param duration: not used by this metric
    :param kwargs: not used by this metric
    :return: the value returned by ``count_num_turn_transitions``
    """
    segments_per_speaker = Counter(annotations['speaker_type'])
    # ``>=`` makes FEM win ties, including when neither adult appears at all.
    if segments_per_speaker['FEM'] >= segments_per_speaker['MAL']:
        main_adult = 'FEM'
    else:
        main_adult = 'MAL'

    return count_num_turn_transitions(annotations,
                                      speakers=['CHI', main_adult],
                                      speaker_column='speaker_type')