123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100 |
- #!/usr/bin/env python
- # -*- coding: utf8 -*-
- from collections import Counter
- # -----------------------------------------------------------------------------
- # File: metrics_functions.py (as part of project URUMETRICS)
- # Created: 03/06/2022 17:13
- # Last Modified: 03/06/2022 17:13
- # -----------------------------------------------------------------------------
- # Author: William N. Havard
- # Postdoctoral Researcher
- #
- # Mail : william.havard@ens.fr / william.havard@gmail.com
- #
- # Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
- #
- # ------------------------------------------------------------------------------
- # Description:
- # •
- # -----------------------------------------------------------------------------
- import pandas as pd
- from ChildProject.pipelines.metricsFunctions import metricFunction
- from conversations.toolbox import count_num_turn_transitions
@metricFunction({"speaker"}, {"speaker_type", "phonemes", "duration"})
def avg_pr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average phoneme rate (pr) per minute for a given speaker.

    A rate is computed for every segment of the requested speaker (phoneme
    count divided by the segment length in minutes) and the mean of these
    per-segment rates is returned. This is not the total phoneme count
    divided by the total duration.

    :param annotations: segments to analyse; the ``speaker_type``,
        ``phonemes`` and ``duration`` columns are read
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` value
        whose segments are kept
    :return: mean of the per-segment phoneme rates
    """
    speaker_rows = annotations[annotations["speaker_type"] == kwargs["speaker"]]
    phoneme_counts = speaker_rows["phonemes"]
    # NOTE(review): dividing by 1000 then by 60 assumes the "duration"
    # column is expressed in milliseconds -- confirm against the caller.
    minutes = speaker_rows["duration"] / 1000 / 60
    per_segment_rate = phoneme_counts / minutes
    return per_segment_rate.mean()
@metricFunction({"speaker"}, {"speaker_type", "words", "duration"})
def avg_wr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average word rate (wr) per minute for a given speaker.

    A rate is computed for every segment of the requested speaker (word
    count divided by the segment length in minutes) and the mean of these
    per-segment rates is returned. This is not the total word count divided
    by the total duration.

    :param annotations: segments to analyse; the ``speaker_type``,
        ``words`` and ``duration`` columns are read
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` value
        whose segments are kept
    :return: mean of the per-segment word rates
    """
    # Filter once and reuse the selection for both columns.
    speaker_rows = annotations[annotations["speaker_type"] == kwargs["speaker"]]
    word_counts = speaker_rows["words"]
    # NOTE(review): dividing by 1000 then by 60 assumes the "duration"
    # column is expressed in milliseconds -- confirm against the caller.
    minutes = speaker_rows["duration"] / 1000 / 60
    return (word_counts / minutes).mean()
@metricFunction({"speaker"}, {"speaker_type", "syllables", "duration"})
def avg_sr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average syllable rate (sr) per minute for a given speaker.

    A rate is computed for every segment of the requested speaker (syllable
    count divided by the segment length in minutes) and the mean of these
    per-segment rates is returned. This is not the total syllable count
    divided by the total duration.

    :param annotations: segments to analyse; the ``speaker_type``,
        ``syllables`` and ``duration`` columns are read
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` value
        whose segments are kept
    :return: mean of the per-segment syllable rates
    """
    # Filter once and reuse the selection for both columns.
    speaker_rows = annotations[annotations["speaker_type"] == kwargs["speaker"]]
    syllable_counts = speaker_rows["syllables"]
    # NOTE(review): dividing by 1000 then by 60 assumes the "duration"
    # column is expressed in milliseconds -- confirm against the caller.
    minutes = speaker_rows["duration"] / 1000 / 60
    return (syllable_counts / minutes).mean()
@metricFunction({"speaker"}, {"speaker_type", "mean_pitch_semitone"})
def mean_mean_pitch_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean of the ``mean_pitch_semitone`` column for a given speaker.

    :param annotations: segments to analyse; the ``speaker_type`` and
        ``mean_pitch_semitone`` columns are read
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` value
        whose segments are kept
    :return: mean of ``mean_pitch_semitone`` over the speaker's segments
    """
    is_speaker = annotations["speaker_type"] == kwargs["speaker"]
    pitch_values = annotations[is_speaker]["mean_pitch_semitone"]
    return pitch_values.mean()
@metricFunction({"speaker"}, {"speaker_type", "pitch_range_semitone"})
def mean_pitch_range_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean of the ``pitch_range_semitone`` column for a given speaker.

    :param annotations: segments to analyse; the ``speaker_type`` and
        ``pitch_range_semitone`` columns are read
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` value
        whose segments are kept
    :return: mean of ``pitch_range_semitone`` over the speaker's segments
    """
    is_speaker = annotations["speaker_type"] == kwargs["speaker"]
    range_values = annotations[is_speaker]["pitch_range_semitone"]
    return range_values.mean()
@metricFunction({"speaker"}, {"speaker_type", "words"})
def mluw_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean length of utterance in words (mluw) for a given speaker.

    :param annotations: segments to analyse; the ``speaker_type`` and
        ``words`` columns are read
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` value
        whose segments are kept
    :return: mean of the ``words`` column over the speaker's segments
    """
    is_speaker = annotations["speaker_type"] == kwargs["speaker"]
    words_per_segment = annotations[is_speaker]["words"]
    return words_per_segment.mean()
@metricFunction({"speaker"}, {"speaker_type", "syllables"})
def mlus_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean length of utterance in syllables (mlus) for a given speaker.

    :param annotations: segments to analyse; the ``speaker_type`` and
        ``syllables`` columns are read
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` value
        whose segments are kept
    :return: mean of the ``syllables`` column over the speaker's segments
    """
    is_speaker = annotations["speaker_type"] == kwargs["speaker"]
    syllables_per_segment = annotations[is_speaker]["syllables"]
    return syllables_per_segment.mean()
@metricFunction({"speaker"}, {"speaker_type", "phonemes"})
def mlup_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean length of utterance in phonemes (mlup) for a given speaker.

    :param annotations: segments to analyse; the ``speaker_type`` and
        ``phonemes`` columns are read
    :param duration: not used by this metric
    :param kwargs: must contain ``speaker``, the ``speaker_type`` value
        whose segments are kept
    :return: mean of the ``phonemes`` column over the speaker's segments
    """
    is_speaker = annotations["speaker_type"] == kwargs["speaker"]
    phonemes_per_segment = annotations[is_speaker]["phonemes"]
    return phonemes_per_segment.mean()
@metricFunction(set(), {"speaker_type", "is_response_to", "is_prompt_to", "unit_index"})
def chi_adu_turn_transitions_ph(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Number of turn transitions between the child (CHI) and one adult,
    scaled by ``3600000 / duration``.

    The adult is whichever of FEM or MAL has the larger number of rows in
    ``annotations`` (segment count, not speaking time); FEM is chosen when
    the counts are equal.

    :param annotations: segments to analyse; ``speaker_type`` is read here
        and the frame is passed on to ``count_num_turn_transitions``
    :param duration: divisor of the scaling factor
    :param kwargs: not used by this metric
    :return: transition count multiplied by ``3600000 / duration``
    """
    segments_per_speaker = Counter(annotations['speaker_type'])
    # A Counter returns 0 for a missing key, so an absent FEM or MAL is fine.
    if segments_per_speaker['MAL'] > segments_per_speaker['FEM']:
        main_adult = 'MAL'
    else:
        main_adult = 'FEM'

    transitions = count_num_turn_transitions(
        annotations,
        speakers=['CHI', main_adult],
        speaker_column='speaker_type',
    )
    # NOTE(review): 3600000 is the number of milliseconds in an hour, so this
    # yields a per-hour figure if `duration` is in milliseconds -- confirm.
    # A `duration` of 0 is not guarded against here.
    hourly_scale = 3600000 / duration
    return transitions * hourly_scale
|