#!/usr/bin/env python
# -*- coding: utf8 -*-
# Repository: LAAC-LSCP/URUMETRICS-CODE
from collections import Counter

# -----------------------------------------------------------------------------
#   File: metrics_functions.py (as part of project URUMETRICS)
#   Created: 03/06/2022 17:13
#   Last Modified: 03/06/2022 17:13
# -----------------------------------------------------------------------------
#   Author: William N. Havard
#           Postdoctoral Researcher
#
#   Mail  : william.havard@ens.fr / william.havard@gmail.com
#
#   Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
#
# ------------------------------------------------------------------------------
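#   Description:
#       • Speaker-level metric functions (phoneme/word/syllable rates, pitch,
#         mean length of utterance, child-adult turn transitions) registered
#         through ChildProject's metricFunction decorator.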
# -----------------------------------------------------------------------------

import pandas as pd
from ChildProject.pipelines.metricsFunctions import metricFunction
from conversations.toolbox import count_num_turn_transitions


@metricFunction({"speaker"}, {"speaker_type", "phonemes", "duration"})
def avg_pr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average phoneme rate (pr) per minute by speaker

    Only the rows whose ``speaker_type`` equals ``kwargs["speaker"]`` are used.
    For each of them, ``phonemes`` is divided by ``duration`` converted to
    minutes (``/ 1000 / 60``, so ``duration`` is presumably in milliseconds),
    and the mean of these per-row rates is returned.

    The ``duration`` argument of the function itself is not read, and no guard
    against zero-length rows is applied.
    """
    speaker_rows = annotations.loc[annotations["speaker_type"] == kwargs["speaker"]]
    phoneme_counts = speaker_rows["phonemes"]
    minutes = speaker_rows["duration"] / 1000 / 60
    per_row_rate = phoneme_counts / minutes
    return per_row_rate.mean()


@metricFunction({"speaker"}, {"speaker_type", "words", "duration"})
def avg_wr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average word rate (wr) per minute by speaker

    Only the rows whose ``speaker_type`` equals ``kwargs["speaker"]`` are used.
    For each of them, ``words`` is divided by ``duration`` converted to minutes
    (``/ 1000 / 60``, so ``duration`` is presumably in milliseconds), and the
    mean of these per-row rates is returned.

    The ``duration`` argument of the function itself is not read, and no guard
    against zero-length rows is applied.
    """
    # Filter once and reuse the selection for both columns.
    speaker_rows = annotations[annotations["speaker_type"] == kwargs["speaker"]]
    word_counts = speaker_rows["words"]
    minutes = speaker_rows["duration"] / 1000 / 60
    return (word_counts / minutes).mean()


@metricFunction({"speaker"}, {"speaker_type", "syllables", "duration"})
def avg_sr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Average syllable rate (sr) per minute by speaker

    Only the rows whose ``speaker_type`` equals ``kwargs["speaker"]`` are used.
    For each of them, ``syllables`` is divided by ``duration`` converted to
    minutes (``/ 1000 / 60``, so ``duration`` is presumably in milliseconds),
    and the mean of these per-row rates is returned.

    The ``duration`` argument of the function itself is not read, and no guard
    against zero-length rows is applied.
    """
    # Filter once and reuse the selection for both columns.
    speaker_rows = annotations[annotations["speaker_type"] == kwargs["speaker"]]
    syllable_counts = speaker_rows["syllables"]
    minutes = speaker_rows["duration"] / 1000 / 60
    return (syllable_counts / minutes).mean()


@metricFunction({"speaker"}, {"speaker_type", "mean_pitch_semitone"})
def mean_mean_pitch_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean of the ``mean_pitch_semitone`` column for a given speaker

    Only the rows whose ``speaker_type`` equals ``kwargs["speaker"]`` are
    averaged; ``duration`` is not read.
    """
    is_speaker = annotations["speaker_type"] == kwargs["speaker"]
    pitch_values = annotations.loc[is_speaker, "mean_pitch_semitone"]
    return pitch_values.mean()


@metricFunction({"speaker"}, {"speaker_type", "pitch_range_semitone"})
def mean_pitch_range_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Mean of the ``pitch_range_semitone`` column for a given speaker

    Only the rows whose ``speaker_type`` equals ``kwargs["speaker"]`` are
    averaged; ``duration`` is not read.
    """
    is_speaker = annotations["speaker_type"] == kwargs["speaker"]
    range_values = annotations.loc[is_speaker, "pitch_range_semitone"]
    return range_values.mean()


@metricFunction({"speaker"}, {"speaker_type", "words"})
def mluw_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """Mean length of utterance in words for a given speaker

    Averages the ``words`` column over the rows whose ``speaker_type`` equals
    ``kwargs["speaker"]``; ``duration`` is not read.
    """
    speaker_rows = annotations.loc[annotations["speaker_type"] == kwargs["speaker"]]
    words_per_row = speaker_rows["words"]
    return words_per_row.mean()


@metricFunction({"speaker"}, {"speaker_type", "syllables"})
def mlus_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """Mean length of utterance in syllables for a given speaker

    Averages the ``syllables`` column over the rows whose ``speaker_type``
    equals ``kwargs["speaker"]``; ``duration`` is not read.
    """
    speaker_rows = annotations.loc[annotations["speaker_type"] == kwargs["speaker"]]
    syllables_per_row = speaker_rows["syllables"]
    return syllables_per_row.mean()


@metricFunction({"speaker"}, {"speaker_type", "phonemes"})
def mlup_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
    """Mean length of utterance in phonemes for a given speaker

    Averages the ``phonemes`` column over the rows whose ``speaker_type``
    equals ``kwargs["speaker"]``; ``duration`` is not read.
    """
    speaker_rows = annotations.loc[annotations["speaker_type"] == kwargs["speaker"]]
    phonemes_per_row = speaker_rows["phonemes"]
    return phonemes_per_row.mean()


@metricFunction(set(), {"speaker_type", "is_response_to", "is_prompt_to", "unit_index"})
def chi_adu_turn_transitions(annotations: pd.DataFrame, duration: int, **kwargs):
    """
    Number of turn transitions between a child and the adult (FEM or MAL) who speaks the most

    The adult is picked by counting how many rows of ``speaker_type`` are
    'FEM' and how many are 'MAL'; 'FEM' is kept when the two counts are equal.
    The counting of transitions itself is delegated to
    ``count_num_turn_transitions``, called with the speakers 'CHI' and the
    selected adult. ``duration`` and ``kwargs`` are not read.
    """
    speaker_counts = Counter(annotations['speaker_type'])

    # 'MAL' only wins with strictly more rows; a tie goes to 'FEM'.
    if speaker_counts['MAL'] > speaker_counts['FEM']:
        dominant_adult = 'MAL'
    else:
        dominant_adult = 'FEM'

    return count_num_turn_transitions(
        annotations,
        speakers=['CHI', dominant_adult],
        speaker_column='speaker_type',
    )