metrics_functions.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. #!/usr/bin/env python
  2. # -*- coding: utf8 -*-
  3. from collections import Counter
  4. # -----------------------------------------------------------------------------
  5. # File: metrics_functions.py (as part of project URUMETRICS)
  6. # Created: 03/06/2022 17:13
  7. # Last Modified: 03/06/2022 17:13
  8. # -----------------------------------------------------------------------------
  9. # Author: William N. Havard
  10. # Postdoctoral Researcher
  11. #
  12. # Mail : william.havard@ens.fr / william.havard@gmail.com
  13. #
  14. # Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
  15. #
  16. # ------------------------------------------------------------------------------
  17. # Description:
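  18. # • Custom metric functions (phoneme/word/syllable rates, pitch, mean length of utterance, child-adult turn transitions) registered through ChildProject's metricFunction decorator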
  19. # -----------------------------------------------------------------------------
  20. import pandas as pd
  21. from ChildProject.pipelines.metricsFunctions import metricFunction
  22. from conversations.toolbox import count_num_turn_transitions
  23. @metricFunction({"speaker"}, {"speaker_type", "phonemes", "duration"})
  24. def avg_pr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
  25. """
  26. Average phoneme rate (pr) per minute by speaker
  27. """
  28. unit_count = annotations[annotations["speaker_type"] == kwargs["speaker"]]["phonemes"]
  29. segment_duration = annotations[annotations["speaker_type"] == kwargs["speaker"]]["duration"] / 1000 / 60
  30. return (unit_count/segment_duration).mean()
  31. @metricFunction({"speaker"}, {"speaker_type", "words", "duration"})
  32. def avg_wr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
  33. """
  34. Average phoneme rate (pr) per minute by speaker
  35. """
  36. unit_count = annotations[annotations["speaker_type"] == kwargs["speaker"]]["words"]
  37. segment_duration = annotations[annotations["speaker_type"] == kwargs["speaker"]]["duration"] / 1000 / 60
  38. return (unit_count/segment_duration).mean()
  39. @metricFunction({"speaker"}, {"speaker_type", "syllables", "duration"})
  40. def avg_sr_pm_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
  41. """
  42. Average phoneme rate (pr) per minute by speaker
  43. """
  44. unit_count = annotations[annotations["speaker_type"] == kwargs["speaker"]]["syllables"]
  45. segment_duration = annotations[annotations["speaker_type"] == kwargs["speaker"]]["duration"] / 1000 / 60
  46. return (unit_count/segment_duration).mean()
  47. @metricFunction({"speaker"}, {"speaker_type", "mean_pitch_semitone"})
  48. def mean_mean_pitch_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
  49. return (annotations[annotations["speaker_type"] == kwargs["speaker"]]["mean_pitch_semitone"]).mean()
  50. @metricFunction({"speaker"}, {"speaker_type", "pitch_range_semitone"})
  51. def mean_pitch_range_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
  52. return (annotations[annotations["speaker_type"] == kwargs["speaker"]]["pitch_range_semitone"]).mean()
  53. @metricFunction({"speaker"}, {"speaker_type", "words"})
  54. def mluw_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
  55. """Mean length of utterance in words for a given speaker
  56. """
  57. return annotations[annotations["speaker_type"] == kwargs["speaker"]]["words"].mean()
  58. @metricFunction({"speaker"}, {"speaker_type", "syllables"})
  59. def mlus_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
  60. """Mean length of utterance in syllables for a given speaker
  61. """
  62. return annotations[annotations["speaker_type"] == kwargs["speaker"]]["syllables"].mean()
  63. @metricFunction({"speaker"}, {"speaker_type", "phonemes"})
  64. def mlup_speaker(annotations: pd.DataFrame, duration: int, **kwargs):
  65. """Mean length of utterance in phonemes for a given speaker
  66. """
  67. return annotations[annotations["speaker_type"] == kwargs["speaker"]]["phonemes"].mean()
  68. @metricFunction(set(), {"speaker_type", "is_response_to", "is_prompt_to", "unit_index"})
  69. def chi_adu_turn_transitions_ph(annotations: pd.DataFrame, duration: int, **kwargs):
  70. """
  71. Number of turn transitions between a child and the adult (FEM or MAL) who speaks the most
  72. """
  73. cnt_fem_mal = Counter(annotations['speaker_type'])
  74. fem_mal = 'FEM' if cnt_fem_mal['FEM'] >= cnt_fem_mal['MAL'] else 'MAL'
  75. num_turns = count_num_turn_transitions(annotations, speakers=['CHI', fem_mal], speaker_column='speaker_type')
  76. num_turns_ph = num_turns * (3600000 / duration)
  77. return num_turns_ph