utils_annotations.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. #!/usr/bin/env python
  2. # -*- coding: utf8 -*-
  3. # -----------------------------------------------------------------------------
  4. # File: utils_annotations.py (as part of project URUMETRICS)
  5. # Created: 01/06/2022 17:15
  6. # Last Modified: 01/06/2022 17:15
  7. # -----------------------------------------------------------------------------
  8. # Author: William N. Havard
  9. # Postdoctoral Researcher
  10. #
  11. # Mail : william.havard@ens.fr / william.havard@gmail.com
  12. #
  13. # Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
  14. #
  15. # ------------------------------------------------------------------------------
  16. # Description:
  17. # •
  18. # -----------------------------------------------------------------------------
  19. import librosa
  20. import numpy as np
  21. def get_pitch(audio_time_series, sampling_rate, func=None):
  22. """
  23. Returns pitch-related annotations.
  24. Regarding pitch range, we use the 5-th percentile as the bottom of the range, and the 95-th percentile as the top.
  25. (see https://www.ibm.com/docs/en/wvs/6.1.1?topic=guide-introduction-pitch-its-use-ssml or
  26. https://languagelog.ldc.upenn.edu/nll/?p=40788 who also use the same methodology)
  27. :param audio_time_series: real-valued vector
  28. :type audio_time_series: np.array
  29. :param sampling_rate: sampling rate
  30. :type sampling_rate: int
  31. :param func: transformation function to apply to the fundamental frequency
  32. :type func: callable
  33. :return: raw pitch, mean pitch, median pitch, 5-th percentile, 95-th percentile, pitch range
  34. :rtype: dict
  35. """
  36. f0 = librosa.yin(audio_time_series,
  37. fmin=60,
  38. fmax=500,
  39. sr=sampling_rate) # pyin does not work, why?
  40. pitch = func(f0) if callable(func) else f0
  41. mean_pitch, median_pitch, p5_pitch, p95_pitch = pitch.mean(), np.quantile(pitch, .5), \
  42. np.percentile(pitch, 5), np.percentile(pitch, 95)
  43. pitch_type = "f0" if not callable(func) else func.__name__
  44. return {"raw_pitch_{}".format(pitch_type): f0,
  45. "mean_pitch_{}".format(pitch_type): mean_pitch,
  46. "median_pitch_{}".format(pitch_type): median_pitch,
  47. "p5_pitch_{}".format(pitch_type): p5_pitch,
  48. "p95_pitch_{}".format(pitch_type): p95_pitch,
  49. "pitch_range_{}".format(pitch_type): p95_pitch - p5_pitch}