12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- #!/usr/bin/env python
- # -*- coding: utf8 -*-
- # -----------------------------------------------------------------------------
- # File: utils_annotations.py (as part of project URUMETRICS)
- # Created: 01/06/2022 17:15
- # Last Modified: 01/06/2022 17:15
- # -----------------------------------------------------------------------------
- # Author: William N. Havard
- # Postdoctoral Researcher
- #
- # Mail : william.havard@ens.fr / william.havard@gmail.com
- #
- # Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
- #
- # ------------------------------------------------------------------------------
- # Description:
- # • Helpers computing pitch-related annotations (fundamental frequency statistics) from audio
- # -----------------------------------------------------------------------------
- import librosa
- import numpy as np
def get_pitch(audio_time_series, sampling_rate, func=None):
    """
    Returns pitch-related annotations.

    The fundamental frequency (f0) is estimated with ``librosa.yin`` using a search range of
    ``fmin=60`` to ``fmax=500``. If ``func`` is callable, it is applied to the f0 track and all
    statistics are computed on the transformed values; otherwise they are computed on f0 itself.

    Regarding pitch range, we use the 5-th percentile as the bottom of the range, and the 95-th
    percentile as the top.
    (see https://www.ibm.com/docs/en/wvs/6.1.1?topic=guide-introduction-pitch-its-use-ssml or
    https://languagelog.ldc.upenn.edu/nll/?p=40788 who also use the same methodology)

    Every key of the returned dictionary is suffixed with the pitch type: ``"f0"`` when no
    transformation is applied, otherwise the name of ``func`` (its ``__name__``, or the name of its
    type for callables that have no ``__name__`` such as ``functools.partial`` objects).

    Note that ``raw_pitch_<type>`` always holds the *untransformed* f0 track, even when ``func``
    is given; only the summary statistics reflect the transformation.

    :param audio_time_series: real-valued vector
    :type audio_time_series: np.array
    :param sampling_rate: sampling rate
    :type sampling_rate: int
    :param func: transformation function to apply to the fundamental frequency
                 (ignored if it is not callable)
    :type func: callable
    :return: raw pitch, mean pitch, median pitch, 5-th percentile, 95-th percentile, pitch range
    :rtype: dict
    """
    # yin is used on purpose: the original author noted that pyin did not work here
    # (cause not documented).
    f0 = librosa.yin(audio_time_series,
                     fmin=60,
                     fmax=500,
                     sr=sampling_rate)

    if callable(func):
        pitch = func(f0)
        # Not every callable carries a __name__ (e.g. functools.partial, instances defining
        # __call__); fall back to the type name instead of raising AttributeError.
        pitch_type = getattr(func, "__name__", type(func).__name__)
    else:
        pitch = f0
        pitch_type = "f0"

    # np.mean (rather than pitch.mean()) also accepts transformations that return a plain sequence.
    mean_pitch = np.mean(pitch)
    # A single call yields the 5-th percentile, the median (50-th) and the 95-th percentile.
    p5_pitch, median_pitch, p95_pitch = np.percentile(pitch, [5, 50, 95])

    return {
        "raw_pitch_{}".format(pitch_type): f0,
        "mean_pitch_{}".format(pitch_type): mean_pitch,
        "median_pitch_{}".format(pitch_type): median_pitch,
        "p5_pitch_{}".format(pitch_type): p5_pitch,
        "p95_pitch_{}".format(pitch_type): p95_pitch,
        "pitch_range_{}".format(pitch_type): p95_pitch - p5_pitch,
    }
|