#!/usr/bin/env python
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
#   File: utils_audio.py (as part of project URUMETRICS)
#   Created: 01/06/2022 17:15
#   Last Modified: 01/06/2022 17:15
# -----------------------------------------------------------------------------
#   Author: William N. Havard
#           Postdoctoral Researcher
#
#   Mail  : william.havard@ens.fr / william.havard@gmail.com
#
#   Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
#
# ------------------------------------------------------------------------------
#   Description:
#       • Audio helpers: frequency-scale conversions (semitone, ERB),
#         slicing of an audio time series and loading of audio files
# -----------------------------------------------------------------------------

import librosa
import numpy as np

from utils import vectorise


class SamplingRateMismatchError(ValueError, AssertionError):
    """
    Raised by ``read_audio`` when a file's sampling rate differs from the
    requested one.

    Derives from ``ValueError`` (the semantically appropriate type) and from
    ``AssertionError`` so that callers written against the previous
    ``assert``-based check keep catching it.
    """


@vectorise
def f2st(f_Hz, base=50):
    """
    Returns the semitone of the frequency given as input, computed as
    ``12 * log2(f_Hz / base)`` (written with natural logarithms below).

    Adapted from https://rdrr.io/cran/hqmisc/src/R/hqmisc.R
    itself adapted from http://ldc.upenn.edu/myl/llog/semitones.R
    (no longer available).
    See https://www.internationalphoneticassociation.org/icphs-proceedings/ICPhS2003/papers/p15_0771.pdf
    for reference

    NOTE(review): the function is wrapped by ``utils.vectorise``, which is not
    visible from this file; presumably it allows array-like input -- confirm.

    :param f_Hz: frequency to convert (in Hertz)
    :type f_Hz: int
    :param base: baseline frequency relative to which semitones are expressed
    :type base: int
    :return: semitone corresponding to the frequency given as input
    :rtype: float
    """

    # Use a more explicit designation in annotation title.
    # Side effect: this rebinds ``__name__`` on the module-level ``f2st``
    # object (i.e. whatever ``vectorise`` returned) and only takes effect once
    # the function has been called at least once.
    f2st.__name__ = 'semitone'

    # Natural log of the frequency ratio of one semitone (2^(1/12))
    semi = np.log(2 ** (1 / 12))
    return (np.log(f_Hz) - np.log(base)) / semi


@vectorise
def f2erb(f_Hz):
    """
    Return the ERB value of the frequency given as input, computed as
    ``24.7 * (4.37 * f_kHz + 1)`` with ``f_kHz`` the frequency in kHz.

    :param f_Hz: frequency to convert (in Hertz)
    :type f_Hz: int
    :return: ERB value of the frequency given as input
    :rtype: float
    """

    # Same naming side effect as in ``f2st``: rebinds ``__name__`` on the
    # module-level (decorated) ``f2erb`` object on every call.
    f2erb.__name__ = 'erb'

    # The formula is expressed in kHz
    f_kHz = f_Hz * 1e-3
    return 24.7 * (4.37 * f_kHz + 1)


def get_audio_slice(audio_time_series, begin, end):
    """
    Returns the acoustic vector between begin and end (``end`` excluded)

    The slice is taken along the first axis only; standard Python/NumPy
    slicing rules apply (out-of-range bounds are clipped, not an error).

    :param audio_time_series: audio time series
    :type audio_time_series: Numpy 2D matrix [time, dim]
    :param begin: start frame
    :type begin: int
    :param end: end frame
    :type end: int
    :return: sliced time series of size (end-begin, dim)
    :rtype: Numpy 2D Matrix
    """

    return audio_time_series[begin:end]


def read_audio(file_path, target_sr=16000):
    """
    Read an audio file and returns the audio time series and its sampling rate

    The file is never resampled: if its sampling rate differs from
    ``target_sr`` an error is raised instead of silently converting it.

    :param file_path: path to an audio file
    :type file_path: str
    :param target_sr: sampling rate (in Hz) the file is required to have
    :type target_sr: int
    :return: (audio time series, sampling rate)
    :rtype: tuple
    :raises SamplingRateMismatchError: if the file's sampling rate is not
        ``target_sr`` (subclass of both ``ValueError`` and ``AssertionError``)
    """

    file_sr = librosa.get_samplerate(file_path)

    # Explicit raise rather than ``assert``: assertions are stripped when
    # Python runs with -O, in which case ``librosa.load`` below would
    # silently resample the file to ``target_sr``.
    if file_sr != target_sr:
        raise SamplingRateMismatchError(
            "Mismatch between file's true sampling rate ({}) and "
            "target sampling rate ({})!".format(file_sr, target_sr))

    return librosa.load(file_path, mono=True, sr=target_sr)