LAAC-LSCP
/
URUMETRICS-CODE


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
#!/usr/bin/env python
# -*- coding: utf8 -*-

# -----------------------------------------------------------------------------
#   File: utils_audio.py (as part of project URUMETRICS)
#   Created: 01/06/2022 17:15
#   Last Modified: 01/06/2022 17:15
# -----------------------------------------------------------------------------
#   Author: William N. Havard
#           Postdoctoral Researcher
#
#   Mail  : william.havard@ens.fr / william.havard@gmail.com
#  
#   Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
#
# ------------------------------------------------------------------------------
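#   Description:
#       • Audio helper functions: frequency conversions (semitones, ERB),
#         slicing of audio time series and loading of audio files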
# -----------------------------------------------------------------------------

import librosa
import numpy as np

from utils import vectorise


@vectorise
def f2st(f_Hz, base=50):
    """
    Convert a frequency into semitones relative to a baseline frequency.
    Adapted from https://rdrr.io/cran/hqmisc/src/R/hqmisc.R, itself adapted from
    http://ldc.upenn.edu/myl/llog/semitones.R (no longer available).
    See https://www.internationalphoneticassociation.org/icphs-proceedings/ICPhS2003/papers/p15_0771.pdf for reference

    Side effect: each call sets ``__name__`` of the module-level ``f2st`` object
    (i.e. whatever the ``vectorise`` decorator returned) to ``'semitone'``.

    :param f_Hz: frequency to convert (in Hertz)
    :type f_Hz: int
    :param base: baseline frequency (in Hertz) relative to which semitones are expressed
    :type base: int
    :return: number of semitones separating ``f_Hz`` from ``base``
    :rtype: float
    """

    # Use a more explicit designation in annotation title
    f2st.__name__ = 'semitone'

    # Natural log of the frequency ratio spanned by one semitone (twelfth root of two)
    log_semitone_ratio = np.log(2 ** (1 / 12))
    # Log-distance between the input frequency and the baseline
    log_distance = np.log(f_Hz) - np.log(base)
    return log_distance / log_semitone_ratio


@vectorise
def f2erb(f_Hz):
    """
    Convert a frequency into its ERB value, computed as
    ``24.7 * (4.37 * F + 1)`` with ``F`` the frequency expressed in kHz.

    Side effect: each call sets ``__name__`` of the module-level ``f2erb`` object
    (i.e. whatever the ``vectorise`` decorator returned) to ``'erb'``.

    :param f_Hz: frequency to convert (in Hertz)
    :type f_Hz: int
    :return: ERB value of the frequency given as input
    :rtype: float
    """
    # Use a more explicit designation than the function's own name
    f2erb.__name__ = 'erb'

    # The formula expects kHz, hence the Hz -> kHz scaling of the input
    return 24.7 * (4.37 * (f_Hz * 1e-3) + 1)


def get_audio_slice(audio_time_series, begin, end):
    """
    Extract the portion of a time series lying between two frame indices.
    Standard slicing semantics apply: ``begin`` is included, ``end`` is excluded.
    :param audio_time_series: audio time series
    :type audio_time_series: Numpy 2D matrix [time, dim]
    :param begin: index of the first frame to keep
    :type begin: int
    :param end: index of the frame at which to stop (excluded)
    :type end: int
    :return: sliced time series of size (end-begin, dim)
    :rtype: Numpy 2D Matrix
    """
    frame_window = slice(begin, end)
    return audio_time_series[frame_window]


class _SamplingRateMismatchError(ValueError, AssertionError):
    """
    Raised by read_audio when a file's sampling rate differs from the requested one.
    Also derives from AssertionError so that callers written against the former
    ``assert``-based check keep catching it.
    """


def read_audio(file_path, target_sr=16000):
    """
    Read an audio file as a mono signal and return the audio time series and its sampling rate.
    The file is only loaded if its own sampling rate equals ``target_sr``.
    :param file_path: path to an audio file
    :type file_path: str
    :param target_sr: sampling rate (in Hertz) the file is expected to have
    :type target_sr: int
    :return: (audio time series, sampling rate)
    :rtype: tuple
    :raises ValueError: if the file's sampling rate differs from ``target_sr``
    """
    file_sr = librosa.get_samplerate(file_path)
    # Explicit raise instead of ``assert``: assertions are stripped when Python runs
    # with -O, in which case a mismatching file would be loaded at ``target_sr``
    # without any warning.
    if file_sr != target_sr:
        raise _SamplingRateMismatchError("Mismatch between file's true sampling rate ({}) and "
                                         "target sampling rate ({})!".format(file_sr, target_sr))
    return librosa.load(file_path, mono=True, sr=target_sr)