123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
- # -----------------------------------------------------------------------------
- # File: compute_acoustic_annotations.py (as part of project URUMETRICS)
- # Created: 01/06/2022 15:25
- # Last Modified: 01/06/2022 15:25
- # -----------------------------------------------------------------------------
- # Author: William N. Havard
- # Postdoctoral Researcher
- #
- # Mail : william.havard@ens.fr / william.havard@gmail.com
- #
- # Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
- #
- # ------------------------------------------------------------------------------
- # Description:
# • This file computes acoustic annotations for each segment identified
- # by the VTC.
- # -----------------------------------------------------------------------------
- import logging
- import os
- from math import ceil, floor
- import pandas as pd
- import utils_audio
- from utils import list_audio_files, read_vtc
- from utils_annotations import get_pitch
- from utils_audio import get_audio_slice, read_audio
- logger = logging.getLogger(__name__)
def _annotation_pitch(audio_segments, audio_time_series, sampling_rate):
    """
    Extract pitch-related annotations for the given audio segments of the
    time series audio_time_series sampled at sampling_rate.

    :param audio_segments: dataframe of segments for which we want to compute annotations
    :type audio_segments: pd.DataFrame
    :param audio_time_series: audio time series
    :type audio_time_series: np.matrix
    :param sampling_rate: sampling rate
    :type sampling_rate: int
    :return: input segments with pitch annotation columns appended
    :rtype: pd.DataFrame
    """
    def _segment_pitch(row):
        # Slice the waveform to this segment's frame boundaries, then compute
        # pitch statistics (converted via utils_audio.f2st).
        segment_audio = get_audio_slice(audio_time_series,
                                        row['frame_onset'],
                                        row['frame_offset'])
        return get_pitch(segment_audio, sampling_rate, func=utils_audio.f2st)

    pitch_records = audio_segments.apply(_segment_pitch, axis=1).tolist()
    pitch = pd.DataFrame.from_records(pitch_records)

    # Discard the raw per-frame pitch values; keep only the summary columns
    pitch = pitch.drop(list(pitch.filter(regex='raw_')), axis=1)

    # Align indices so the pitch columns line up with their source segments
    pitch.index = audio_segments.index
    return pd.concat([audio_segments, pitch], axis=1)
def _compute_file_acoustic_annotation(audio_path, audio_segments, target_sr):
    """
    Compute the acoustic annotations for the segments audio_segments of the
    file audio_path, resampled to target_sr.

    :param audio_path: path to the audio file to be read
    :type audio_path: str
    :param audio_segments: dataframe of segments for which we want to compute annotations
    :type audio_segments: pd.DataFrame
    :param target_sr: target sampling rate of the recording
    :type target_sr: int
    :return: annotations
    :rtype: pd.DataFrame
    """
    audio_time_series, sampling_rate = read_audio(audio_path, target_sr=target_sr)

    # Work on a copy so the caller's dataframe is not polluted with the
    # temporary frame_onset/frame_offset bookkeeping columns.
    audio_segments = audio_segments.copy()

    # Convert segment on/offsets (milliseconds) into frame indices. floor/ceil
    # widen the slice so no audio of the segment is cut off.
    audio_segments['frame_onset'] = audio_segments['segment_onset'].apply(
        lambda onset: floor(onset / 1000 * sampling_rate))
    audio_segments['frame_offset'] = audio_segments['segment_offset'].apply(
        lambda offset: ceil(offset / 1000 * sampling_rate))

    # Use the sampling rate the audio was actually loaded at (not target_sr):
    # frame indices above were computed with it, so pitch extraction must
    # agree even if read_audio could not resample exactly to target_sr.
    # TODO: find a better solution if more acoustic annotations are added (concat dfs)
    annotations = _annotation_pitch(audio_segments, audio_time_series, sampling_rate)

    # The frame columns are internal bookkeeping; drop them from the output
    annotations = annotations.drop(columns=['frame_onset', 'frame_offset'])
    return annotations
def compute_acoustic_annotations(path_vtc, path_recordings, target_sr=16_000):
    """
    Compute the acoustic annotations for the recordings found in the VTC file.

    :param path_vtc: path to the VTC file to be read
    :type path_vtc: str
    :param path_recordings: path where the recordings are stored
    :type path_recordings: str
    :param target_sr: target sampling rate of the recordings
    :type target_sr: int
    :return: annotations (empty DataFrame if no VTC entry matches a recording)
    :rtype: pd.DataFrame
    """
    vtc_data = read_vtc(path_vtc, drop_na=True)
    audio_file_list = list_audio_files(path_recordings)

    annotations = []
    # Iterate over VTC annotations grouped by file
    for audio_file_name, audio_segments in vtc_data.groupby(by='file'):
        # Skip VTC entries whose recording was not found on disk
        if not audio_file_list.get(audio_file_name, False):
            continue
        file_anns = _compute_file_acoustic_annotation(audio_file_list[audio_file_name],
                                                      audio_segments,
                                                      target_sr)
        annotations.append(file_anns)

    if not annotations:
        # No VTC entry matched any recording: return an empty frame rather
        # than letting pd.concat([]) raise ValueError.
        return pd.DataFrame()
    return pd.concat(annotations, axis=0)
def save_annotations(save_path, save_name, annotations):
    """
    Save the computed annotations as a CSV file.

    :param save_path: path where to save the annotations
    :type save_path: str
    :param save_name: name of the file (extension is added here)
    :type save_name: str
    :param annotations: annotations to be saved
    :type annotations: pd.DataFrame
    :return: None
    :rtype: None
    """
    destination = os.path.join(save_path, '{}.csv'.format(save_name))

    # Refuse to overwrite the output of a previous run
    if os.path.exists(destination):
        raise FileExistsError('File {} already exists!'.format(destination))

    annotations.to_csv(destination, index=False)
    logger.info('Saved to {}.'.format(destination))
def main(path_vtc, path_recordings, save_path, target_sr):
    """
    Compute acoustic annotations for a VTC file and save them as CSV.

    :param path_vtc: path to the VTC file to be read
    :type path_vtc: str
    :param path_recordings: path where the recordings are stored
    :type path_recordings: str
    :param save_path: existing directory where the annotation CSV is written
    :type save_path: str
    :param target_sr: target sampling rate of the recordings
    :type target_sr: int
    :return: None
    :rtype: None
    :raises IOError: if save_path does not exist
    """
    # Raise explicitly instead of `assert`: asserts are stripped under
    # `python -O`, and the original `assert cond, IOError(...)` only used the
    # exception instance as an assert message, never raising it as an IOError.
    if not os.path.exists(os.path.abspath(save_path)):
        raise IOError('Path {} does not exist!'.format(save_path))

    annotations = compute_acoustic_annotations(path_vtc, path_recordings, target_sr)

    # Name the output after the VTC file (stem only, extension stripped)
    save_name = os.path.splitext(os.path.split(path_vtc)[-1])[0]
    save_annotations(save_path, save_name, annotations)
- def _parse_args(argv):
- import argparse
- parser = argparse.ArgumentParser(description='Compute acoustic annotations.')
- parser.add_argument('--path-vtc', required=True,
- help='Path to the VTC files for which acoustic annotations be computed.')
- parser.add_argument('--path-recordings', required=True,
- help='Path to the recordings corresponding to the recording filenames contained '
- 'in the VTC file.')
- parser.add_argument('--save-path', required=True,
- help='Path were the annotations should be saved.')
- parser.add_argument('--target-sr', required=False, default=16_000, type=int,
- help='Audio files sampling rate.')
- args = parser.parse_args(argv)
- return vars(args)
if __name__ == '__main__':
    import sys

    # Program name (sys.argv[0]) is not needed; parse only the arguments
    cli_args = _parse_args(sys.argv[1:])

    logging.basicConfig(level=logging.INFO)

    try:
        main(**cli_args)
    except Exception as e:
        # Log the full traceback and signal failure to the shell
        logger.exception(e)
        sys.exit(1)
    else:
        sys.exit(0)
|