#!/usr/bin/env python
# -*- coding: utf8 -*-
# -----------------------------------------------------------------------------
#   File: compute_acoustic_annotations.py (as part of project URUMETRICS)
#   Created: 01/06/2022 15:25
#   Last Modified: 01/06/2022 15:25
# -----------------------------------------------------------------------------
#         Author: William N. Havard
#                  Postdoctoral Researcher
#
#          Mail : william.havard@ens.fr / william.havard@gmail.com
#
#   Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
#
# ------------------------------------------------------------------------------
#   Description:
#   • This file computes acoustic annotations for each segment identified
#     by the VTC.
# -----------------------------------------------------------------------------

import logging
import os
from math import ceil, floor

import pandas as pd

import utils_audio
from utils import list_audio_files, read_vtc
from utils_annotations import get_pitch
from utils_audio import get_audio_slice, read_audio

logger = logging.getLogger(__name__)


def _annotation_pitch(audio_segments, audio_time_series, sampling_rate):
    """
    Extract pitch related information for the audio segments audio_segments of the time series
    audio_time_series with sr sampling_rate
    :param audio_segments: dataframe of segments for which we want to compute annotations;
        must carry precomputed 'frame_onset'/'frame_offset' columns
    :type audio_segments: pd.DataFrame
    :param audio_time_series: audio time series
    :type audio_time_series: np.matrix
    :param sampling_rate: sampling rate
    :type sampling_rate: int
    :return: pitch annotations (input segments with pitch columns appended)
    :rtype: pd.DataFrame
    """
    # One get_pitch() dict per segment; f2st converts frequencies to semitones.
    pitch = pd.DataFrame.from_records(audio_segments.apply(lambda row:
                                                           get_pitch(
                                                               get_audio_slice(
                                                                   audio_time_series,
                                                                   row['frame_onset'],
                                                                   row['frame_offset']
                                                               ),
                                                               sampling_rate,
                                                               func=utils_audio.f2st
                                                           ), axis=1).tolist())

    # Drop raw pitch values (only keep the summary statistics)
    pitch.drop(list(pitch.filter(regex='raw_')), axis=1, inplace=True)

    # Align on the segment index so concat matches rows, not positions.
    pitch.index = audio_segments.index
    audio_segments = pd.concat([audio_segments, pitch], axis=1)

    return audio_segments


def _compute_file_acoustic_annotation(audio_path, audio_segments, target_sr):
    """
    Compute the acoustic annotations for the segments audio_segment of the file audio_path
    with sr target_sr
    :param audio_path: path to the audio file to be read
    :type audio_path: str
    :param audio_segments: dataframe of segments for which we want to compute annotations
    :type audio_segments: pd.DataFrame
    :param target_sr: target sampling rate of the recording
    :type target_sr: int
    :return: annotations
    :rtype: pd.DataFrame
    """
    audio_time_series, sampling_rate = read_audio(audio_path, target_sr=target_sr)

    # Computes the start frame and end frame of the given segments given is on/offset in seconds
    # (floor/ceil so the frame window always fully covers the segment).
    audio_segments['frame_onset'] = audio_segments['segment_onset'].apply(
        lambda onset: floor(onset / 1000 * sampling_rate))
    audio_segments['frame_offset'] = audio_segments['segment_offset'].apply(
        lambda offset: ceil(offset / 1000 * sampling_rate))

    # Find better solution if more acoustic annotations are added in the future (concat dfs)
    # Use the sampling rate actually returned by read_audio, not the requested target_sr:
    # frame indices above were computed with sampling_rate, so pitch extraction must match.
    annotations = _annotation_pitch(audio_segments, audio_time_series, sampling_rate)

    # Frame indices are an implementation detail; drop them from the output.
    annotations.drop(columns=['frame_onset', 'frame_offset'],
                     inplace=True)

    return annotations


def compute_acoustic_annotations(path_vtc, path_recordings, target_sr=16_000):
    """
    Compute the acoustic annotations for the recordings found in the VTC file
    :param path_vtc: path to the VTC file to be read
    :type path_vtc: str
    :param path_recordings: path where the recordings are stored
    :type path_recordings: str
    :param target_sr: target sampling rate of the recordings
    :type target_sr: int
    :return: annotations (empty DataFrame if no VTC file matches a known recording)
    :rtype: pd.DataFrame
    """
    vtc_data = read_vtc(path_vtc, drop_na=True)
    audio_file_list = list_audio_files(path_recordings)

    annotations = []
    vtc_audio_files = vtc_data.groupby(by='file')
    # Iterate over VTC annotations grouped by file
    for audio_file_name, audio_segments in vtc_audio_files:
        # Skip VTC entries whose audio file was not found among the recordings.
        if not audio_file_list.get(audio_file_name, False):
            continue
        file_anns = _compute_file_acoustic_annotation(audio_file_list[audio_file_name],
                                                      audio_segments, target_sr)
        annotations.append(file_anns)

    # Guard: pd.concat raises an opaque ValueError on an empty list.
    if not annotations:
        logger.warning('No VTC annotation matched any of the audio files found in %s.',
                       path_recordings)
        return pd.DataFrame()

    df_annotations = pd.concat(annotations, axis=0)
    return df_annotations


def save_annotations(save_path, save_name, annotations):
    """
    Save the computed annotations
    :param save_path: path where to save the annotations
    :type save_path: str
    :param save_name: name of the file
    :type save_name: str
    :param annotations: annotations to be saved
    :type annotations: pd.DataFrame
    :return: None
    :rtype: None
    :raises FileExistsError: if the destination CSV already exists (never overwrite)
    """
    full_save_path = os.path.join(save_path, '{}.csv'.format(save_name))

    if os.path.exists(full_save_path):
        raise FileExistsError('File {} already exists!'.format(full_save_path))

    annotations.to_csv(full_save_path, index=False)
    logger.info('Saved to {}.'.format(full_save_path))


def main(path_vtc, path_recordings, save_path, target_sr):
    """
    Entry point: compute annotations for a VTC file and save them as CSV.
    The output file is named after the VTC file (extension replaced by .csv).
    :raises IOError: if save_path does not exist
    """
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
    # and `assert cond, IOError(...)` never actually raised the IOError anyway.
    if not os.path.exists(os.path.abspath(save_path)):
        raise IOError('Path {} does not exist!'.format(save_path))

    annotations = compute_acoustic_annotations(path_vtc, path_recordings, target_sr)
    save_name = os.path.splitext(os.path.split(path_vtc)[-1])[0]
    save_annotations(save_path, save_name, annotations)


def _parse_args(argv):
    """Parse command-line arguments and return them as a dict suitable for main(**args)."""
    import argparse
    parser = argparse.ArgumentParser(description='Compute acoustic annotations.')
    parser.add_argument('--path-vtc', required=True,
                        help='Path to the VTC files for which acoustic annotations be computed.')
    parser.add_argument('--path-recordings', required=True,
                        help='Path to the recordings corresponding to the recording filenames contained '
                             'in the VTC file.')
    parser.add_argument('--save-path', required=True,
                        help='Path were the annotations should be saved.')
    parser.add_argument('--target-sr', required=False, default=16_000, type=int,
                        help='Audio files sampling rate.')
    args = parser.parse_args(argv)
    return vars(args)


if __name__ == '__main__':
    import sys

    pgrm_name, argv = sys.argv[0], sys.argv[1:]
    args = _parse_args(argv)
    logging.basicConfig(level=logging.INFO)

    try:
        main(**args)
        sys.exit(0)
    except Exception as e:
        # Top-level boundary: log the full traceback and signal failure via exit code.
        logger.exception(e)
        sys.exit(1)