123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
- # -----------------------------------------------------------------------------
- # File: compute_acoustic_annotations.py (as part of project URUMETRICS)
- # Created: 01/06/2022 15:25
- # Last Modified: 01/06/2022 15:25
- # -----------------------------------------------------------------------------
- # Author: William N. Havard
- # Postdoctoral Researcher
- #
- # Mail : william.havard@ens.fr / william.havard@gmail.com
- #
- # Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
- #
- # ------------------------------------------------------------------------------
- # Description:
# • This file computes acoustic annotations for each segment identified
- # by the VTC.
- # -----------------------------------------------------------------------------
- import logging
- import os
- from math import ceil, floor
- import pandas as pd
- import utils_audio
- from utils import list_audio_files, read_vtc
- from utils_annotations import get_pitch
- from utils_audio import get_audio_slice, read_audio
- logger = logging.getLogger(__name__)
def _annotation_pitch(audio_segments, audio_time_series, sampling_rate):
    """
    Extract pitch-related annotations for the given audio segments of the
    time series audio_time_series sampled at sampling_rate.

    :param audio_segments: dataframe of segments for which we want to compute annotations
    :type audio_segments: pd.DataFrame
    :param audio_time_series: audio time series
    :type audio_time_series: np.matrix
    :param sampling_rate: sampling rate
    :type sampling_rate: int
    :return: input segments with pitch annotation columns appended
    :rtype: pd.DataFrame
    """
    def _segment_pitch(row):
        # Slice the waveform to this segment's frame boundaries, then compute
        # pitch statistics (converted via utils_audio.f2st).
        segment_audio = get_audio_slice(audio_time_series,
                                        row['frame_onset'],
                                        row['frame_offset'])
        return get_pitch(segment_audio, sampling_rate, func=utils_audio.f2st)

    pitch_records = audio_segments.apply(_segment_pitch, axis=1).tolist()
    pitch = pd.DataFrame.from_records(pitch_records)

    # Discard the raw per-frame pitch values; keep only the summary columns
    pitch = pitch.drop(list(pitch.filter(regex='raw_')), axis=1)

    # Align indices so the pitch columns line up with their source segments
    pitch.index = audio_segments.index
    return pd.concat([audio_segments, pitch], axis=1)
def _compute_file_acoustic_annotation(audio_path, audio_segments, target_sr):
    """
    Compute the acoustic annotations for the segments audio_segments of the
    file audio_path, resampled to target_sr.

    :param audio_path: path to the audio file to be read
    :type audio_path: str
    :param audio_segments: dataframe of segments for which we want to compute annotations
    :type audio_segments: pd.DataFrame
    :param target_sr: target sampling rate of the recording
    :type target_sr: int
    :return: annotations
    :rtype: pd.DataFrame
    """
    audio_time_series, sampling_rate = read_audio(audio_path, target_sr=target_sr)

    # Work on a copy so the caller's dataframe is not polluted with the
    # temporary frame_onset/frame_offset bookkeeping columns.
    audio_segments = audio_segments.copy()

    # Convert segment on/offsets (milliseconds) into frame indices. floor/ceil
    # widen the slice so no audio of the segment is cut off.
    audio_segments['frame_onset'] = audio_segments['segment_onset'].apply(
        lambda onset: floor(onset / 1000 * sampling_rate))
    audio_segments['frame_offset'] = audio_segments['segment_offset'].apply(
        lambda offset: ceil(offset / 1000 * sampling_rate))

    # Use the sampling rate the audio was actually loaded at (not target_sr):
    # frame indices above were computed with it, so pitch extraction must
    # agree even if read_audio could not resample exactly to target_sr.
    # TODO: find a better solution if more acoustic annotations are added (concat dfs)
    annotations = _annotation_pitch(audio_segments, audio_time_series, sampling_rate)

    # The frame columns are internal bookkeeping; drop them from the output
    annotations = annotations.drop(columns=['frame_onset', 'frame_offset'])
    return annotations
def compute_acoustic_annotations(path_vtc, path_recordings, target_sr=16_000):
    """
    Compute the acoustic annotations for the recordings found in the VTC file.

    :param path_vtc: path to the VTC file to be read
    :type path_vtc: str
    :param path_recordings: path where the recordings are stored
    :type path_recordings: str
    :param target_sr: target sampling rate of the recordings
    :type target_sr: int
    :return: annotations (empty DataFrame if no VTC entry matches a recording)
    :rtype: pd.DataFrame
    """
    vtc_data = read_vtc(path_vtc, drop_na=True)
    audio_file_list = list_audio_files(path_recordings)

    annotations = []
    # Iterate over VTC annotations grouped by file
    for audio_file_name, audio_segments in vtc_data.groupby(by='file'):
        # Skip VTC entries whose recording was not found on disk
        if not audio_file_list.get(audio_file_name, False):
            continue
        file_anns = _compute_file_acoustic_annotation(audio_file_list[audio_file_name],
                                                      audio_segments,
                                                      target_sr)
        annotations.append(file_anns)

    if not annotations:
        # No VTC entry matched any recording: return an empty frame rather
        # than letting pd.concat([]) raise ValueError.
        return pd.DataFrame()
    return pd.concat(annotations, axis=0)
def save_annotations(save_path, save_name, annotations):
    """
    Save the computed annotations as a CSV file.

    :param save_path: path where to save the annotations
    :type save_path: str
    :param save_name: name of the file (extension is added here)
    :type save_name: str
    :param annotations: annotations to be saved
    :type annotations: pd.DataFrame
    :return: None
    :rtype: None
    """
    destination = os.path.join(save_path, '{}.csv'.format(save_name))

    # Refuse to overwrite the output of a previous run
    if os.path.exists(destination):
        raise FileExistsError('File {} already exists!'.format(destination))

    annotations.to_csv(destination, index=False)
    logger.info('Saved to {}.'.format(destination))
def main(path_vtc, path_recordings, save_path, target_sr):
    """
    Compute acoustic annotations for a VTC file and save them as CSV.

    :param path_vtc: path to the VTC file to be read
    :type path_vtc: str
    :param path_recordings: path where the recordings are stored
    :type path_recordings: str
    :param save_path: existing directory where the annotation CSV is written
    :type save_path: str
    :param target_sr: target sampling rate of the recordings
    :type target_sr: int
    :return: None
    :rtype: None
    :raises IOError: if save_path does not exist
    """
    # Raise explicitly instead of `assert`: asserts are stripped under
    # `python -O`, and the original `assert cond, IOError(...)` only used the
    # exception instance as an assert message, never raising it as an IOError.
    if not os.path.exists(os.path.abspath(save_path)):
        raise IOError('Path {} does not exist!'.format(save_path))

    annotations = compute_acoustic_annotations(path_vtc, path_recordings, target_sr)

    # Name the output after the VTC file (stem only, extension stripped)
    save_name = os.path.splitext(os.path.split(path_vtc)[-1])[0]
    save_annotations(save_path, save_name, annotations)
- def _parse_args(argv):
- import argparse
- parser = argparse.ArgumentParser(description='Compute acoustic annotations.')
- parser.add_argument('--path-vtc', required=True,
- help='Path to the VTC files for which acoustic annotations be computed.')
- parser.add_argument('--path-recordings', required=True,
- help='Path to the recordings corresponding to the recording filenames contained '
- 'in the VTC file.')
- parser.add_argument('--save-path', required=True,
- help='Path were the annotations should be saved.')
- parser.add_argument('--target-sr', required=False, default=16_000, type=int,
- help='Audio files sampling rate.')
- args = parser.parse_args(argv)
- return vars(args)
if __name__ == '__main__':
    import sys

    # Program name (sys.argv[0]) is not needed; parse only the arguments
    cli_args = _parse_args(sys.argv[1:])

    logging.basicConfig(level=logging.INFO)

    try:
        main(**cli_args)
    except Exception as e:
        # Log the full traceback and signal failure to the shell
        logger.exception(e)
        sys.exit(1)
    else:
        sys.exit(0)
|