compute_acoustic_annotations.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# File: compute_acoustic_annotations.py (as part of project URUMETRICS)
# Created: 01/06/2022 15:25
# Last Modified: 01/06/2022 15:25
# -----------------------------------------------------------------------------
# Author: William N. Havard
# Postdoctoral Researcher
#
# Mail : william.havard@ens.fr / william.havard@gmail.com
#
# Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
#
# ------------------------------------------------------------------------------
# Description:
# • This file computes acoustic annotations for each segment identified
# by the VTC.
# -----------------------------------------------------------------------------
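# Pipeline overview: read the VTC file, group its segments by recording, load each
# recording at the target sampling rate, convert each segment's on/offset (in milliseconds)
# to sample indices, compute pitch statistics per segment, and save the result as a CSV
# file named after the VTC file.
# -----------------------------------------------------------------------------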
import logging
import os
from math import ceil, floor

import pandas as pd

import utils_audio
from utils import list_audio_files, read_vtc
from utils_annotations import get_pitch
from utils_audio import get_audio_slice, read_audio

logger = logging.getLogger(__name__)


def _annotation_pitch(audio_segments, audio_time_series, sampling_rate):
    """
    Extract pitch-related information for the audio segments audio_segments of the time series
    audio_time_series with sampling rate sampling_rate

    :param audio_segments: dataframe of segments for which we want to compute annotations
    :type audio_segments: pd.DataFrame
    :param audio_time_series: audio time series
    :type audio_time_series: np.matrix
    :param sampling_rate: sampling rate
    :type sampling_rate: int
    :return: pitch annotations
    :rtype: pd.DataFrame
    """
    pitch = pd.DataFrame.from_records(
        audio_segments.apply(
            lambda row: get_pitch(
                get_audio_slice(audio_time_series, row['frame_onset'], row['frame_offset']),
                sampling_rate, func=utils_audio.f2st),
            axis=1).tolist())

    # Drop raw pitch values
    pitch.drop(list(pitch.filter(regex='raw_')), axis=1, inplace=True)

    pitch.index = audio_segments.index
    audio_segments = pd.concat([audio_segments, pitch], axis=1)

    return audio_segments


def _compute_file_acoustic_annotation(audio_path, audio_segments, target_sr):
    """
    Compute the acoustic annotations for the segments audio_segments of the file audio_path
    with sampling rate target_sr

    :param audio_path: path to the audio file to be read
    :type audio_path: str
    :param audio_segments: dataframe of segments for which we want to compute annotations
    :type audio_segments: pd.DataFrame
    :param target_sr: target sampling rate of the recording
    :type target_sr: int
    :return: annotations
    :rtype: pd.DataFrame
    """
    audio_time_series, sampling_rate = read_audio(audio_path, target_sr=target_sr)

    # Compute the start and end frame of each segment from its on/offset given in milliseconds
    audio_segments['frame_onset'] = audio_segments['segment_onset'].apply(
        lambda onset: floor(onset / 1000 * sampling_rate))
    audio_segments['frame_offset'] = audio_segments['segment_offset'].apply(
        lambda offset: ceil(offset / 1000 * sampling_rate))
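
    # Worked example of the conversion above (values are illustrative only): an onset of
    # 1234 ms at 44_100 Hz gives floor(1234 / 1000 * 44_100) = floor(54_419.4) = 54_419,
    # whereas at the default 16_000 Hz every millisecond maps to exactly 16 samples, so
    # floor/ceil only matter when the product is not an integer.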

    # Find better solution if more acoustic annotations are added in the future (concat dfs)
    annotations = _annotation_pitch(audio_segments, audio_time_series, target_sr)

    annotations.drop(columns=['frame_onset',
                              'frame_offset'],
                     inplace=True)

    return annotations


def compute_acoustic_annotations(path_vtc, path_recordings, target_sr=16_000):
    """
    Compute the acoustic annotations for the recordings found in the VTC file

    :param path_vtc: path to the VTC file to be read
    :type path_vtc: str
    :param path_recordings: path where the recordings are stored
    :type path_recordings: str
    :param target_sr: target sampling rate of the recordings
    :type target_sr: int
    :return: annotations
    :rtype: pd.DataFrame
    """
    vtc_data = read_vtc(path_vtc, drop_na=True)
    audio_file_list = list_audio_files(path_recordings)

    annotations = []
    # Iterate over VTC annotations grouped by file
    vtc_audio_files = vtc_data.groupby(by='file')
    for audio_file_name, audio_segments in vtc_audio_files:
        if not audio_file_list.get(audio_file_name, False):
            continue
        file_anns = _compute_file_acoustic_annotation(audio_file_list[audio_file_name],
                                                      audio_segments, target_sr)
        annotations.append(file_anns)

    df_annotations = pd.concat(annotations, axis=0)
    return df_annotations
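
# A minimal programmatic usage sketch (illustration only; the paths and file names below
# are hypothetical):
#
#     annotations = compute_acoustic_annotations(path_vtc='vtc/my_corpus.rttm',
#                                                path_recordings='recordings/',
#                                                target_sr=16_000)
#     save_annotations(save_path='output/', save_name='my_corpus', annotations=annotations)
#
# save_annotations (defined below) refuses to overwrite an existing CSV of the same name.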


def save_annotations(save_path, save_name, annotations):
    """
    Save the computed annotations

    :param save_path: path where to save the annotations
    :type save_path: str
    :param save_name: name of the file
    :type save_name: str
    :param annotations: annotations to be saved
    :type annotations: pd.DataFrame
    :return: None
    :rtype: None
    """
    full_save_path = os.path.join(save_path, '{}.csv'.format(save_name))
    if os.path.exists(full_save_path):
        raise FileExistsError('File {} already exists!'.format(full_save_path))

    annotations.to_csv(full_save_path, index=False)
    logger.info('Saved to {}.'.format(full_save_path))


def main(path_vtc, path_recordings, save_path, target_sr):
    assert os.path.exists(os.path.abspath(save_path)), IOError('Path {} does not exist!'.format(save_path))

    annotations = compute_acoustic_annotations(path_vtc, path_recordings, target_sr)
    save_name = os.path.splitext(os.path.split(path_vtc)[-1])[0]
    save_annotations(save_path, save_name, annotations)


def _parse_args(argv):
    import argparse

    parser = argparse.ArgumentParser(description='Compute acoustic annotations.')
    parser.add_argument('--path-vtc', required=True,
                        help='Path to the VTC file for which acoustic annotations should be computed.')
    parser.add_argument('--path-recordings', required=True,
                        help='Path to the recordings corresponding to the recording filenames contained '
                             'in the VTC file.')
    parser.add_argument('--save-path', required=True,
                        help='Path where the annotations should be saved.')
    parser.add_argument('--target-sr', required=False, default=16_000, type=int,
                        help='Audio files sampling rate.')
    args = parser.parse_args(argv)
    return vars(args)
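
# Example command-line invocation (a sketch; the paths below are hypothetical):
#
#     python compute_acoustic_annotations.py \
#         --path-vtc vtc/my_corpus.rttm \
#         --path-recordings recordings/ \
#         --save-path output/ \
#         --target-sr 16000
#
# The output CSV is named after the VTC file, i.e. output/my_corpus.csv in this example.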


if __name__ == '__main__':
    import sys

    pgrm_name, argv = sys.argv[0], sys.argv[1:]
    args = _parse_args(argv)
    logging.basicConfig(level=logging.INFO)

    try:
        main(**args)
        sys.exit(0)
    except Exception as e:
        logger.exception(e)
        sys.exit(1)