utils.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. #!/usr/bin/env python
  2. # -*- coding: utf8 -*-
  3. # -----------------------------------------------------------------------------
  4. # File: utils.py (as part of project URUMETRICS)
  5. # Created: 01/06/2022 16:36
  6. # Last Modified: 01/06/2022 16:36
  7. # -----------------------------------------------------------------------------
  8. # Author: William N. Havard
  9. # Postdoctoral Researcher
  10. #
  11. # Mail : william.havard@ens.fr / william.havard@gmail.com
  12. #
  13. # Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
  14. #
  15. # ------------------------------------------------------------------------------
  16. # Description:
  17. # •
  18. # -----------------------------------------------------------------------------
  19. import os
  20. from functools import wraps
  21. import numpy as np
  22. import pandas as pd
  23. from ChildProject.converters import VtcConverter
  24. def vectorise(func):
  25. """
  26. To be used as decorator to vectorise a function using Numpy. Contrary to Numpy's vectorize function, this function
  27. preserves the name of the original function
  28. :param func: function
  29. :type func: callable
  30. :return: decorated function
  31. :rtype: callable
  32. """
  33. @wraps(func)
  34. def wrapper(args, **kwargs):
  35. return np.vectorize(func)(args, **kwargs)
  36. return wrapper
  37. def list_audio_files(recordings_path, ext='wav'):
  38. """
  39. Returns a list of audio file ending with ext in the path recordings_path
  40. :param recordings_path: path to be explored
  41. :type recordings_path: str
  42. :param ext: extension
  43. :type ext: str
  44. :return: list of files
  45. :rtype: list of str
  46. """
  47. file_list = walk_dir(recordings_path, ext=ext, return_full_path=True)
  48. file_dict = {
  49. get_raw_filename(fp): fp for fp in file_list
  50. }
  51. return file_dict
  52. def read_vtc(path_vtc, drop_na=False):
  53. """
  54. Reads a VTC file and returns a DataFrame (code borrowed from ChildProject.converters.VtcConverter
  55. :param path_vtc: path to VTC file to be read
  56. :return: DataFrame of the annotations contained in the file
  57. """
  58. assert os.path.exists(path_vtc), IOError('Path to VTC file {} does not exist!'.format(path_vtc))
  59. rttm = pd.read_csv(
  60. path_vtc,
  61. sep=" ",
  62. names=[
  63. "type",
  64. "file",
  65. "chnl",
  66. "tbeg",
  67. "tdur",
  68. "ortho",
  69. "stype",
  70. "name",
  71. "conf",
  72. "unk",
  73. ],
  74. )
  75. df = rttm
  76. df["segment_onset"] = df["tbeg"].mul(1000).round().astype(int)
  77. df["segment_offset"] = (df["tbeg"] + df["tdur"]).mul(1000).round().astype(int)
  78. df["speaker_type"] = df["name"].map(VtcConverter.SPEAKER_TYPE_TRANSLATION)
  79. df.drop(
  80. [
  81. "type",
  82. "chnl",
  83. "tbeg",
  84. "tdur",
  85. "ortho",
  86. "stype",
  87. "name",
  88. "conf",
  89. "unk",
  90. ],
  91. axis=1,
  92. inplace=True,
  93. )
  94. if drop_na:
  95. df = df[~df['speaker_type'].isin(['NA'])]
  96. return df
  97. def get_raw_filename(fp):
  98. """
  99. Return the raw filename of a file (path removed and extension removed)
  100. :param fp: path to file
  101. :type fp: str
  102. :return: bare filename
  103. :rtype: str
  104. """
  105. return os.path.basename(os.path.splitext(fp)[0])
  106. def walk_dir(path, ext=[], return_full_path=True):
  107. """
  108. Recursively explore a path to find files ending with the desired extension
  109. :param path: path to be explored
  110. :type path: str
  111. :param ext: extension
  112. :type ext: str
  113. :param return_full_path: whether the list should return the full path to the file (path + path to file) or not
  114. :type return_full_path: boolean
  115. :return: list of files
  116. :rtype: list of str
  117. """
  118. if type(ext) == str: ext = [ext]
  119. files = []
  120. for p, d, f in os.walk(path):
  121. for ff in f:
  122. _, file_extension = os.path.splitext(ff)
  123. if not len(ext) or file_extension[1:] in ext:
  124. path_suffix = p.replace(os.path.commonprefix([path, p]), '').lstrip(os.sep)
  125. if return_full_path:
  126. files.append(os.path.join(path, path_suffix, ff))
  127. else:
  128. files.append(os.path.join(path_suffix, ff))
  129. return sorted(files)