123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149 |
- #!/usr/bin/env python
- # -*- coding: utf8 -*-
- # -----------------------------------------------------------------------------
- # File: utils.py (as part of project URUMETRICS)
- # Created: 01/06/2022 16:36
- # Last Modified: 01/06/2022 16:36
- # -----------------------------------------------------------------------------
- # Author: William N. Havard
- # Postdoctoral Researcher
- #
- # Mail : william.havard@ens.fr / william.havard@gmail.com
- #
- # Institution: ENS / Laboratoire de Sciences Cognitives et Psycholinguistique
- #
- # ------------------------------------------------------------------------------
- # Description:
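- # • Miscellaneous helpers: NumPy vectorisation decorator, audio file listing,
- #   VTC file reading and recursive directory walking.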
- # -----------------------------------------------------------------------------
- import os
- from functools import wraps
- import numpy as np
- import pandas as pd
- from ChildProject.converters import VtcConverter
def vectorise(func):
    """
    To be used as decorator to vectorise a function using Numpy. Contrary to Numpy's vectorize function, this function
    preserves the name of the original function
    :param func: function to be vectorised
    :type func: callable
    :return: decorated function, accepting any number of positional and keyword arguments which are all forwarded
             to the vectorised version of func
    :rtype: callable
    """
    # Build the vectorised callable once, at decoration time, rather than on every call
    vectorised = np.vectorize(func)

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Forward every positional argument so that functions taking several arguments can be decorated too
        return vectorised(*args, **kwargs)

    return wrapper
def list_audio_files(recordings_path, ext='wav'):
    """
    Recursively collect the audio files ending with ext found under recordings_path
    :param recordings_path: path to be explored
    :type recordings_path: str
    :param ext: extension
    :type ext: str
    :return: dictionary mapping the bare filename of each file (no directory, no extension) to its full path
    :rtype: dict of str to str
    """
    audio_files = {}
    for file_path in walk_dir(recordings_path, ext=ext, return_full_path=True):
        # Files sharing the same bare name overwrite one another: the last one visited is kept
        audio_files[get_raw_filename(file_path)] = file_path
    return audio_files
def read_vtc(path_vtc, drop_na=False):
    """
    Load a VTC file into a DataFrame (code borrowed from ChildProject.converters.VtcConverter)
    :param path_vtc: path to VTC file to be read
    :type path_vtc: str
    :param drop_na: whether the rows whose speaker type is the string 'NA' should be discarded
    :type drop_na: boolean
    :return: DataFrame of the annotations contained in the file, with the columns file, segment_onset,
             segment_offset and speaker_type
    :rtype: pd.DataFrame
    """
    # NOTE(review): assert statements are stripped when Python runs with -O, so this check is best-effort only
    assert os.path.exists(path_vtc), IOError('Path to VTC file {} does not exist!'.format(path_vtc))

    # The file is read as space-separated values without header, one column per name below
    raw_columns = ["type", "file", "chnl", "tbeg", "tdur", "ortho", "stype", "name", "conf", "unk"]
    annotations = pd.read_csv(path_vtc, sep=" ", names=raw_columns)

    # tbeg/tdur are scaled by 1000 and rounded to integers (presumably seconds to milliseconds -- TODO confirm)
    onset = annotations["tbeg"]
    offset = annotations["tbeg"] + annotations["tdur"]
    annotations = annotations.assign(
        segment_onset=onset.mul(1000).round().astype(int),
        segment_offset=offset.mul(1000).round().astype(int),
        speaker_type=annotations["name"].map(VtcConverter.SPEAKER_TYPE_TRANSLATION),
    )

    # Only the file name and the three derived columns are kept
    discarded = [column for column in raw_columns if column != "file"]
    annotations = annotations.drop(columns=discarded)

    if not drop_na:
        return annotations

    is_na = annotations['speaker_type'].isin(['NA'])
    return annotations[~is_na]
def get_raw_filename(fp):
    """
    Strip both the directory part and the (last) extension from a file path
    :param fp: path to file
    :type fp: str
    :return: bare filename
    :rtype: str
    """
    file_name = os.path.basename(fp)
    bare_name, _ = os.path.splitext(file_name)
    return bare_name
def walk_dir(path, ext=(), return_full_path=True):
    """
    Recursively explore a path to find files ending with the desired extension
    :param path: path to be explored
    :type path: str
    :param ext: extension (without the leading dot) or collection of extensions to keep; when empty or None,
                every file is returned
    :type ext: str or iterable of str
    :param return_full_path: whether the list should return the full path to the file (path + path to file) or not
    :type return_full_path: boolean
    :return: sorted list of files
    :rtype: list of str
    """
    path = os.fspath(path)

    if ext is None:
        extensions = []
    elif isinstance(ext, str):
        extensions = [ext]
    else:
        extensions = list(ext)

    files = []
    for dir_path, _, file_names in os.walk(path):
        # os.walk builds every directory path by extending `path`, so slicing that leading prefix off gives the
        # location relative to `path`. Only the prefix is removed: a str.replace here would also delete any later
        # occurrence of `path` inside the sub-directory names and return paths that do not exist.
        path_suffix = dir_path[len(path):].lstrip(os.sep)
        for file_name in file_names:
            file_extension = os.path.splitext(file_name)[1][1:]
            if extensions and file_extension not in extensions:
                continue
            if return_full_path:
                files.append(os.path.join(path, path_suffix, file_name))
            else:
                files.append(os.path.join(path_suffix, file_name))
    return sorted(files)
|