123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- """This module implements some methods to artificially add noise to the data.
- """
- from typing import List, Dict
- from random import choices, sample, shuffle
- import copy
- import random
- random.seed(80)  # Seed the shared generator behind choices/sample/shuffle at import time so the sampled noise is repeatable.
class Noise:
    """
    Simulate noise in the data.

    Noise can be applied at three levels:
    (1) the order of the phonemes of an utterance, by moving some phonemes
        to other positions of the same utterance;
    (2) the phonemes themselves, by replacing some of them with phonemes
        drawn from ``most_probable_phonemes``;
    (3) the speakers, by exchanging some utterances between the two speakers.

    Attributes
    ----------
    - most_probable_phonemes :
        Phonemes from which the replacement phonemes are drawn. See point 2.
    - phonemes_order_noise :
        Fraction of the phonemes of an utterance whose positions are
        shuffled. See point 1.
    - speakers_noise :
        Pair ``(target_child_rate, adult_rate)``: fraction of the utterances
        of each speaker that is handed over to the other speaker. See point 3.
    - phonemes_noise :
        Fraction of the phonemes of an utterance that are replaced. See point 2.
    """

    # Keys under which the utterances of each speaker are stored.
    _CHILD = "Target_Child"
    _ADULT = "Adult"

    def __init__(self,
                 most_probable_phonemes: list,
                 phonemes_order_noise=0.3,
                 speakers_noise=(0.5, 0.5),
                 phonemes_noise=0.5):
        self.most_probable_phonemes = most_probable_phonemes
        self.phonemes_order_noise = phonemes_order_noise
        self.speakers_noise = speakers_noise
        self.phonemes_noise = phonemes_noise

    def _order_noise(self, sequence: List[str]) -> str:
        """
        Shuffle the positions of a fraction of the phonemes of a sequence.

        The phonemes are only moved around: the result always contains
        exactly the same phonemes as the input. The input list is left
        untouched.

        Parameters
        ----------
        - sequence : list
            The sequence for which the phonemes order must be noised.

        Returns
        -------
        - str :
            The space-joined sequence with the order of phonemes noised.
        """
        noised = list(sequence)
        how_many = round(len(noised) * self.phonemes_order_noise)
        # Positions that take part in the shuffle, and the position each of
        # them will receive its new phoneme from.
        positions = sample(range(len(noised)), k=how_many)
        sources = list(positions)
        shuffle(sources)
        # Read every moved phoneme BEFORE writing any of them. Assigning while
        # still reading from the same list would copy already-overwritten
        # values, duplicating some phonemes and losing others.
        moved = [noised[source] for source in sources]
        for position, phoneme in zip(positions, moved):
            noised[position] = phoneme
        return " ".join(noised)

    def _phonemes_noise(self, sequence: List[str]) -> str:
        """
        Replace some phonemes of a sequence with phonemes drawn from
        ``most_probable_phonemes``.

        Positions are drawn with replacement, so the same position can be
        drawn several times and fewer distinct phonemes than
        ``round(len(sequence) * phonemes_noise)`` may end up replaced.
        The input list is left untouched.

        Parameters
        ----------
        - sequence : list
            The sequence for which the phonemes must be noised.

        Returns
        -------
        - str :
            The space-joined sequence with noised phonemes.
        """
        noised = list(sequence)
        how_many = round(len(noised) * self.phonemes_noise)
        positions = choices(range(len(noised)), k=how_many)
        # New phonemes come only from the most probable phonemes.
        replacements = choices(self.most_probable_phonemes, k=how_many)
        for position, phoneme in zip(positions, replacements):
            noised[position] = phoneme
        return " ".join(noised)

    def _noise_utterance(self, utterance: str) -> str:
        """Apply the order noise, then the phonemes noise, to one utterance."""
        reordered = self._order_noise(utterance.split(" "))
        return self._phonemes_noise(reordered.split(" "))

    def _speakers_noise(self, speakers_sequences: Dict[str, List[str]]) -> Dict[str, List[str]]:
        """
        Exchange a fraction of the utterances between the two speakers.

        The exchange is set-based: utterances repeated within one speaker are
        collapsed to a single occurrence. ``speakers_sequences`` is modified
        in place and also returned.

        Parameters
        ----------
        - speakers_sequences : dict
            Dictionary containing the utterances for each speaker, under the
            keys "Target_Child" and "Adult".

        Returns
        -------
        - dict :
            The same dictionary, with some utterances interchanged between
            the two speakers. Each speaker maps to a sorted list.
        """
        child_rate, adult_rate = self.speakers_noise
        child = set(speakers_sequences[self._CHILD])
        adult = set(speakers_sequences[self._ADULT])
        # Sample from a sorted list: the iteration order of a set of strings
        # changes from one interpreter run to the next (hash randomisation),
        # which would make the sampling irreproducible even with a fixed seed.
        adult_moved = set(sample(sorted(adult), k=round(len(adult) * adult_rate)))
        child_moved = set(sample(sorted(child), k=round(len(child) * child_rate)))
        # Each speaker loses its sampled utterances and receives the other's.
        adult = (adult - adult_moved) | child_moved
        child = (child - child_moved) | adult_moved
        speakers_sequences[self._CHILD] = sorted(child)
        speakers_sequences[self._ADULT] = sorted(adult)
        return speakers_sequences

    def __call__(self, loaded_dataset: dict) -> dict:
        """
        Apply the three types of noise.

        The dataset is modified in place and also returned.

        Parameters
        ----------
        loaded_dataset : dict
            The dictionary containing the utterances for each family, at each
            age and for each speaker. Utterances are space-separated phonemes.

        Returns
        -------
        dict :
            The noised data.
        """
        for family in loaded_dataset:
            for age in loaded_dataset[family]:
                speakers = loaded_dataset[family][age]
                for speaker in (self._ADULT, self._CHILD):
                    if speaker not in speakers:
                        continue
                    utterances = speakers[speaker]
                    for idx, utterance in enumerate(utterances):
                        utterances[idx] = self._noise_utterance(utterance)
                # NOTE(review): `all` skips the exchange entirely as soon as
                # one of the two rates is 0, so one-sided noise such as
                # (0.5, 0) is never applied -- confirm this is intended.
                if self._CHILD in speakers and self._ADULT in speakers and all(self.speakers_noise):
                    noised_speakers = self._speakers_noise(speakers)
                    speakers[self._CHILD] = noised_speakers[self._CHILD]
                    speakers[self._ADULT] = noised_speakers[self._ADULT]
        return loaded_dataset
|