|
@@ -81,10 +81,9 @@ def create_sparse_combinantions(values: Iterable, variables=3) -> set:
|
|
"""
|
|
"""
|
|
This function will create combinations for noising.
|
|
This function will create combinations for noising.
|
|
Each item in the returned set contains four values corresponding\
|
|
Each item in the returned set contains three values corresponding\
|
|
- to (1) phoneme noise, (2) noise of from adult to child utterances,\
|
|
|
|
- (3) noise of from child to adult utterances and (4) noise of
|
|
|
|
|
|
+ to (1) phoneme noise, (2) speaker noise and (3) noise of the order of the phonemes.
|
|
These combinations are sparse because we only noise one value at a time.
|
|
These combinations are sparse because we only noise one value at a time.
|
|
- For example, an item can be (0.0, 0.0, 0.0, 0.25), which means that we only
|
|
|
|
|
|
+ For example, an item can be (0.0, 0.0, 0.25), which means that we only
|
|
noise 25 percent of the phonemes, and nothing else is affected.
|
|
noise 25 percent of the phonemes, and nothing else is affected.
|
|
See the file make_noiser.py for more information.
|
|
See the file make_noiser.py for more information.
|
|
"""
|
|
"""
|
|
@@ -104,10 +103,7 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
|
|
"age", "perplexity", "entropy", "phonemes_order_noise",\
|
|
"age", "perplexity", "entropy", "phonemes_order_noise",\
|
|
"speakers_noise", "phonemes_noise"]
|
|
"speakers_noise", "phonemes_noise"]
|
|
results = pd.DataFrame(columns=columns, index=None)
|
|
results = pd.DataFrame(columns=columns, index=None)
|
|
- # all_combinations = (list(product((0.0, 0.25, 0.5, 0.75), repeat=4))
|
|
|
|
- # if add_noise else [((0.0, 0.0, 0.0, 0.0))])
|
|
|
|
sparse_combinantions = create_sparse_combinantions((0.0, 0.25, 0.5, 0.75, 1))
|
|
sparse_combinantions = create_sparse_combinantions((0.0, 0.25, 0.5, 0.75, 1))
|
|
- # noise_values = np.linspace(0.0, 1.0, num=6)
|
|
|
|
for phonemes_noise, speakers_noise, phonemes_order_noise in tqdm(sparse_combinantions, total=len(sparse_combinantions)) :
|
|
for phonemes_noise, speakers_noise, phonemes_order_noise in tqdm(sparse_combinantions, total=len(sparse_combinantions)) :
|
|
for test_filename, model_filename in product(os.listdir(json_files_directory), os.listdir(models_directory)) :
|
|
for test_filename, model_filename in product(os.listdir(json_files_directory), os.listdir(models_directory)) :
|
|
lg_iso, _ = test_filename.split(".")
|
|
lg_iso, _ = test_filename.split(".")
|
|
@@ -128,7 +124,6 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
|
|
if age == "None" : print(family, lg_iso, age); continue
|
|
if age == "None" : print(family, lg_iso, age); continue
|
|
for speaker in loaded_json[family][age] :
|
|
for speaker in loaded_json[family][age] :
|
|
if speaker not in ["Adult", "Target_Child"] : continue
|
|
if speaker not in ["Adult", "Target_Child"] : continue
|
|
- # results_statistics = statistics_word(loaded_json[family][age][speaker], model)
|
|
|
|
language, typology = LANGUAGES_TYPOLOGIES[lg_iso]
|
|
language, typology = LANGUAGES_TYPOLOGIES[lg_iso]
|
|
ppl = model.perplexity("\n".join(loaded_json[family][age][speaker]))
|
|
ppl = model.perplexity("\n".join(loaded_json[family][age][speaker]))
|
|
entropy = log(ppl)
|
|
entropy = log(ppl)
|