1 year ago · f7c6ced59f
--- a/README.md
+++ b/README.md
@@ -9,7 +9,9 @@ and activate it :
 
																 ```conda activate measuring_cld```
															
 
																-We provide all the data already pre-processed and phonemized. But if you want to re-download the raw data and to re-pre-processed it entierely, then you will need to install phonemizer (https://github.com/bootphon/phonemizer) with the espeak backend.
															
 
																+You will also need to install KenLM (https://github.com/kpu/kenlm).
															
 
																+
															
 
																+We provide all the data already pre-processed and phonemized. But if you want to re-download the raw data and re-pre-process them entirely, then you will need to install phonemizer (https://github.com/bootphon/phonemizer) with the espeak backend.
															
 
																 ## Folder structure
															
--- a/code/test_on_all_languages.py
+++ b/code/test_on_all_languages.py
@@ -81,10 +81,9 @@ def create_sparse_combinantions(values: Iterable, variables=3) -> set:
 
																     """
															
 
																     This function will create combinantions for noising.
															
 
																     Each item in the returned set contains four values corresponding\
															
 
																-    to (1) phoneme noise, (2) noise of from adult to child utterances,\
															
 
																-    (3) noise of from child to adult utterances and (4) noise of
															
 
																+    to (1) phoneme noise, (2) speaker noise and (3) noise of the order of the phonemes.
															
 
																     These combinantions are sparse because we only noise one value at time.
															
 
																-    For example, an item can be (0.0, 0.0, 0.0, 0.25), which means that we only
															
 
																+    For example, an item can be (0.0, 0.0, 0.25), which means that we only
															
 
																     noise 25 percent of the phonemes, and nothing else is affected.
															
 
																     See the file make_noiser.py for more infomrations.
															
 
																     """
															
@@ -104,10 +103,7 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
 
																                 "age", "perplexity", "entropy", "phonemes_order_noise",\
															
 
																                 "speakers_noise", "phonemes_noise"]
															
 
																     results = pd.DataFrame(columns=columns, index=None)
															
 
																-    # all_combinations = (list(product((0.0, 0.25, 0.5, 0.75), repeat=4))
															
 
																-    #                       if add_noise else [((0.0, 0.0, 0.0, 0.0))])
															
 
																     sparse_combinantions = create_sparse_combinantions((0.0, 0.25, 0.5, 0.75, 1))
															
 
																-    # noise_values = np.linspace(0.0, 1.0, num=6)
															
 
																     for phonemes_noise, speakers_noise, phonemes_order_noise in tqdm(sparse_combinantions, total=len(sparse_combinantions)) :
															
 
																         for test_filename, model_filename in product(os.listdir(json_files_directory), os.listdir(models_directory)) :
															
 
																             lg_iso, _ = test_filename.split(".")
															
@@ -128,7 +124,6 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
 
																                     if age == "None" : print(family, lg_iso, age); continue
															
 
																                     for speaker in loaded_json[family][age] :
															
 
																                         if speaker not in ["Adult", "Target_Child"] : continue
															
 
																-                        # results_statistics = statistics_word(loaded_json[family][age][speaker], model)
															
 
																                         language, typology = LANGUAGES_TYPOLOGIES[lg_iso]
															
 
																                         ppl = model.perplexity("\n".join(loaded_json[family][age][speaker]))
															
 
																                         entropy = log(ppl)
															
--- a/results/plot_results.png
+++ b/results/plot_results.png
@@ -0,0 +1 @@
 
																+../.git/annex/objects/Vq/1V/MD5E-s1158490--07e64ccccdf09682d892e990390940c3.png/MD5E-s1158490--07e64ccccdf09682d892e990390940c3.png
	`@@ -0,0 +1 @@`
			`+../.git/annex/objects/Vq/1V/MD5E-s1158490--07e64ccccdf09682d892e990390940c3.png/MD5E-s1158490--07e64ccccdf09682d892e990390940c3.png`