1 year ago · f7c6ced59f
--- a/README.md
+++ b/README.md
@@ -9,7 +9,9 @@ and activate it :
 
				 
			
 
				 ```conda activate measuring_cld```
			
 
				 
			
 
				-We provide all the data already pre-processed and phonemized. But if you want to re-download the raw data and to re-pre-processed it entierely, then you will need to install phonemizer (https://github.com/bootphon/phonemizer) with the espeak backend.
			
 
				+You will also need to install KenLM (https://github.com/kpu/kenlm).
			
 
				+
			
 
				+We provide all the data already pre-processed and phonemized. But if you want to re-download the raw data and re-pre-process it entirely, then you will need to install phonemizer (https://github.com/bootphon/phonemizer) with the espeak backend.
			
 
				 
			
 
				 ## Folder structure
			
 
				 
			
--- a/code/test_on_all_languages.py
+++ b/code/test_on_all_languages.py
@@ -81,10 +81,9 @@ def create_sparse_combinantions(values: Iterable, variables=3) -> set:
 
				     """
			
 
				     This function will create combinantions for noising.
			
 
				     Each item in the returned set contains four values corresponding\
			
 
				-    to (1) phoneme noise, (2) noise of from adult to child utterances,\
			
 
				-    (3) noise of from child to adult utterances and (4) noise of
			
 
				+    to (1) phoneme noise, (2) speaker noise and (3) noise of the order of the phonemes.
			
 
				     These combinantions are sparse because we only noise one value at time.
			
 
				-    For example, an item can be (0.0, 0.0, 0.0, 0.25), which means that we only
			
 
				+    For example, an item can be (0.0, 0.0, 0.25), which means that we only
			
 
				     noise 25 percent of the phonemes, and nothing else is affected.
			
 
				     See the file make_noiser.py for more infomrations.
			
 
				     """
			
@@ -104,10 +103,7 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
 
				                 "age", "perplexity", "entropy", "phonemes_order_noise",\
			
 
				                 "speakers_noise", "phonemes_noise"]
			
 
				     results = pd.DataFrame(columns=columns, index=None)
			
 
				-    # all_combinations = (list(product((0.0, 0.25, 0.5, 0.75), repeat=4))
			
 
				-    #                       if add_noise else [((0.0, 0.0, 0.0, 0.0))])
			
 
				     sparse_combinantions = create_sparse_combinantions((0.0, 0.25, 0.5, 0.75, 1))
			
 
				-    # noise_values = np.linspace(0.0, 1.0, num=6)
			
 
				     for phonemes_noise, speakers_noise, phonemes_order_noise in tqdm(sparse_combinantions, total=len(sparse_combinantions)) :
			
 
				         for test_filename, model_filename in product(os.listdir(json_files_directory), os.listdir(models_directory)) :
			
 
				             lg_iso, _ = test_filename.split(".")
			
@@ -128,7 +124,6 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
 
				                     if age == "None" : print(family, lg_iso, age); continue
			
 
				                     for speaker in loaded_json[family][age] :
			
 
				                         if speaker not in ["Adult", "Target_Child"] : continue
			
 
				-                        # results_statistics = statistics_word(loaded_json[family][age][speaker], model)
			
 
				                         language, typology = LANGUAGES_TYPOLOGIES[lg_iso]
			
 
				                         ppl = model.perplexity("\n".join(loaded_json[family][age][speaker]))
			
 
				                         entropy = log(ppl)
			
--- a/results/plot_results.png
+++ b/results/plot_results.png
@@ -0,0 +1 @@
 
				+../.git/annex/objects/Vq/1V/MD5E-s1158490--07e64ccccdf09682d892e990390940c3.png/MD5E-s1158490--07e64ccccdf09682d892e990390940c3.png
		`@@ -0,0 +1 @@`
		`+../.git/annex/objects/Vq/1V/MD5E-s1158490--07e64ccccdf09682d892e990390940c3.png/MD5E-s1158490--07e64ccccdf09682d892e990390940c3.png`