Browse Source

add rmarkdown code

yaya-sy 1 year ago
parent
commit
b4fa96e143
50 changed files with 120 additions and 66 deletions
  1. 8 3
      README.md
  2. BIN
      code/__pycache__/get_most_probable_phonemes.cpython-310.pyc
  3. BIN
      code/__pycache__/make_noiser.cpython-310.pyc
  4. BIN
      code/__pycache__/utterances_cleaner.cpython-310.pyc
  5. 3 1
      code/download_childes_corpora.py
  6. 2 3
      code/get_most_probable_phonemes.py
  7. 5 6
      code/make_noiser.py
  8. 2 1
      code/one_utterance_per_line_to_json.py
  9. 18 21
      code/test_on_all_languages.py
  10. 43 18
      code/utterances_cleaner.py
  11. 0 1
      commands_reproduction.txt
  12. 1 1
      datasets/childes_json_corpora/da.json
  13. 1 1
      datasets/childes_json_corpora/de.json
  14. 1 1
      datasets/childes_json_corpora/en.json
  15. 1 1
      datasets/childes_json_corpora/es.json
  16. 1 1
      datasets/childes_json_corpora/et.json
  17. 1 1
      datasets/childes_json_corpora/eu.json
  18. 1 1
      datasets/childes_json_corpora/fr.json
  19. 1 1
      datasets/childes_json_corpora/ja.json
  20. 1 1
      datasets/childes_json_corpora/pl.json
  21. 1 1
      datasets/childes_json_corpora/pt.json
  22. 1 1
      datasets/childes_json_corpora/sr.json
  23. 1 1
      datasets/childes_json_corpora/tr.json
  24. 1 0
      datasets/childes_one_utterance_per_line_files/da.one_utterance_per_line
  25. 1 0
      datasets/childes_one_utterance_per_line_files/de.one_utterance_per_line
  26. 1 0
      datasets/childes_one_utterance_per_line_files/en.one_utterance_per_line
  27. 1 0
      datasets/childes_one_utterance_per_line_files/es.one_utterance_per_line
  28. 1 0
      datasets/childes_one_utterance_per_line_files/et.one_utterance_per_line
  29. 1 0
      datasets/childes_one_utterance_per_line_files/eu.one_utterance_per_line
  30. 1 0
      datasets/childes_one_utterance_per_line_files/fr.one_utterance_per_line
  31. 1 0
      datasets/childes_one_utterance_per_line_files/ja.one_utterance_per_line
  32. 1 0
      datasets/childes_one_utterance_per_line_files/not_downloaded_data.txt
  33. 1 0
      datasets/childes_one_utterance_per_line_files/pl.one_utterance_per_line
  34. 1 0
      datasets/childes_one_utterance_per_line_files/pt.one_utterance_per_line
  35. 1 0
      datasets/childes_one_utterance_per_line_files/sr.one_utterance_per_line
  36. 1 0
      datasets/childes_one_utterance_per_line_files/tr.one_utterance_per_line
  37. 1 0
      estimated/da.one_sentence_per_line.arpa
  38. 1 0
      estimated/de.one_sentence_per_line.arpa
  39. 1 0
      estimated/en.one_sentence_per_line.arpa
  40. 1 0
      estimated/es.one_sentence_per_line.arpa
  41. 1 0
      estimated/et.one_sentence_per_line.arpa
  42. 1 0
      estimated/eu.one_sentence_per_line.arpa
  43. 1 0
      estimated/fr.one_sentence_per_line.arpa
  44. 1 0
      estimated/ja.one_sentence_per_line.arpa
  45. 1 0
      estimated/pl.one_sentence_per_line.arpa
  46. 1 0
      estimated/pt.one_sentence_per_line.arpa
  47. 1 0
      estimated/sr.one_sentence_per_line.arpa
  48. 1 0
      estimated/tr.one_sentence_per_line.arpa
  49. 1 0
      final_results_analysis.Rmd
  50. 1 0
      results/results.csv

+ 8 - 3
README.md

@@ -9,6 +9,8 @@ and activate it :
 
 ```conda activate measuring_cld```
 
+We provide all the data already pre-processed and phonemized. But if you want to re-download the raw data and re-pre-process it entirely, then you will need to install phonemizer (https://github.com/bootphon/phonemizer) with the espeak backend.
+
 ## Folder structure
 
 - All source code is located in `code/`
@@ -67,9 +69,12 @@ This will output a `evalution.csv` file in a `results` folder.
 We can now compute the entropies on the CHILDES utterances with the script `code/test_on_all_languages.py`. This script take the following arguments:
 
 > `--train_directory` : The directory containing the train files tokenized in phonemes.
-> `--models_directory`: The directory containing the trained language models.
->  --json_files_directory: The directory containing CHILDES utterances in json format for each language.
->  --add_noise, --no-add_noise: Whether noise the CHILDES utterances or not.
+
+> `--models_directory` : The directory containing the trained language models.
+
+> `--json_files_directory`: The directory containing CHILDES utterances in json format for each language.
+
+> `--add_noise`, `--no-add_noise` : Whether or not to add noise to the CHILDES utterances.
 
 If you stored the language models in the `estimated/` folder, then you can run the script like that :
 

BIN
code/__pycache__/get_most_probable_phonemes.cpython-310.pyc


BIN
code/__pycache__/make_noiser.cpython-310.pyc


BIN
code/__pycache__/utterances_cleaner.cpython-310.pyc


+ 3 - 1
code/download_childes_corpora.py

@@ -174,7 +174,6 @@ class DownloadChildCorpora :
         - phonemize_child: bool
 
         """
-        print(language, languages_to_download_informations[language]["ort_tier"])
         participants_to_consider = languages_to_download_informations[language]["participants"]
         downloading_file = open(f"{out_dirname}/{language}.one_utterance_per_line", 
                                 "w", encoding="UTF-8")
@@ -206,6 +205,9 @@ class DownloadChildCorpora :
                                                                     participant,
                                                                     backend,
                                                                     phonemize_child=phonemize_child) :
+                        utterance = utterance.replace("$", " ").replace("@", " ")
+                        utterance = self.utterances_cleaner.remove_multiple_spaces(utterance)
+                        utterance = utterance.strip()
                         if not utterance :
                             continue
                         family_name = "_".join((corpus, family))

+ 2 - 3
code/get_most_probable_phonemes.py

@@ -7,9 +7,8 @@ def get_most_probable_phonemes(one_sentence_per_line_file, p=0.007) :
     counts = defaultdict(int)
     for sentence in open(one_sentence_per_line_file) :
         sentence = sentence.rstrip()
-        for word in sentence.split("@") :
-            for phoneme in word.split("$") :
-                counts[phoneme] += 1
+        for phoneme in sentence.split(" ") :
+            counts[phoneme] += 1
     total = sum(counts.values())
     for phoneme in counts :
         counts[phoneme] /= total

+ 5 - 6
code/make_noiser.py

@@ -9,8 +9,8 @@ random.seed(80)
 
 class Noise :
     """
-    This class simulate noise in the data. Crucially,\
-    noise can be made on three points :\
+    This class simulates noise in the data. Crucially,\
+    noise can be introduced in three ways :\
     (1) The noise of phonemes order of a given sequence\
     by making the order of the sequence more aribitrary,\
     (2) Replacement of some phonemes of a given sequence\
@@ -22,16 +22,16 @@ class Noise :
     - phonemes_order_noise :
         Parameter for controling the degree of noise at the level\
         of phonemes order. See the point 1 mentioned above.
-    - speakers_noise :
+    - speakers_noise_values :
         Parameters for controling the degree of noise at the level\
         of speakers. See the point 3 mentioned above.
-    - phonemes_noise :
+    - phonemes_noise_value :
         Parameter for controling the degree of noise at the level of phonemes.
         See the point 2 mentioned above.
     """
 
     def __init__(self,
-                    most_probable_phonemes: list,
+                    most_probable_phonemes,
                     phonemes_order_noise_value=0.3,
                     speakers_noise_values=(0.5, 0.5),
                     phonemes_noise_value=0.5) :
@@ -82,7 +82,6 @@ class Noise :
             The sequence with noised phonemes.
         """
         phonemes_to_noise = round(len(sequence) * self.phonemes_noise_value)
-        assert phonemes_to_noise < len(sequence), "Number of phoneme to noise greather that sequence's length"
         indexes = choices(range(len(sequence)), k=phonemes_to_noise)
         # choose new phonemes only from the most probable phonemes.
         phonemes = choices(self.most_probable_phonemes, k=phonemes_to_noise)

+ 2 - 1
code/one_utterance_per_line_to_json.py

@@ -33,7 +33,8 @@ def one_utterance_per_line_to_json(directory: str, out_dirname: str) -> None:
         for participant_age_utterance in open(f"{directory}/{language_filename}") :
             family, participant, age, utterance = participant_age_utterance.strip().split(",")
             participant = "Adult" if participant in ["Father", "Mother"] else participant
-            if utterance in by_family[family][age][participant] : continue
+            if utterance in by_family[family][age][participant] :
+                continue
             by_family[family][age][participant].append(utterance)
         with open(f"{out_dirname}/{language}.json", "w") as out_filename :
             json.dump(

+ 18 - 21
code/test_on_all_languages.py

@@ -77,7 +77,7 @@ def statistics_word(utterances: list, model: kenlm.Model) -> dict:
 
     return statistics
 
-def create_sparse_combinantions(values: Iterable) -> set:
+def create_sparse_combinantions(values: Iterable, variables=3) -> set:
     """
     This function will create combinantions for noising.
     Each item in the returned set contains four values corresponding\
@@ -90,8 +90,8 @@ def create_sparse_combinantions(values: Iterable) -> set:
     """
     sparse_combinantions = []
     for value in values :
-        for idx in range(len(values)) :
-            sparse_values = [0.0] * len(values)
+        for idx in range(variables) :
+            sparse_values = [0.0] * variables
             sparse_values[idx] = value
             sparse_combinantions.append(tuple(sparse_values))
     return set(sparse_combinantions)
@@ -101,15 +101,14 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
     This function will test the language models on CHILDES corpora
     """
     columns = ["language", "typology", "family", "speaker",\
-                "age", "perplexity", "entropy", "mlu", "mlu_without_repetition",\
-                "phonemes_order_noise", "speakers_noise_adult",\
-                "speakers_noise_child", "phonemes_noise"]
+                "age", "perplexity", "entropy", "phonemes_order_noise",\
+                "speakers_noise", "phonemes_noise"]
     results = pd.DataFrame(columns=columns, index=None)
     # all_combinations = (list(product((0.0, 0.25, 0.5, 0.75), repeat=4))
     #                       if add_noise else [((0.0, 0.0, 0.0, 0.0))])
-    sparse_combinantions = create_sparse_combinantions((0.0, 0.25, 0.5, 0.75))
+    sparse_combinantions = create_sparse_combinantions((0.0, 0.25, 0.5, 0.75, 1))
     # noise_values = np.linspace(0.0, 1.0, num=6)
-    for phonemes_noise, speakers_noise_child, speakers_noise_adult, phonemes_order_noise in tqdm(sparse_combinantions, total=len(sparse_combinantions)) :
+    for phonemes_noise, speakers_noise, phonemes_order_noise in tqdm(sparse_combinantions, total=len(sparse_combinantions)) :
         for test_filename, model_filename in product(os.listdir(json_files_directory), os.listdir(models_directory)) :
             lg_iso, _ = test_filename.split(".")
             model_lg = model_filename.split(".")[0]
@@ -120,7 +119,7 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
             if add_noise :
                 noise = Noise(most_probable_phonemes,
                                 phonemes_order_noise_value=phonemes_order_noise,
-                                speakers_noise_values=(speakers_noise_child, speakers_noise_adult),
+                                speakers_noise_values=(speakers_noise, speakers_noise),
                                 phonemes_noise_value=phonemes_noise)
                 loaded_json = noise(loaded_json)
             model = kenlm.Model(f"{models_directory}/{model_filename}")
@@ -129,21 +128,19 @@ def test(json_files_directory, models_directory, train_files, add_noise=True) :
                     if age == "None" : print(family, lg_iso, age); continue
                     for speaker in loaded_json[family][age] :
                         if speaker not in ["Adult", "Target_Child"] : continue
-                        results_statistics = statistics_word(loaded_json[family][age][speaker], model)
+                        # results_statistics = statistics_word(loaded_json[family][age][speaker], model)
                         language, typology = LANGUAGES_TYPOLOGIES[lg_iso]
+                        ppl = model.perplexity("\n".join(loaded_json[family][age][speaker]))
+                        entropy = log(ppl)
                         new_row =  {"language" : language,
                                     "typology" : typology,
                                     "family" : family,
                                     "speaker" : speaker,
                                     "age" : float(age),
-                                    "perplexity" : results_statistics["ppl"],
-                                    "entropy" : results_statistics["entropy"],
-                                    "mlu_w" : results_statistics["mlu_w"],
-                                    "mlu_p" : results_statistics["mlu_p"],
-                                    "ttr_w" : results_statistics["ttr_w"],
+                                    "perplexity" : ppl,
+                                    "entropy" : entropy,
                                     "phonemes_order_noise" : phonemes_order_noise,
-                                    "speakers_noise_adult" : speakers_noise_adult,
-                                    "speakers_noise_child" : speakers_noise_child,
+                                    "speakers_noise" : speakers_noise,
                                     "phonemes_noise" : phonemes_noise}
                         results = results.append(new_row, ignore_index=True)
     return results
@@ -151,11 +148,11 @@ if __name__ == "__main__":
     from argparse import ArgumentParser, BooleanOptionalAction
 
     parser = ArgumentParser()
-    parser.add_argument('--train_directory',
+    parser.add_argument('--train_files_directory',
         required=True,
         help="The directory containing the train files tokenized in phonemes."
         )
-    parser.add_argument('--models_directory',
+    parser.add_argument('--model_files_directory',
         required=True,
         help="The directory containing the trained language models."
         )
@@ -172,8 +169,8 @@ if __name__ == "__main__":
     args = parser.parse_args()
     add_noise = args.add_noise
     json_files_directory = args.json_files_directory
-    phoneme_train_files = args.train_directory
-    models_directory = args.models_directory
+    phoneme_train_files = args.train_files_directory
+    models_directory = args.model_files_directory
 
     if not os.path.exists("results"):
         os.makedirs("results")

+ 43 - 18
code/utterances_cleaner.py

@@ -1,4 +1,3 @@
-# pylint: disable=no-member
 """This module contains an implementation of a class that help /
     to clean orthographic or IPA transcripts of utterances. /
     Crucially, this class will clean utterances by removing or replacing /
@@ -8,9 +7,11 @@
 import re
 import string
 
-
 class UtterancesCleaner :
     """
+    This class will clean utterances from CHILDES,\
+    by deleting words, patterns and punctuation,\
+    or replacing them with other symbols.
     """
     def __init__(self, markers: dict) :
         self.delete_marker_pattern = '|'.join(markers["marker_to_delete"])
@@ -27,12 +28,12 @@ class UtterancesCleaner :
 
         Parameters
         ----------
-        - utterance : list
-            list of words utterance
-        - pattern : list
-            regex pattern containing markers to delete from the utterance
+        - utterance : str
+            Utterance from which markers will be replaced
+        - pattern : str
+            Regex pattern containing markers to delete from the utterance
         - replacement :
-            symbol that will replace markers
+            Symbol that will replace markers
         """
         return " ".join(re.sub(pattern, replacement, word) for word in utterance.split(" "))
 
@@ -43,8 +44,8 @@ class UtterancesCleaner :
 
         Parameters
         ----------
-        - utterance : list
-            list of words utterance
+        - utterance : str
+            Utterance from which those words will be removed
         """
         return " ".join(word for word in utterance.split(" ") \
             if not re.match(self.word_contains_delete_pattern, word))
@@ -57,14 +58,14 @@ class UtterancesCleaner :
         ----------
         - utterance : str
             The utterance from which the punctuation will be removed.
-        
+
         Returns
         -------
         str :
             The utterance without punctuations.
         """
         return utterance.translate(str.maketrans('', '', string.punctuation))
-    
+
     def remove_brackets(self, utterance: str) -> str :
         """
         Remove brackets from a given utterance.
@@ -73,7 +74,7 @@ class UtterancesCleaner :
         ----------
         - utterance : str
             The utterance from which the brackets will be removed.
-        
+
         Returns
         -------
         str :
@@ -82,6 +83,15 @@ class UtterancesCleaner :
         return re.sub(r"[\(\[].*?[\)\]]", '', utterance)
 
     def handle_repetitions(self, utterance: str) -> str:
+        """
+        This function will repeat some units n times in\
+        a given utterance.
+
+        Parameters
+        ----------
+        utterance: str
+            Utterance from which some units will be repeated.
+        """
         while True:
             matched = re.search(self.pattern_repetition, utterance)
 
@@ -91,13 +101,27 @@ class UtterancesCleaner :
             all_match = matched.group(0)
             separator = matched.group(1)
             word, repetitions = matched.group(2),matched.group(3)
-            repeated_word = '{}{}'.format(separator, ' '.join([word] * int(repetitions)))
+            repeated_word = f"{separator}{' '.join([word] * int(repetitions))}"
 
             utterance = utterance.replace(all_match, repeated_word, 1)
 
         return utterance
-    
+
     def remove_multiple_spaces(self, utterance: str) -> str :
+        """
+        Remove multiple spaces from a given utterance.
+
+        Parameters
+        ----------
+        utterance: str
+            Utterance from which multiple successive spaces\
+            will be replaced.
+
+        Returns
+        -------
+        - str
+            Utterance without multiple successive spaces.
+        """
         return re.sub(' +', ' ', utterance)
 
     def clean(self, utterance: str) -> str :
@@ -108,18 +132,19 @@ class UtterancesCleaner :
 
         Parameters
         ----------
-        - utterances : list
-            list of utterances to clean
+        - utterances : str
+            Utterance to clean
         Returns
         -------
-        - generator over cleaned utterances
+        - str
+            Cleaned utterance
         """
         utterance = self.handle_repetitions(utterance)
         utterance = self.replace_marker(utterance, self.delete_marker_pattern, "")
         utterance = self.delete_words(utterance)
         utterance = self.replace_marker(utterance, self.poncts_to_delete_pattern, "")
         utterance = self.replace_marker(utterance, self.delete_comments_pattern, "")
-        utterance = self.replace_marker(utterance, self.replace_unk_pattern, "") # pour mot non retranscrit
+        utterance = self.replace_marker(utterance, self.replace_unk_pattern, "")
         utterance = self.remove_brackets(utterance)
         utterance = self.remove_ponctuations(utterance)
         utterance = self.remove_multiple_spaces(utterance)

+ 0 - 1
commands_reproduction.txt

@@ -1 +0,0 @@
-.git/annex/objects/8v/Zm/MD5E-s459--11999fdb245d2931764986dd3e7ee155.txt/MD5E-s459--11999fdb245d2931764986dd3e7ee155.txt

+ 1 - 1
datasets/childes_json_corpora/da.json

@@ -1 +1 @@
-../../.git/annex/objects/4W/Xg/MD5E-s2940316--507efe8e52bf5ce75f8df711d87d1f38.json/MD5E-s2940316--507efe8e52bf5ce75f8df711d87d1f38.json
+../../.git/annex/objects/pQ/K0/MD5E-s2929654--c67f2b9d013cecd89ad1f90d3f1042d1.json/MD5E-s2929654--c67f2b9d013cecd89ad1f90d3f1042d1.json

+ 1 - 1
datasets/childes_json_corpora/de.json

@@ -1 +1 @@
-../../.git/annex/objects/m8/K7/MD5E-s45738282--c42b1e618dc3371fc04798b8aec56033.json/MD5E-s45738282--c42b1e618dc3371fc04798b8aec56033.json
+../../.git/annex/objects/V1/pk/MD5E-s45122753--28c70804c58919db510891f7cd21e9ad.json/MD5E-s45122753--28c70804c58919db510891f7cd21e9ad.json

+ 1 - 1
datasets/childes_json_corpora/en.json

@@ -1 +1 @@
-../../.git/annex/objects/mM/Fk/MD5E-s32246267--53fa0ec80e98ef57b52100fa3e52a686.json/MD5E-s32246267--53fa0ec80e98ef57b52100fa3e52a686.json
+../../.git/annex/objects/3m/jj/MD5E-s32243800--1e4701d6cc0a5ce2362254232cdd1818.json/MD5E-s32243800--1e4701d6cc0a5ce2362254232cdd1818.json

+ 1 - 1
datasets/childes_json_corpora/es.json

@@ -1 +1 @@
-../../.git/annex/objects/qz/0X/MD5E-s7528611--1534de247b52108a71b2c32d24a2a07a.json/MD5E-s7528611--1534de247b52108a71b2c32d24a2a07a.json
+../../.git/annex/objects/8g/pZ/MD5E-s7526224--8c4754f17b5bfbb314384bcc8da75abb.json/MD5E-s7526224--8c4754f17b5bfbb314384bcc8da75abb.json

+ 1 - 1
datasets/childes_json_corpora/et.json

@@ -1 +1 @@
-../../.git/annex/objects/1x/Gv/MD5E-s8512506--8f79ccb462b01e1ca3ef1ff5ae5461cd.json/MD5E-s8512506--8f79ccb462b01e1ca3ef1ff5ae5461cd.json
+../../.git/annex/objects/6G/qj/MD5E-s8510895--eda1192d7270394f8c8f803cf32e98d8.json/MD5E-s8510895--eda1192d7270394f8c8f803cf32e98d8.json

+ 1 - 1
datasets/childes_json_corpora/eu.json

@@ -1 +1 @@
-../../.git/annex/objects/gw/pQ/MD5E-s1472131--5b9acb68d334a8e8682beb740356dc91.json/MD5E-s1472131--5b9acb68d334a8e8682beb740356dc91.json
+../../.git/annex/objects/Mj/k6/MD5E-s1471684--5a1181aa8b8978b87a9dc0c892037c7a.json/MD5E-s1471684--5a1181aa8b8978b87a9dc0c892037c7a.json

+ 1 - 1
datasets/childes_json_corpora/fr.json

@@ -1 +1 @@
-../../.git/annex/objects/jq/Vx/MD5E-s3234823--ca293387898ab63ac7c7794b873f9e0c.json/MD5E-s3234823--ca293387898ab63ac7c7794b873f9e0c.json
+../../.git/annex/objects/WJ/Fq/MD5E-s3229799--9ca02feed25b6e39d947530451144a3b.json/MD5E-s3229799--9ca02feed25b6e39d947530451144a3b.json

+ 1 - 1
datasets/childes_json_corpora/ja.json

@@ -1 +1 @@
-../../.git/annex/objects/Vk/mV/MD5E-s7591871--e2e5bc66db54eaf1b073a6d115d86a04.json/MD5E-s7591871--e2e5bc66db54eaf1b073a6d115d86a04.json
+../../.git/annex/objects/mM/fW/MD5E-s7576050--1345451c2cb335107bca1b2b76f787a6.json/MD5E-s7576050--1345451c2cb335107bca1b2b76f787a6.json

+ 1 - 1
datasets/childes_json_corpora/pl.json

@@ -1 +1 @@
-../../.git/annex/objects/WM/qg/MD5E-s16258378--858cf5b3b3eaa45e0146ba94778f0fe5.json/MD5E-s16258378--858cf5b3b3eaa45e0146ba94778f0fe5.json
+../../.git/annex/objects/W8/Pv/MD5E-s16205319--65fa716e7e1bf98973d28b6a0825ab90.json/MD5E-s16205319--65fa716e7e1bf98973d28b6a0825ab90.json

+ 1 - 1
datasets/childes_json_corpora/pt.json

@@ -1 +1 @@
-../../.git/annex/objects/Qf/kG/MD5E-s6161273--53ac1b2e1337d602434858fb01393f5b.json/MD5E-s6161273--53ac1b2e1337d602434858fb01393f5b.json
+../../.git/annex/objects/F0/WM/MD5E-s6160640--c494b016644486e985bdf66ee5026f24.json/MD5E-s6160640--c494b016644486e985bdf66ee5026f24.json

+ 1 - 1
datasets/childes_json_corpora/sr.json

@@ -1 +1 @@
-../../.git/annex/objects/0w/3x/MD5E-s7914319--f4e61920a973cda4ec9629c33959a501.json/MD5E-s7914319--f4e61920a973cda4ec9629c33959a501.json
+../../.git/annex/objects/jJ/FF/MD5E-s7911866--42b4530f71f2f27b6b3a8bae229d7f8d.json/MD5E-s7911866--42b4530f71f2f27b6b3a8bae229d7f8d.json

+ 1 - 1
datasets/childes_json_corpora/tr.json

@@ -1 +1 @@
-../../.git/annex/objects/ZG/V9/MD5E-s913674--2990a8cf6fd26ef03c0c28154606bbb6.json/MD5E-s913674--2990a8cf6fd26ef03c0c28154606bbb6.json
+../../.git/annex/objects/7J/KZ/MD5E-s913366--6c2544ff0dad5f838aee28aa3cc31287.json/MD5E-s913366--6c2544ff0dad5f838aee28aa3cc31287.json

+ 1 - 0
datasets/childes_one_utterance_per_line_files/da.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/81/8Z/MD5E-s4522557--f2af9f79413a10c65273a1b3e8ebdae5/MD5E-s4522557--f2af9f79413a10c65273a1b3e8ebdae5

+ 1 - 0
datasets/childes_one_utterance_per_line_files/de.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/VP/k7/MD5E-s48271457--1571db7b4153ac79aa881130cbb180c4/MD5E-s48271457--1571db7b4153ac79aa881130cbb180c4

+ 1 - 0
datasets/childes_one_utterance_per_line_files/en.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/03/Fm/MD5E-s39751512--db561b3c2b236c543d148d4be6c68302/MD5E-s39751512--db561b3c2b236c543d148d4be6c68302

+ 1 - 0
datasets/childes_one_utterance_per_line_files/es.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/68/37/MD5E-s13920227--df0a021631fc74f8b7a21ad2947a0e8f/MD5E-s13920227--df0a021631fc74f8b7a21ad2947a0e8f

+ 1 - 0
datasets/childes_one_utterance_per_line_files/et.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/wz/mq/MD5E-s12262395--56255fe3e6502be7117803bed1891236/MD5E-s12262395--56255fe3e6502be7117803bed1891236

+ 1 - 0
datasets/childes_one_utterance_per_line_files/eu.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/Km/XJ/MD5E-s2347275--3ff726a380f86ea7829070770d3a5add/MD5E-s2347275--3ff726a380f86ea7829070770d3a5add

+ 1 - 0
datasets/childes_one_utterance_per_line_files/fr.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/1J/VJ/MD5E-s4950748--53eb94bc964b4944c4786e0bc116aa12/MD5E-s4950748--53eb94bc964b4944c4786e0bc116aa12

+ 1 - 0
datasets/childes_one_utterance_per_line_files/ja.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/g0/q2/MD5E-s11677950--362fdb2c641231bac1a59772d4347984/MD5E-s11677950--362fdb2c641231bac1a59772d4347984

+ 1 - 0
datasets/childes_one_utterance_per_line_files/not_downloaded_data.txt

@@ -0,0 +1 @@
+../../.git/annex/objects/93/1P/MD5E-s103--3d2a6963fc888f7069784dc560979713.txt/MD5E-s103--3d2a6963fc888f7069784dc560979713.txt

+ 1 - 0
datasets/childes_one_utterance_per_line_files/pl.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/XK/Zj/MD5E-s15486053--8e85693f81dcd0d4432983c9d47e568a/MD5E-s15486053--8e85693f81dcd0d4432983c9d47e568a

+ 1 - 0
datasets/childes_one_utterance_per_line_files/pt.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/z1/7V/MD5E-s8140234--9401d864c5b1263e16f740b85ac1b3d9/MD5E-s8140234--9401d864c5b1263e16f740b85ac1b3d9

+ 1 - 0
datasets/childes_one_utterance_per_line_files/sr.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/MW/81/MD5E-s13343895--deccb5e7ad1ac130de88bd7bbb107ece/MD5E-s13343895--deccb5e7ad1ac130de88bd7bbb107ece

+ 1 - 0
datasets/childes_one_utterance_per_line_files/tr.one_utterance_per_line

@@ -0,0 +1 @@
+../../.git/annex/objects/Fw/F5/MD5E-s1229238--b544181cb4b319bb4ef87b348c91a9cb/MD5E-s1229238--b544181cb4b319bb4ef87b348c91a9cb

+ 1 - 0
estimated/da.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/5x/74/MD5E-s28028840--71fbf9fb169884d736da26c047e16f4e.arpa/MD5E-s28028840--71fbf9fb169884d736da26c047e16f4e.arpa

+ 1 - 0
estimated/de.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/Z2/0W/MD5E-s22540364--11e64685c900b25e47a7c2a137dd7a9b.arpa/MD5E-s22540364--11e64685c900b25e47a7c2a137dd7a9b.arpa

+ 1 - 0
estimated/en.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/KG/5q/MD5E-s31436879--847b2a7d2e5210d87f638963a8764808.arpa/MD5E-s31436879--847b2a7d2e5210d87f638963a8764808.arpa

+ 1 - 0
estimated/es.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/Zq/pj/MD5E-s10061705--b466f7fc80c31c74891f85256d324c43.arpa/MD5E-s10061705--b466f7fc80c31c74891f85256d324c43.arpa

+ 1 - 0
estimated/et.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/w4/9Q/MD5E-s18873182--89176dfdd746dd62fe277cf760489709.arpa/MD5E-s18873182--89176dfdd746dd62fe277cf760489709.arpa

+ 1 - 0
estimated/eu.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/vZ/2G/MD5E-s12176188--ae20d403fb51fef0b7572521b95d47a9.arpa/MD5E-s12176188--ae20d403fb51fef0b7572521b95d47a9.arpa

+ 1 - 0
estimated/fr.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/QG/ff/MD5E-s20901089--1873e4fa871af748a4028e962a941b74.arpa/MD5E-s20901089--1873e4fa871af748a4028e962a941b74.arpa

+ 1 - 0
estimated/ja.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/6W/kM/MD5E-s8026445--d320df753b865052827e96c0be67e418.arpa/MD5E-s8026445--d320df753b865052827e96c0be67e418.arpa

+ 1 - 0
estimated/pl.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/5j/46/MD5E-s23833364--0c4492ab80d3c7f37ff923288dc88d80.arpa/MD5E-s23833364--0c4492ab80d3c7f37ff923288dc88d80.arpa

+ 1 - 0
estimated/pt.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/ZF/pz/MD5E-s22346672--1a9f56836b07f9a0d981e329ce47e1c9.arpa/MD5E-s22346672--1a9f56836b07f9a0d981e329ce47e1c9.arpa

+ 1 - 0
estimated/sr.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/6M/xg/MD5E-s20755431--b4f26a89a36c9c4a61bb39a00c83c116.arpa/MD5E-s20755431--b4f26a89a36c9c4a61bb39a00c83c116.arpa

+ 1 - 0
estimated/tr.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/Gf/70/MD5E-s18935056--4fe9ce073a5c9cb9e601fa1424524c3a.arpa/MD5E-s18935056--4fe9ce073a5c9cb9e601fa1424524c3a.arpa

+ 1 - 0
final_results_analysis.Rmd

@@ -0,0 +1 @@
+.git/annex/objects/QF/K4/MD5E-s8054--5b841a6350a21641fbf42de9283b83a3.Rmd/MD5E-s8054--5b841a6350a21641fbf42de9283b83a3.Rmd

+ 1 - 0
results/results.csv

@@ -0,0 +1 @@
+../.git/annex/objects/V1/Q3/MD5E-s10147494--3d57c9e2bb1e22146849572799f84041.csv/MD5E-s10147494--3d57c9e2bb1e22146849572799f84041.csv