
importing conda environment

yaya-sy committed 1 year ago
commit 202ad44f37
34 changed files with 254 additions and 166 deletions
  1. code/__pycache__/make_noiser.cpython-310.pyc (BIN)
  2. code/download_childes_corpora.py (+3 -1)
  3. code/download_opensubtitles_corpora.py (+44 -28)
  4. code/evaluate_language_models.py (+85 -0)
  5. code/make_noiser.py (+47 -36)
  6. code/test_on_all_languages.py (+61 -87)
  7. datasets/opensubtitles_corpora/tokenized_in_words/da.one_sentence_per_line (+0 -1)
  8. datasets/opensubtitles_corpora/tokenized_in_words/de.one_sentence_per_line (+0 -1)
  9. datasets/opensubtitles_corpora/tokenized_in_words/en.one_sentence_per_line (+0 -1)
  10. datasets/opensubtitles_corpora/tokenized_in_words/es.one_sentence_per_line (+0 -1)
  11. datasets/opensubtitles_corpora/tokenized_in_words/et.one_sentence_per_line (+0 -1)
  12. datasets/opensubtitles_corpora/tokenized_in_words/eu.one_sentence_per_line (+0 -1)
  13. datasets/opensubtitles_corpora/tokenized_in_words/fr.one_sentence_per_line (+0 -1)
  14. datasets/opensubtitles_corpora/tokenized_in_words/ja.one_sentence_per_line (+0 -1)
  15. datasets/opensubtitles_corpora/tokenized_in_words/pl.one_sentence_per_line (+0 -1)
  16. datasets/opensubtitles_corpora/tokenized_in_words/pt.one_sentence_per_line (+0 -1)
  17. datasets/opensubtitles_corpora/tokenized_in_words/sr.one_sentence_per_line (+0 -1)
  18. datasets/opensubtitles_corpora/tokenized_in_words/tr.one_sentence_per_line (+0 -1)
  19. environment.yml (+1 -0)
  20. estimated/da.one_sentence_per_line.arpa (+1 -0)
  21. estimated/de.one_sentence_per_line.arpa (+1 -0)
  22. estimated/en.one_sentence_per_line.arpa (+1 -0)
  23. estimated/es.one_sentence_per_line.arpa (+1 -0)
  24. estimated/et.one_sentence_per_line.arpa (+1 -0)
  25. estimated/eu.one_sentence_per_line.arpa (+1 -0)
  26. estimated/fr.one_sentence_per_line.arpa (+1 -0)
  27. estimated/ja.one_sentence_per_line.arpa (+1 -0)
  28. estimated/pl.one_sentence_per_line.arpa (+1 -0)
  29. estimated/pt.one_sentence_per_line.arpa (+1 -0)
  30. estimated/sr.one_sentence_per_line.arpa (+1 -0)
  31. estimated/tr.one_sentence_per_line.arpa (+1 -0)
  32. results/evaluation.csv (+1 -0)
  33. results/results_for_study2_datalad.csv (+0 -1)
  34. ter (+0 -1)

BIN
code/__pycache__/make_noiser.cpython-310.pyc


+ 3 - 1
code/download_childes_corpora.py

@@ -246,7 +246,9 @@ if __name__ == "__main__" :
     parser.add_argument("--markers_json",
                         help="Json markers that serve for cleaning.",
                         required=True)
-    parser.add_argument("--phonemize_child", action=BooleanOptionalAction)
+    parser.add_argument("--phonemize_child",
+        help="Whether phonemize child utterances or not.",
+        action=BooleanOptionalAction)
     args = parser.parse_args()
     phonemize_child_or_not = args.phonemize_child
     yaml_file = args.yaml_file

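For reference, BooleanOptionalAction (in argparse since Python 3.9) is what gives the new --phonemize_child option its paired negative form. A minimal sketch, independent of the script above:

    from argparse import ArgumentParser, BooleanOptionalAction

    parser = ArgumentParser()
    parser.add_argument("--phonemize_child",
                        help="Whether phonemize child utterances or not.",
                        action=BooleanOptionalAction)

    # --phonemize_child sets the value to True, --no-phonemize_child to False,
    # and omitting both leaves it at None.
    print(parser.parse_args(["--phonemize_child"]).phonemize_child)
    print(parser.parse_args(["--no-phonemize_child"]).phonemize_child)
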
+ 44 - 28
code/download_opensubtitles_corpora.py

@@ -8,18 +8,18 @@
     and only extract the extract the necessary number of sentences only on these chunks.
 """
 import os
-from random import shuffle
 import re
+from typing import Iterator
+import string
+import random
 from io import BytesIO
+from random import shuffle
 import gzip
 import yaml
 import requests
 from tqdm import tqdm
-import string
 from phonemizer.backend import EspeakBackend
 from phonemizer.separator import Separator
-from typing import Iterator
-import random
 random.seed(80)
 
 class DownloadOpenSubtitlesData :
@@ -40,11 +40,11 @@ class DownloadOpenSubtitlesData :
         self.base_url  = f"https://opus.nlpl.eu/download.php?f=OpenSubtitles/v{version}/mono/OpenSubtitles.raw."
         self.separator = Separator(phone='$', word='@')
         self.total_sents = 0
-    
+
     def _remove_ponctuations(self, sentence: str) -> str :
         """
         Method that removes ponctuations from a given sentence.
-        
+
         Parameters
         ----------
         - sent : str
@@ -60,20 +60,22 @@ class DownloadOpenSubtitlesData :
     def _remove_brackets(self, sentence: str) -> str:
         """
         Method that removes brackets from a given sentence.
-        
+
         Parameters
         ----------
         - sentence : str
             The sentence for which brackets need to be removed.
-        
+
         Returns
         -------
         - str :
             The sentence without brackets.
         """
         return re.sub(r"[\(\[].*?[\)\]]", "", sentence)
-        
-    def get_sentences(self, language: str, max_sents_to_download: int, chunk: int=128) -> Iterator[tuple]:
+
+    def get_sentences(self, language: str,
+                        max_sents_to_download: int,
+                        chunk: int=128) -> Iterator[tuple]:
         """
         Function for getting sentences from opensubtitles for a given language\
         and a number of sentences.
@@ -84,10 +86,10 @@ class DownloadOpenSubtitlesData :
             The language for which to retrieve the sentences.
         - max_sents_to_process : str
             The number of sentences to retrieve.
-        
+
         Returns
         -------
-        - Iterator : 
+        - Iterator :
             Iterator over sentences and progressbars
         """
         # stream in order to not load all on memory
@@ -102,28 +104,37 @@ class DownloadOpenSubtitlesData :
                 chunk = next(chunks)
                 try :
                     for sent in gzip.open(BytesIO(chunk), "rt") :
-                        if self.total_sents >= max_sents_to_download : 
+                        if self.total_sents >= max_sents_to_download :
                             break
                         else :
                             yield sent, progress_bar
                 except : # if exception, this means the chunk size is too small for gzip
-                    print(f"The chunk size is to small for {max_sents_to_download} sentences to download")
+                    print(f"The chunk size is to small for {max_sents_to_download}\
+                            sentences to download")
                     break
-    
-    def __call__(self, loaded_yaml_file, train_sentences, dev_sentences, chunk, out_dirname) -> None:
+
+    def __call__(self,
+                loaded_yaml_file,
+                train_sentences,
+                dev_sentences,
+                chunk,
+                out_dirname) -> None:
         """
         Collect the sentences for all languages.
 
         Paramaters
         ----------
         - loaded_yaml_file : dict
-            This dictionary contains all informations relevant for this study, for each language. \
-            This dictionary also contains informations about espeak ids for the languages, and this is relevant
-            for phonemization.
+            This dictionary contains all informations relevant\
+            for this study, for each language. This dictionary also\
+            contains informations about espeak ids for the languages,\
+            and this is relevant for phonemization.
         - train_sentences : int
-            Number of sentences to download for train corpora. This number is the same for all languages.
+            Number of sentences to download for train corpora.\
+            This number is the same for all languages.
         - dev_sentences : int
-            Number of sentences to download for dev corpora. This number is the same for all languages.
+            Number of sentences to download for dev corpora.\
+            This number is the same for all languages.
         - out_dirname : str
             The folder where the outputs will be saved.
         """
@@ -154,7 +165,7 @@ class DownloadOpenSubtitlesData :
             shuffle(added_sents)
             train = added_sents[:train_sentences]
             dev = added_sents[train_sentences:max_sents_to_download]
-            for sent_train in train : 
+            for sent_train in train :
                 output_file_train.write(sent_train + "\n")
             for sent_dev in dev :
                 output_file_dev.write(sent_dev + "\n")
@@ -165,22 +176,24 @@ if __name__ == "__main__" :
     parser = ArgumentParser()
 
     parser.add_argument("--yaml_file",
-                        help="YAML File containing for each language, all relevant information for downloading the data.",
+                        help="YAML File containing for each language,\
+                            all relevant information for downloading the data.",
                         required=True)
     parser.add_argument("--out_dirname",
                         help="The directory where outputs will be stored.",
                         required=True)
     parser.add_argument("--chunk",
-                        help="For the chunk size. This number should grow as much as you want to download many sentences.\
+                        help="For the chunk size. This number should\
+                            grow as much as you want to download many sentences.\
                             256 is a good number when you want to get 1_000_000 or less sentences",
                         default=1024,
                         required=False)
     parser.add_argument("--train_sentences",
-                        help="Number of sent for the train corpora.",
+                        help="Number of sent for the training corpora.",
                         default=200_000,
                         required=False)
     parser.add_argument("--dev_sentences",
-                        help="Number of sent for the dev copora.",
+                        help="Number of sent for the dev or test copora.",
                         default=10_000,
                         required=False)
     args = parser.parse_args()
@@ -196,5 +209,8 @@ if __name__ == "__main__" :
         os.makedirs(f"{out_dirname}/tokenized_in_phonemes_dev")
     languages_to_download_informations = yaml.safe_load(open(args.yaml_file))
     downloader = DownloadOpenSubtitlesData()
-    downloader(languages_to_download_informations, args.train_sentences, args.dev_sentences, chunk, out_dirname)
-
+    downloader(languages_to_download_informations,
+                args.train_sentences,
+                args.dev_sentences,
+                chunk,
+                out_dirname)

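The downloader presumably feeds the collected sentences through the phonemizer backend imported above, with phones joined by '$' and words by '@'. A minimal sketch of that separator in isolation; the language code and sentence are examples, not values taken from this commit:

    from phonemizer.backend import EspeakBackend
    from phonemizer.separator import Separator

    # Same separator as in DownloadOpenSubtitlesData.__init__
    separator = Separator(phone='$', word='@')
    backend = EspeakBackend("fr-fr")

    # phonemize() takes a list of utterances and returns a list of phonemized strings,
    # with '$' between phones and '@' between words.
    print(backend.phonemize(["bonjour tout le monde"], separator=separator))
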
+ 85 - 0
code/evaluate_language_models.py

@@ -0,0 +1,85 @@
+"""This module implements a function that\
+    evaluate the trained language moedels"""
+import os
+from math import log
+import random
+import pandas as pd
+import kenlm
+random.seed(1023)
+
+LANGUAGES_TYPOLOGIES = {
+    'da' : ("Danish", "fusional"),
+    'de' : ("German", "fusional"),
+    'en' : ("English", "fusional"),
+    'es' : ("Spanish", "fusional"),
+    'et' : ("Estonian", "agglutinative"),
+    'eu' : ("Basque", "agglutinative"),
+    'fr' : ("French", "fusional"),
+    'ja' : ("Japanese", "agglutinative"),
+    'pl' : ("Polish", "fusional"),
+    'pt' : ("Portuguese", "fusional"),
+    'sr' : ("Serbian", "fusional"),
+    'tr' : ("Turkish", "agglutinative")}
+
+def evaluate(train_files_directory: str,
+                dev_files_directory: str,
+                models_directory: str) -> pd.DataFrame:
+    """
+    This function will compute the entropies of\
+    test files for all languages.
+
+    Parameters
+    ----------
+    - train_files_directory: str
+        The path to the directory containing training files.
+    - dev_files_directory: str
+        The path to the directory containing testing/development files.
+    - models_directory: str
+        The path to the directory containing training trained\
+        language models.
+    """
+    triplets_files_model = zip(sorted(os.listdir(train_files_directory)),
+                                sorted(os.listdir(dev_files_directory)),
+                                sorted(os.listdir(models_directory)))
+    columns = ["language", "train_entropy", "dev_entropy"]
+    evaluation = pd.DataFrame(columns=columns, index=None)
+    for train_filename, dev_filename, model_filename in triplets_files_model :
+        language, _ = train_filename.split(".")
+        model = model = kenlm.Model(f"{models_directory}/{model_filename}")
+        train_sents = "\n".join(sent.strip() for sent in open(f"{train_files_directory}/{train_filename}"))
+        train_entropy = log(model.perplexity(train_sents))
+        dev_sents = "\n".join(sent.strip() for sent in open(f"{dev_files_directory}/{dev_filename}"))
+        dev_entropy = log(model.perplexity(dev_sents))
+        new_row = {
+            "language" : LANGUAGES_TYPOLOGIES[language][0],
+            "train_entropy" : train_entropy,
+            "dev_entropy" : dev_entropy
+        }
+        evaluation = evaluation.append(new_row, ignore_index=True)
+    return evaluation
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--train_files_directory',
+        required=True,
+        help="The directory containing the OpenSubtitles training files"
+        )
+    parser.add_argument('--dev_files_directory',
+        required=True,
+        help="The directory containing the OpenSubtitles test files"
+        )
+    parser.add_argument('--models_directory',
+        required=True,
+        help="The directory containing the trained language models"
+        )
+
+    args = parser.parse_args()
+    train_files = args.train_files_directory
+    dev_files = args.dev_files_directory
+    models_directory = args.models_directory
+    if not os.path.exists("results"):
+        os.makedirs("results")
+    evaluate(train_files,
+                dev_files,
+                models_directory).to_csv("results/evaluation.csv")

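The new evaluation script reduces each score to the natural log of a KenLM perplexity computed over a whole corpus file. A self-contained sketch of that core step; the .arpa model is one of the files added in this commit, while the dev-corpus path is a placeholder:

    from math import log
    import kenlm

    model = kenlm.Model("estimated/fr.one_sentence_per_line.arpa")
    # Placeholder dev corpus: one phonemized sentence per line.
    dev_sents = "\n".join(line.strip() for line in open("dev/fr.one_sentence_per_line"))

    # Entropy as computed in evaluate(): log of the corpus-level perplexity.
    dev_entropy = log(model.perplexity(dev_sents))
    print(f"fr dev entropy: {dev_entropy:.3f}")

Note that evaluate() builds its table with DataFrame.append, which was removed in pandas 2.0, so the imported conda environment presumably pins an older pandas.
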
+ 47 - 36
code/make_noiser.py

@@ -9,32 +9,38 @@ random.seed(80)
 
 class Noise :
     """
-    This class simulate noise in the data. Crucially, noise can be made on three points :\
-    (1) The noise of phonemes order of a given sequence by making the order of the sequence more aribitrary,\
-    (2) Replacement of some phonemes of a given sequence by arbitrary sampled phonemes from a vocabulary and\
+    This class simulate noise in the data. Crucially,\
+    noise can be made on three points :\
+    (1) The noise of phonemes order of a given sequence\
+    by making the order of the sequence more aribitrary,\
+    (2) Replacement of some phonemes of a given sequence\
+    by arbitrary sampled phonemes from a vocabulary and\
     (3) By arbitrary interverting some sequences of two different speakers.
 
     Atributes
     ---------
     - phonemes_order_noise :
-        Parameter for controling the degree of noise at the level of phonemes order. See the point 1 mentioned above.
+        Parameter for controling the degree of noise at the level\
+        of phonemes order. See the point 1 mentioned above.
     - speakers_noise :
-        Parameters for controling the degree of noise at the level of speakers. See the point 3 mentioned above.
+        Parameters for controling the degree of noise at the level\
+        of speakers. See the point 3 mentioned above.
     - phonemes_noise :
-        Parameter for controling the degree of noise at the level of phonemes. See the point 2 mentioned above.
+        Parameter for controling the degree of noise at the level of phonemes.
+        See the point 2 mentioned above.
     """
 
     def __init__(self,
                     most_probable_phonemes: list,
-                    phonemes_order_noise=0.3,
-                    speakers_noise=(0.5, 0.5),
-                    phonemes_noise=0.5) :
+                    phonemes_order_noise_value=0.3,
+                    speakers_noise_values=(0.5, 0.5),
+                    phonemes_noise_value=0.5) :
         self.most_probable_phonemes = most_probable_phonemes
-        self.phonemes_order_noise = phonemes_order_noise
-        self.speakers_noise = speakers_noise
-        self.phonemes_noise = phonemes_noise
-    
-    def _order_noise(self, sequence: List[str]) -> str :
+        self.phonemes_order_noise_value = phonemes_order_noise_value
+        self.speakers_noise_values = speakers_noise_values
+        self.phonemes_noise_value = phonemes_noise_value
+
+    def order_noise(self, sequence: List[str]) -> str :
         """
         Making noise the order of the phonemes in a given sequence
 
@@ -42,24 +48,24 @@ class Noise :
         ----------
         - sequence : list
             The sequence for which the phonemes order must be noised.
-        
+
         Returns
         -------
         - str :
             The sequence with the order of phonemes noised.
         """
         # number of phonemes to noise in the sequence = len(sequence) / nb_phonemes_to_noise
-        phonemes_to_noise = round(len(sequence) * self.phonemes_order_noise)
+        phonemes_to_noise = round(len(sequence) * self.phonemes_order_noise_value)
         # sample nb_phonemes_to_noise positions in the sequence
         positions_sampled = list(sample(range(len(sequence)), k=phonemes_to_noise))
         copied_positions = copy.deepcopy(positions_sampled)
         shuffle(copied_positions)
-        # change the positions of the sampled phonemes 
+        # change the positions of the sampled phonemes
         for original_position, new_position in zip(positions_sampled, copied_positions):
             sequence[original_position] = sequence[new_position]
         return " ".join(sequence)
 
-    def _phonemes_noise(self, sequence: List[str]) -> str :
+    def phonemes_noise(self, sequence: List[str]) -> str :
         """
         Makinng noise the phonemes of the sequence by replacing\
         some phonemes of the sequence by arbitrary sampled phonemes\
@@ -69,13 +75,14 @@ class Noise :
         ----------
         - sequence : list
             The sequence for which the phonemes must be noised.
-        
+
         Returns
         -------
         - str :
             The sequence with noised phonemes.
         """
-        phonemes_to_noise = round(len(sequence) * self.phonemes_noise)
+        phonemes_to_noise = round(len(sequence) * self.phonemes_noise_value)
+        assert phonemes_to_noise < len(sequence), "Number of phoneme to noise greather that sequence's length"
         indexes = choices(range(len(sequence)), k=phonemes_to_noise)
         # choose new phonemes only from the most probable phonemes.
         phonemes = choices(self.most_probable_phonemes, k=phonemes_to_noise)
@@ -83,8 +90,8 @@ class Noise :
         for idx, phonemes in zip(indexes, phonemes) :
             sequence[idx] = phonemes
         return " ".join(sequence)
-    
-    def _speakers_noise(self, speakers_sequences: Dict[str, set]) -> Dict[str, set] :
+
+    def speakers_noise(self, speakers_sequences: Dict[str, set]) -> Dict[str, set] :
         """
         Making noise in the speaker's statements.
 
@@ -92,14 +99,15 @@ class Noise :
         ----------
         - speakers_sequences : dict
             Dictionary containing the utterances for each speaker.
-        
+
         Returns
         -------
         - dict :
-            The dictionary containing the few statements interchanged between the two speakers.
+            The dictionary containing the few statements\
+            interchanged between the two speakers.
         """
         first_speaker, second_speaker = "Target_Child", "Adult"
-        noise_first_speaker, noise_second_speaker = self.speakers_noise
+        noise_first_speaker, noise_second_speaker = self.speakers_noise_values
         speakers_sequences[second_speaker] = set(speakers_sequences[second_speaker])
         speakers_sequences[first_speaker] = set(speakers_sequences[first_speaker])
         # sample some percentage of utterances from each speaker
@@ -115,9 +123,9 @@ class Noise :
         # set to list
         speakers_sequences[first_speaker] = list(speakers_sequences[first_speaker])
         speakers_sequences[second_speaker] = list(speakers_sequences[second_speaker])
-        
+
         return speakers_sequences
-    
+
     def __call__(self, loaded_dataset: dict) -> dict:
         """
         Apply the three types of noise.
@@ -125,8 +133,9 @@ class Noise :
         Parameters
         ----------
         loaded_dataset : dict
-            The dictionary containing the utterances for each family, at each and for each speaker.
-        
+            The dictionary containing the utterances for each family,\
+            at each and for each speaker.
+
         Returns
         -------
         dict :
@@ -137,15 +146,17 @@ class Noise :
                 if "Adult" in loaded_dataset[family][age] :
                     for idx, utterance in enumerate(loaded_dataset[family][age]["Adult"]) :
                         splitted_utterance = utterance.split(" ")
-                        loaded_dataset[family][age]["Adult"][idx] = self._order_noise(splitted_utterance)
-                        loaded_dataset[family][age]["Adult"][idx] = self._phonemes_noise(splitted_utterance)
+                        loaded_dataset[family][age]["Adult"][idx] = self.order_noise(splitted_utterance)
+                        loaded_dataset[family][age]["Adult"][idx] = self.phonemes_noise(splitted_utterance)
                 if "Target_Child" in loaded_dataset[family][age] :
                     for idx, utterance in enumerate(loaded_dataset[family][age]["Target_Child"]) :
                         splitted_utterance = utterance.split(" ")
-                        loaded_dataset[family][age]["Target_Child"][idx] = self._order_noise(splitted_utterance)
-                        loaded_dataset[family][age]["Target_Child"][idx] = self._phonemes_noise(splitted_utterance)
-                if "Target_Child" in loaded_dataset[family][age] and "Adult" in loaded_dataset[family][age] and all(self.speakers_noise):
-                    noised_speaker = self._speakers_noise(loaded_dataset[family][age])
+                        loaded_dataset[family][age]["Target_Child"][idx] = self.order_noise(splitted_utterance)
+                        loaded_dataset[family][age]["Target_Child"][idx] = self.phonemes_noise(splitted_utterance)
+                if("Target_Child" in loaded_dataset[family][age]
+                    and "Adult" in loaded_dataset[family][age]
+                    and all(self.speakers_noise_values)):
+                    noised_speaker = self.speakers_noise(loaded_dataset[family][age])
                     loaded_dataset[family][age]["Target_Child"] = noised_speaker["Target_Child"]
                     loaded_dataset[family][age]["Adult"] = noised_speaker["Adult"]
-        return loaded_dataset
+        return loaded_dataset

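A small usage sketch of the renamed public methods, with a toy phoneme inventory standing in for the output of get_most_probable_phonemes:

    from make_noiser import Noise

    # Toy inventory; the real list comes from get_most_probable_phonemes().
    most_probable_phonemes = ["a", "e", "i", "o", "u", "p", "t", "k"]

    noise = Noise(most_probable_phonemes,
                  phonemes_order_noise_value=0.3,
                  speakers_noise_values=(0.5, 0.5),
                  phonemes_noise_value=0.5)

    sequence = "p a t a k a".split(" ")
    print(noise.order_noise(list(sequence)))     # about 30% of positions reshuffled
    print(noise.phonemes_noise(list(sequence)))  # about 50% of phonemes resampled from the inventory
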
+ 61 - 87
code/test_on_all_languages.py

@@ -1,20 +1,14 @@
 import os
-import sys
+import random
 import json
-sys.path.append("./")
-sys.path.append("../")
-sys.path.append(".../")
+from math import log
+from typing import Iterable
 from itertools import product
 from tqdm import tqdm
 import kenlm
-from math import log
-import numpy as np
 from make_noiser import Noise
 import pandas as pd
-import sys
 from get_most_probable_phonemes import get_most_probable_phonemes
-import random
-from collections import Counter
 random.seed(1023)
 
 
@@ -26,29 +20,29 @@ LANGUAGES_TYPOLOGIES = {
     'et' : ("Estonian", "agglutinative"),
     'eu' : ("Basque", "agglutinative"),
     'fr' : ("French", "fusional"),
-    'ja' : ("Japanese", "agglutinative"), 
-    'pl' : ("Polish", "fusional"), 
-    'pt' : ("Portuguese", "fusional"), 
-    'sr' : ("Serbian", "fusional"), 
+    'ja' : ("Japanese", "agglutinative"),
+    'pl' : ("Polish", "fusional"),
+    'pt' : ("Portuguese", "fusional"),
+    'sr' : ("Serbian", "fusional"),
     'tr' : ("Turkish", "agglutinative")}
 
-def compute_word_frequencies(word_train_corpus, pct=0.95) :
-    frequencies = Counter()
-    for line in word_train_corpus :
-        line = line.strip()
-        if not line : continue
-        # line = line.strip()
-        frequencies.update(Counter(line.split(" ")))
-    return dict(frequencies)
-
-
-def statistics_word(utterances, word_frequencies, model) :
+def statistics_word(utterances: list, model: kenlm.Model) -> dict:
+    """
+    This function will test a given language model\
+    on a given list of utterances.\
+    The function will also compute some statistics; MLU, TTR, etc
+
+    Parameters
+    ----------
+    - model
+        The estimated language model
+    - utterances: list
+        The utterances to test
+    """
     phoneme_utterances = []
     unique_words = set()
-    nb_unk = 0
     mlu_w = 0.0
     mlu_p = 0.0
-    mean_word_frequencies = 0
     nb_utterances = 0
     nb_words = 0
 
@@ -68,13 +62,6 @@ def statistics_word(utterances, word_frequencies, model) :
         nb_words += len(utterance_words)
         unique_words |= set(utterance_words)
 
-        for word in utterance_words :
-            word = word.strip()
-            if word in word_frequencies :
-                mean_word_frequencies += word_frequencies[word]
-            else : 
-                nb_unk += 1
-    
     mlu_w /= nb_utterances
     mlu_p /= nb_utterances
     ttr_w = len(unique_words) / nb_words
@@ -87,60 +74,62 @@ def statistics_word(utterances, word_frequencies, model) :
     statistics["mlu_w"] = mlu_w
     statistics["mlu_p"] = mlu_p
     statistics["ttr_w"] = ttr_w
-    statistics["mean_word_frequencies"] = mean_word_frequencies
-    statistics["nb_unk"] = nb_unk
 
     return statistics
 
-def create_sparse_combinantions(values) :
+def create_sparse_combinantions(values: Iterable) -> set:
+    """
+    This function will create combinantions for noising.
+    Each item in the returned set contains four values corresponding\
+    to (1) phoneme noise, (2) noise of from adult to child utterances,\
+    (3) noise of from child to adult utterances and (4) noise of
+    These combinantions are sparse because we only noise one value at time.
+    For example, an item can be (0.0, 0.0, 0.0, 0.25), which means that we only
+    noise 25 percent of the phonemes, and nothing else is affected.
+    See the file make_noiser.py for more infomrations.
+    """
     sparse_combinantions = []
     for value in values :
-        for idx in range(len(values)) : 
+        for idx in range(len(values)) :
             sparse_values = [0.0] * len(values)
             sparse_values[idx] = value
             sparse_combinantions.append(tuple(sparse_values))
     return set(sparse_combinantions)
 
-def test(json_files_directory, models_directory, phoneme_train_files, word_train_files, add_noise=False) :
+def test(json_files_directory, models_directory, train_files, add_noise=True) :
     """
+    This function will test the language models on CHILDES corpora
     """
     columns = ["language", "typology", "family", "speaker",\
                 "age", "perplexity", "entropy", "mlu", "mlu_without_repetition",\
                 "phonemes_order_noise", "speakers_noise_adult",\
                 "speakers_noise_child", "phonemes_noise"]
     results = pd.DataFrame(columns=columns, index=None)
-    all_combinations = list(product((0.0, 0.25, 0.5, 0.75), repeat=4)) if add_noise else [((0.0, 0.0, 0.0, 0.0))]
-    # sparse_combinantions = create_sparse_combinantions((0.0, 0.25, 0.5, 0.75))
+    # all_combinations = (list(product((0.0, 0.25, 0.5, 0.75), repeat=4))
+    #                       if add_noise else [((0.0, 0.0, 0.0, 0.0))])
+    sparse_combinantions = create_sparse_combinantions((0.0, 0.25, 0.5, 0.75))
     # noise_values = np.linspace(0.0, 1.0, num=6)
-    for phonemes_noise, speakers_noise_child, speakers_noise_adult, phonemes_order_noise in tqdm(all_combinations, total=len(all_combinations)) :
+    for phonemes_noise, speakers_noise_child, speakers_noise_adult, phonemes_order_noise in tqdm(sparse_combinantions, total=len(sparse_combinantions)) :
         for test_filename, model_filename in product(os.listdir(json_files_directory), os.listdir(models_directory)) :
             lg_iso, _ = test_filename.split(".")
             model_lg = model_filename.split(".")[0]
-            if lg_iso != model_lg : continue
-            print(lg_iso, model_lg)
-            most_probable_phonemes = get_most_probable_phonemes(f"{phoneme_train_files}/{lg_iso}.one_sentence_per_line")
-            word_frequencies = compute_word_frequencies(f"{word_train_files}/{lg_iso}.one_sentence_per_line")
+            if lg_iso != model_lg :
+                continue
+            most_probable_phonemes = get_most_probable_phonemes(f"{train_files}/{lg_iso}.one_sentence_per_line")
             loaded_json = json.load(open(f"{json_files_directory}/{test_filename}"))
             if add_noise :
                 noise = Noise(most_probable_phonemes,
-                                phonemes_order_noise=phonemes_order_noise,
-                                speakers_noise=(speakers_noise_child, speakers_noise_adult),
-                                phonemes_noise=phonemes_noise)
+                                phonemes_order_noise_value=phonemes_order_noise,
+                                speakers_noise_values=(speakers_noise_child, speakers_noise_adult),
+                                phonemes_noise_value=phonemes_noise)
                 loaded_json = noise(loaded_json)
             model = kenlm.Model(f"{models_directory}/{model_filename}")
             for family in loaded_json :
-                for age in loaded_json[family] : 
+                for age in loaded_json[family] :
                     if age == "None" : print(family, lg_iso, age); continue
                     for speaker in loaded_json[family][age] :
                         if speaker not in ["Adult", "Target_Child"] : continue
-                        # test_utterances = "\n".join(loaded_json[family][age][speaker])
-                        # utterances = [utterance.split(" ") for utterance in loaded_json[family][age][speaker]]
-                        # mlu = np.mean([len(utterance) for utterance in utterances])
-                        # mlu_without_repetition = np.mean([len(set(utterance)) for utterance in utterances])
-                        # ppl = model.perplexity(test_utterances)
-                        # entropy = log(ppl)
-
-                        results_statistics = statistics_word(loaded_json[family][age][speaker], word_frequencies, model)
+                        results_statistics = statistics_word(loaded_json[family][age][speaker], model)
                         language, typology = LANGUAGES_TYPOLOGIES[lg_iso]
                         new_row =  {"language" : language,
                                     "typology" : typology,
@@ -152,8 +141,6 @@ def test(json_files_directory, models_directory, phoneme_train_files, word_train
                                     "mlu_w" : results_statistics["mlu_w"],
                                     "mlu_p" : results_statistics["mlu_p"],
                                     "ttr_w" : results_statistics["ttr_w"],
-                                    "mean_word_frequencies" : results_statistics["mean_word_frequencies"],
-                                    "nb_unk" : results_statistics["nb_unk"],
                                     "phonemes_order_noise" : phonemes_order_noise,
                                     "speakers_noise_adult" : speakers_noise_adult,
                                     "speakers_noise_child" : speakers_noise_child,
@@ -164,46 +151,33 @@ if __name__ == "__main__":
     from argparse import ArgumentParser, BooleanOptionalAction
 
     parser = ArgumentParser()
-    parser.add_argument('--phoneme_train_directory',
-        required=True,
-        help="Dataset containing the train files in phonemes (dot one_sentence_per_line) "
-        )
-    parser.add_argument('--word_train_directory',
+    parser.add_argument('--train_directory',
        required=True,
-        help="Dataset containing the train files in words (dot one_sentence_per_line) "
+        help="The directory containing the train files tokenized in phonemes."
        )
-    parser.add_argument('--models_directory', 
+    parser.add_argument('--models_directory',
        required=True,
-        help="Folder containing the estimated parameters"
+        help="The directory containing the trained language models."
        )
-    
+
    parser.add_argument('--json_files_directory',
        required=True,
-        help="Directory containing json files for test"
+        help="The directory containing CHILDES utterances in json format for each language"
        )
-    
-    parser.add_argument('--out_dirname',
-        required=True,
-        help="Out directory"
-        )
-    parser.add_argument('--out_filename',
-            required=True,
-            help="Out filename"
-            )
-    parser.add_argument("--add_noise", action=BooleanOptionalAction)
+
+    parser.add_argument("--add_noise",
+        help="Whether noise the CHILDES utterances or not",
+        action=BooleanOptionalAction)
 
     args = parser.parse_args()
     add_noise = args.add_noise
     json_files_directory = args.json_files_directory
-    phoneme_train_files, word_train_files = args.phoneme_train_directory, args.word_train_directory
+    phoneme_train_files = args.train_directory
     models_directory = args.models_directory
-    out_dirname = args.out_dirname
-    out_filename = args.out_filename
 
     if not os.path.exists("results"):
         os.makedirs("results")
-    test(json_files_directory, 
-        models_directory, 
-        phoneme_train_files,
-        word_train_files, 
-        add_noise).to_csv(f"{out_dirname}/{out_filename}.csv")
+    test(json_files_directory,
+            models_directory,
+            phoneme_train_files,
+            add_noise=add_noise).to_csv("results/results.csv")

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/da.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/Xf/J3/MD5E-s15872283--589ddd6867dae20a40048aaed160f751/MD5E-s15872283--589ddd6867dae20a40048aaed160f751

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/de.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/PK/VG/MD5E-s16478486--16c8639a6e1bc848b9e7c835138bc779/MD5E-s16478486--16c8639a6e1bc848b9e7c835138bc779

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/en.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/k5/41/MD5E-s24346046--828c32d227d0c2d934ac85954c0549be/MD5E-s24346046--828c32d227d0c2d934ac85954c0549be

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/es.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/Vj/Zq/MD5E-s15998814--588f8969805d7e19a2fb8555c805c7bf/MD5E-s15998814--588f8969805d7e19a2fb8555c805c7bf

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/et.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/kK/m3/MD5E-s16250386--1fa013016100de4c1369aaaf5b51c3bc/MD5E-s16250386--1fa013016100de4c1369aaaf5b51c3bc

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/eu.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/Z5/WJ/MD5E-s10355435--f2c85a6f418cd351e643387f77f80466/MD5E-s10355435--f2c85a6f418cd351e643387f77f80466

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/fr.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/xQ/Mz/MD5E-s13149110--66bc34c1a1005f80f734ff7c9e0330ef/MD5E-s13149110--66bc34c1a1005f80f734ff7c9e0330ef

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/ja.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/Fx/P8/MD5E-s14790606--0468f28b5726a09d980c07fb217974e8/MD5E-s14790606--0468f28b5726a09d980c07fb217974e8

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/pl.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/8k/jP/MD5E-s27403501--65e8c8d45fe6075ab088da985dde5bfd/MD5E-s27403501--65e8c8d45fe6075ab088da985dde5bfd

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/pt.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/pq/Gx/MD5E-s19442758--174c64a1b49c9d94615dfcfb5c52508b/MD5E-s19442758--174c64a1b49c9d94615dfcfb5c52508b

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/sr.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/VK/59/MD5E-s15029711--76eb34c274a6e5ab100575ce2fa60ab0/MD5E-s15029711--76eb34c274a6e5ab100575ce2fa60ab0

+ 0 - 1
datasets/opensubtitles_corpora/tokenized_in_words/tr.one_sentence_per_line

@@ -1 +0,0 @@
-../../../.git/annex/objects/FM/Vk/MD5E-s22131015--b159f0714684e9fc60945904418a1240/MD5E-s22131015--b159f0714684e9fc60945904418a1240

+ 1 - 0
environment.yml

@@ -0,0 +1 @@
+.git/annex/objects/V8/56/MD5E-s5478--6ce1a1dfc33f3c2aee2a0c4f0f11aa02.yml/MD5E-s5478--6ce1a1dfc33f3c2aee2a0c4f0f11aa02.yml

+ 1 - 0
estimated/da.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/5x/74/MD5E-s28028840--71fbf9fb169884d736da26c047e16f4e.arpa/MD5E-s28028840--71fbf9fb169884d736da26c047e16f4e.arpa

+ 1 - 0
estimated/de.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/Z2/0W/MD5E-s22540364--11e64685c900b25e47a7c2a137dd7a9b.arpa/MD5E-s22540364--11e64685c900b25e47a7c2a137dd7a9b.arpa

+ 1 - 0
estimated/en.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/KG/5q/MD5E-s31436879--847b2a7d2e5210d87f638963a8764808.arpa/MD5E-s31436879--847b2a7d2e5210d87f638963a8764808.arpa

+ 1 - 0
estimated/es.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/Zq/pj/MD5E-s10061705--b466f7fc80c31c74891f85256d324c43.arpa/MD5E-s10061705--b466f7fc80c31c74891f85256d324c43.arpa

+ 1 - 0
estimated/et.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/w4/9Q/MD5E-s18873182--89176dfdd746dd62fe277cf760489709.arpa/MD5E-s18873182--89176dfdd746dd62fe277cf760489709.arpa

+ 1 - 0
estimated/eu.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/vZ/2G/MD5E-s12176188--ae20d403fb51fef0b7572521b95d47a9.arpa/MD5E-s12176188--ae20d403fb51fef0b7572521b95d47a9.arpa

+ 1 - 0
estimated/fr.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/QG/ff/MD5E-s20901089--1873e4fa871af748a4028e962a941b74.arpa/MD5E-s20901089--1873e4fa871af748a4028e962a941b74.arpa

+ 1 - 0
estimated/ja.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/6W/kM/MD5E-s8026445--d320df753b865052827e96c0be67e418.arpa/MD5E-s8026445--d320df753b865052827e96c0be67e418.arpa

+ 1 - 0
estimated/pl.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/5j/46/MD5E-s23833364--0c4492ab80d3c7f37ff923288dc88d80.arpa/MD5E-s23833364--0c4492ab80d3c7f37ff923288dc88d80.arpa

+ 1 - 0
estimated/pt.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/ZF/pz/MD5E-s22346672--1a9f56836b07f9a0d981e329ce47e1c9.arpa/MD5E-s22346672--1a9f56836b07f9a0d981e329ce47e1c9.arpa

+ 1 - 0
estimated/sr.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/6M/xg/MD5E-s20755431--b4f26a89a36c9c4a61bb39a00c83c116.arpa/MD5E-s20755431--b4f26a89a36c9c4a61bb39a00c83c116.arpa

+ 1 - 0
estimated/tr.one_sentence_per_line.arpa

@@ -0,0 +1 @@
+../.git/annex/objects/Gf/70/MD5E-s18935056--4fe9ce073a5c9cb9e601fa1424524c3a.arpa/MD5E-s18935056--4fe9ce073a5c9cb9e601fa1424524c3a.arpa

+ 1 - 0
results/evaluation.csv

@@ -0,0 +1 @@
+../.git/annex/objects/5g/Gj/MD5E-s607--3a8fb15dbd039d29e12a0e126c73112d.csv/MD5E-s607--3a8fb15dbd039d29e12a0e126c73112d.csv

+ 0 - 1
results/results_for_study2_datalad.csv

@@ -1 +0,0 @@
-../.git/annex/objects/gg/pJ/MD5E-s1271727--76f1f59ac68fe619ae34bd03ede011a8.csv/MD5E-s1271727--76f1f59ac68fe619ae34bd03ede011a8.csv

+ 0 - 1
ter

@@ -1 +0,0 @@
-.git/annex/objects/K5/g1/MD5E-s11843--f7a613986d426f7c5667fc59487dd559/MD5E-s11843--f7a613986d426f7c5667fc59487dd559