"""This module only implements one function that transforms\ dot one_utterance_per_line files to json. Dot one_utterance_per_line\ are files containing one utterance per line with the speaker role\ that produced this utterance, his family and the age of the child\ at the time the utterance was produced. """ import os import json from collections import defaultdict from tqdm import tqdm import random random.seed(1023) def one_utterance_per_line_to_json(directory: str, out_dirname: str) -> None: """ This function will transform dot one_utterance_per_line\ files to json files. Parameters ---------- - directory : str The path to the directory where the one_utterance_per_line\ files are stored. - out_dirname : str The path where the output json files will be stored. """ for language_filename in tqdm(os.listdir(directory)) : # Dot all files contain raw childes data. One line contains the family, \ # the participant who speaks, the age, and the produced utterance. if not language_filename.endswith(".one_utterance_per_line") : continue language, _ = language_filename.split(".") by_family = defaultdict(lambda : defaultdict(lambda : defaultdict(list))) for participant_age_utterance in open(f"{directory}/{language_filename}") : family, participant, age, utterance = participant_age_utterance.strip().split(",") participant = "Adult" if participant in ["Father", "Mother"] else participant if utterance in by_family[family][age][participant] : continue by_family[family][age][participant].append(utterance) with open(f"{out_dirname}/{language}.json", "w") as out_filename : json.dump( by_family, out_filename) if __name__ == "__main__" : from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument("--files_directory", help="Directory containing one_utterance_per_line files.", required=True) parser.add_argument("--out_dirname", help="The directory where outputs will be stored.", required=True) args = parser.parse_args() files_directory = args.files_directory out_dirname = args.out_dirname if not os.path.exists(out_dirname): os.makedirs(out_dirname) one_utterance_per_line_to_json(files_directory, out_dirname)