"""This module implements a function that\
    evaluate the trained language moedels"""
import os
from math import log
import random
import pandas as pd
import kenlm
random.seed(1023)

# Maps a two-letter language code to a pair:
# (language name, morphological typology of that language).
LANGUAGES_TYPOLOGIES = {
    "da": ("Danish", "fusional"),
    "de": ("German", "fusional"),
    "en": ("English", "fusional"),
    "es": ("Spanish", "fusional"),
    "et": ("Estonian", "agglutinative"),
    "eu": ("Basque", "agglutinative"),
    "fr": ("French", "fusional"),
    "ja": ("Japanese", "agglutinative"),
    "pl": ("Polish", "fusional"),
    "pt": ("Portuguese", "fusional"),
    "sr": ("Serbian", "fusional"),
    "tr": ("Turkish", "agglutinative"),
}

def _read_sentences(path: str) -> str:
    """Return the lines of *path*, each stripped, joined with newlines."""
    # The context manager closes the handle deterministically, and the explicit
    # encoding keeps the result independent of the platform's default locale.
    with open(path, encoding="utf-8") as handle:
        return "\n".join(line.strip() for line in handle)


def evaluate(train_files_directory: str,
             dev_files_directory: str,
             models_directory: str) -> pd.DataFrame:
    """
    Compute the train and dev entropies of every trained language model.

    The three directories are listed, sorted by file name, and matched
    position by position: the i-th training file, the i-th dev file and
    the i-th model are evaluated together. The language code is the part
    of the training file name before its first dot. Each entropy is the
    natural logarithm of the perplexity the model reports for the text.

    Parameters
    ----------
    - train_files_directory: str
        The path to the directory containing training files.
    - dev_files_directory: str
        The path to the directory containing testing/development files.
    - models_directory: str
        The path to the directory containing the trained language models.

    Returns
    -------
    - pd.DataFrame
        One row per language with the columns "language",
        "train_entropy" and "dev_entropy". The frame is empty (but keeps
        those columns) when no file triplet is found.

    Raises
    ------
    - KeyError
        If a language code is not a key of LANGUAGES_TYPOLOGIES.

    Notes
    -----
    `zip` stops at the shortest listing, so the three directories are
    expected to hold the same number of files, in matching sort order.
    """
    triplets_files_model = zip(sorted(os.listdir(train_files_directory)),
                               sorted(os.listdir(dev_files_directory)),
                               sorted(os.listdir(models_directory)))
    columns = ["language", "train_entropy", "dev_entropy"]
    # Rows are gathered in a plain list and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []
    for train_filename, dev_filename, model_filename in triplets_files_model:
        # Keep only the text before the first dot, so names with several
        # extensions (e.g. "en.train.txt") still resolve to their language.
        language = train_filename.split(".", 1)[0]
        model = kenlm.Model(os.path.join(models_directory, model_filename))
        train_sents = _read_sentences(
            os.path.join(train_files_directory, train_filename))
        dev_sents = _read_sentences(
            os.path.join(dev_files_directory, dev_filename))
        rows.append({
            "language": LANGUAGES_TYPOLOGIES[language][0],
            "train_entropy": log(model.perplexity(train_sents)),
            "dev_entropy": log(model.perplexity(dev_sents)),
        })
    return pd.DataFrame(rows, columns=columns)
if __name__ == "__main__":
    # Command-line entry point: evaluate every model and store the resulting
    # table as results/evaluation.csv, relative to the working directory.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--train_files_directory',
        required=True,
        help="The directory containing the OpenSubtitles training files"
        )
    parser.add_argument('--dev_files_directory',
        required=True,
        help="The directory containing the OpenSubtitles test files"
        )
    parser.add_argument('--models_directory',
        required=True,
        help="The directory containing the trained language models"
        )

    args = parser.parse_args()
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first, and is a no-op when the directory is already there.
    os.makedirs("results", exist_ok=True)
    evaluate(args.train_files_directory,
             args.dev_files_directory,
             args.models_directory).to_csv("results/evaluation.csv")