#!/bin/bash
#
# Estimate one n-gram language model per training file with KenLM's lmplz.
#
# For every "*.one_sentence_per_line" file in the train folder, lmplz is run
# with --discount_fallback and the requested order, and its output is written
# to "<outfolder>/<input file name>.arpa".
#
# Options:
#   -h  display help and exit
#   -n  size of the n-grams (passed to lmplz as -o)
#   -t  folder that contains the train files
#   -o  out folder where the estimated models are saved (created if missing)
#   -k  path to the kenlm folder (must contain build/bin/lmplz)
#
# Exit status: 0 when every model was estimated, non-zero on a usage error,
# a missing input/tool, or when lmplz failed for at least one file.

# Catch typos in variable names. `set -e` is deliberately NOT used: a failing
# lmplz run is handled explicitly below so the remaining files still get
# processed.
set -u

programname=$0

#######################################
# Print the usage text and terminate the script.
# Globals:   programname (read)
# Arguments: $1 - exit status (default 1). With 0 the text goes to stdout
#                 (explicit help request), otherwise to stderr.
# Outputs:   usage text
# Returns:   never returns; exits with the given status
#######################################
usage() {
  local status=${1:-1}
  local fd=1
  if [[ "$status" != 0 ]]; then
    fd=2
  fi
  {
    echo "usage: $programname [-h] [-t trainfolder] [-o outfolder] [-k kenlm_path] [-n ngram_size]"
    echo " -h display help"
    echo " -n size of the ngrams for the language model"
    echo " -t folder that contains the train files"
    echo " -o out folder where the estimated parameters will be saved"
    echo " -k path to kenlm folder"
  } >&"$fd"
  exit "$status"
}

#######################################
# Print an error message to stderr and exit with status 1.
# Globals:   programname (read)
# Arguments: $* - message
#######################################
die() {
  printf '%s: %s\n' "$programname" "$*" >&2
  exit 1
}

ngram_size=
train_files=
out_dirname=
kenlm_folder=

# The leading ':' selects silent error reporting so unknown options and
# missing option arguments are reported by the ':' and '?' arms below.
# -h is part of the option string, so it works in any position.
while getopts ':ht:o:k:n:' flag; do
  case "$flag" in
    h) usage 0 ;;
    n) ngram_size=$OPTARG ;;
    t) train_files=$OPTARG ;;
    o) out_dirname=$OPTARG ;;
    k) kenlm_folder=$OPTARG ;;
    :)
      echo "$programname: option -$OPTARG requires an argument" >&2
      usage 1
      ;;
    \?)
      echo "$programname: unknown option -$OPTARG" >&2
      usage 1
      ;;
  esac
done
# Drop exactly the arguments getopts consumed.
shift $((OPTIND - 1))

# All four values are needed to build the lmplz command line; refuse to run
# with any of them missing instead of operating on paths such as "/*.arpa".
if [[ -z "$ngram_size" || -z "$train_files" || -z "$out_dirname" || -z "$kenlm_folder" ]]; then
  echo "$programname: options -n, -t, -o and -k are all required" >&2
  usage 1
fi

[[ "$ngram_size" =~ ^[1-9][0-9]*$ ]] \
  || die "ngram size must be a positive integer, got: $ngram_size"
[[ -d "$train_files" ]] || die "train folder not found: $train_files"

lmplz="$kenlm_folder/build/bin/lmplz"
[[ -x "$lmplz" ]] || die "lmplz not found or not executable: $lmplz"

echo "======================================================"
echo "ngram size: $ngram_size"
echo "trainfolder: $train_files"
echo "outfolder: $out_dirname"
echo "kenlm: $kenlm_folder"

mkdir -p -- "$out_dirname" || die "cannot create out folder: $out_dirname"

# Collect the inputs with nullglob so that an empty match yields an empty
# array rather than the literal, unexpanded pattern.
shopt -s nullglob
train_inputs=("$train_files"/*.one_sentence_per_line)
shopt -u nullglob
((${#train_inputs[@]} > 0)) \
  || die "no *.one_sentence_per_line files found in: $train_files"

echo "================= STARTING ESTIMATION ================"

failures=0
for filename in "${train_inputs[@]}"; do
  # ${filename##*/} strips the directory part, so the model lands in the out
  # folder under the input's own file name plus ".arpa".
  out_file="$out_dirname/${filename##*/}.arpa"
  if ! "$lmplz" --discount_fallback -o "$ngram_size" <"$filename" >"$out_file"; then
    echo "$programname: lmplz failed for $filename" >&2
    failures=$((failures + 1))
  fi
done

((failures == 0)) || die "estimation failed for $failures file(s)"