from collections import defaultdict
def get_most_probable_phonemes(one_sentence_per_line_file, p=0.007):
    """
    Return the phonemes whose relative frequency in a file is at least p.

    The file is read line by line. Trailing whitespace is stripped from
    each line, the line is split into words on "@", and each word is
    split into phonemes on "$". A phoneme's probability is its number of
    occurrences divided by the total number of phoneme tokens in the file.

    Args:
        one_sentence_per_line_file: Path of the file to read, one
            sentence per line.
        p: Inclusive probability threshold; a phoneme is kept when its
            probability is >= p.

    Returns:
        A list of the selected phonemes, in the iteration order of the
        internal counts dict (first appearance in the file on Python
        3.7+). The list is empty when the file is empty.

    Note:
        Blank lines and doubled or trailing delimiters yield the empty
        string as a token. It is counted like any other phoneme, so it
        contributes to the total and may appear in the result.
    """
    counts = defaultdict(int)
    # Use a context manager so the file handle is closed deterministically
    # instead of whenever the garbage collector gets to it.
    with open(one_sentence_per_line_file) as sentences:
        for sentence in sentences:
            for word in sentence.rstrip().split("@"):
                for phoneme in word.split("$"):
                    counts[phoneme] += 1

    total = sum(counts.values())
    # total is 0 only when counts is empty, in which case the
    # comprehension body never runs and no division by zero can occur.
    return [
        phoneme
        for phoneme, count in counts.items()
        if count / total >= p
    ]