123456789101112131415 |
from collections import Counter
from collections import defaultdict
def get_most_probable_phonemes(one_sentence_per_line_file, p=0.007):
    """Return the phonemes whose relative frequency in a corpus is at least ``p``.

    The corpus is a text file with one sentence per line, each sentence being
    a whitespace-separated sequence of phonemes. A phoneme's probability is
    its number of occurrences divided by the total number of phoneme
    occurrences in the whole file.

    Args:
        one_sentence_per_line_file: Path of the corpus file to read.
        p: Inclusive lower bound on the probability a phoneme must reach to
            be returned.

    Returns:
        A list of the phonemes with probability ``>= p``, in order of first
        appearance in the file. The list is empty when the file contains no
        phonemes.
    """
    counts = Counter()
    # The context manager closes the file even if reading raises.
    with open(one_sentence_per_line_file) as corpus:
        for sentence in corpus:
            # split() without a separator drops the trailing newline and
            # collapses runs of whitespace, so blank lines and doubled
            # spaces never register the empty string as a phoneme.
            counts.update(sentence.split())
    total = sum(counts.values())
    # When the corpus is empty there is nothing to iterate over, so the
    # division by ``total`` is never evaluated with a zero denominator.
    return [phoneme for phoneme, count in counts.items() if count / total >= p]
|