get_most_probable_phonemes.py 598 B

123456789101112131415
  1. from collections import defaultdict
  2. def get_most_probable_phonemes(one_sentence_per_line_file, p=0.007) :
  3. """
  4. Compute the probabilities of phonemes and return the phonemes for
  5. which probabilities > p.
  6. """
  7. counts = defaultdict(int)
  8. for sentence in open(one_sentence_per_line_file) :
  9. sentence = sentence.rstrip()
  10. for phoneme in sentence.split(" ") :
  11. counts[phoneme] += 1
  12. total = sum(counts.values())
  13. for phoneme in counts :
  14. counts[phoneme] /= total
  15. return [phoneme for phoneme, probability in counts.items() if probability >= p]