confusion_matrix.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. #!/usr/bin/env python3
  2. from ChildProject.projects import ChildProject
  3. from ChildProject.annotations import AnnotationManager
  4. from ChildProject.metrics import gamma, segments_to_grid
  5. import numpy as np
  6. import pandas as pd
  7. from sklearn.metrics import confusion_matrix
  8. from sklearn.preprocessing import normalize
  9. import seaborn as sns
  10. import matplotlib.pyplot as plt
  11. import sys
  12. speakers = ['CHI', 'OCH', 'FEM', 'MAL']
  13. path = sys.argv[1]
  14. project = ChildProject(path)
  15. am = AnnotationManager(project)
  16. am.read()
  17. intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
  18. segments = am.get_collapsed_segments(intersection)
  19. segments = segments[segments['speaker_type'].isin(speakers)]
  20. vtc = segments_to_grid(segments[segments['set'] == 'vtc'], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
  21. its = segments_to_grid(segments[segments['set'] == 'its'], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
  22. speakers.extend(['overlap', 'none'])
  23. def get_pick(row):
  24. for cat in reversed(speakers):
  25. if row[cat]:
  26. return cat
  27. def conf_matrix(horizontal, vertical, categories):
  28. vertical = pd.DataFrame(vertical, columns = categories)
  29. vertical['pick'] = vertical.apply(
  30. get_pick,
  31. axis = 1
  32. )
  33. vertical = vertical['pick'].values
  34. horizontal = pd.DataFrame(horizontal, columns = categories)
  35. horizontal['pick'] = horizontal.apply(
  36. get_pick,
  37. axis = 1
  38. )
  39. horizontal = horizontal['pick'].values
  40. confusion = confusion_matrix(vertical, horizontal, labels = categories)
  41. confusion = normalize(confusion, axis = 1, norm = 'l1')
  42. return confusion
  43. plt.rcParams.update({'font.size': 12})
  44. plt.rc('xtick', labelsize = 10)
  45. plt.rc('ytick', labelsize = 10)
  46. fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize=(6.4*2, 4.8))
  47. confusion = conf_matrix(its, vtc, speakers)
  48. sns.heatmap(confusion, annot = True, fmt = '.2f', ax = axes[0], cmap = 'Reds')
  49. axes[0].set_xlabel('its')
  50. axes[0].set_ylabel('vtc')
  51. axes[0].xaxis.set_ticklabels(speakers)
  52. axes[0].yaxis.set_ticklabels(speakers)
  53. confusion = conf_matrix(vtc, its, speakers)
  54. sns.heatmap(confusion, annot = True, fmt = '.2f', ax = axes[1], cmap = 'Reds')
  55. axes[1].set_xlabel('vtc')
  56. axes[1].set_ylabel('its')
  57. axes[1].xaxis.set_ticklabels(speakers)
  58. axes[1].yaxis.set_ticklabels(speakers)
  59. plt.savefig('Fig5.pdf', bbox_inches = 'tight')