compare.py

import pandas as pd

from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager
from ChildProject.metrics import segments_to_grid, conf_matrix, segments_to_annotation

from pathlib import Path

from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure


def compare_vandam(set1: str, set2: str):
    speakers = ['CHI', 'OCH', 'FEM', 'MAL']

    project = ChildProject('inputs/vandam-data')
    am = AnnotationManager(project)
    am.read()

    # get the portions of the recordings covered by both annotation sets
    intersection = AnnotationManager.intersection(am.annotations, [set1, set2])

    # output directory
    dirName = "outputs/compare/" + set1.replace("/", "") + "-" + set2.replace("/", "")
    try:
        # create the target directory
        Path(dirName).mkdir(parents=True)
        print("Directory", dirName, "created")
    except FileExistsError:
        print("Directory", dirName, "already exists")

    # open the output file
    file = open("{0}/{1}-{2}.txt".format(dirName, set1.replace("/", ""), set2.replace("/", "")), "a")

    # retrieve the segments of both sets over their intersection,
    # keeping only the speaker types of interest
    segments = am.get_collapsed_segments(intersection)
    segments = segments[segments['speaker_type'].isin(speakers)]

    for speaker in speakers:
        speaker_segments = segments[segments['speaker_type'] == speaker]
        ref = segments_to_annotation(speaker_segments[speaker_segments['set'] == set1], 'speaker_type')
        hyp = segments_to_annotation(speaker_segments[speaker_segments['set'] == set2], 'speaker_type')

        # compute detection metrics (set1 as reference, set2 as hypothesis)
        metric = DetectionPrecisionRecallFMeasure()
        detail = metric.compute_components(ref, hyp)
        precision, recall, f = metric.compute_metrics(detail)

        # save the metrics to the output file
        metric_output = "precision: {0} / recall: {1} / f: {2}\n".format(precision, recall, f)
        file.write(speaker + ": " + metric_output)
        print("Metrics [precision & recall & f] saved for {0}".format(speaker))

    file.close()

    # generate one time grid per set and save their confusion matrix
    set1_segm = segments_to_grid(segments[segments['set'] == set1], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
    set2_segm = segments_to_grid(segments[segments['set'] == set2], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
    matrix_df = pd.DataFrame(conf_matrix(set1_segm, set2_segm))
    matrix_df.to_csv("{0}/{1}-{2}-confusion-matrix.csv".format(dirName, set1.replace("/", ""), set2.replace("/", "")), mode="w", index=False)
    print("Confusion matrix saved for {0} and {1}!".format(set1, set2))


if __name__ == '__main__':
    compare_vandam('eaf', 'cha')
    compare_vandam('eaf', 'cha/aligned')
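
# Usage sketch (output names are derived from the calls above, nothing new is assumed):
# running `python compare.py` from the project root should write, for the first call,
#   outputs/compare/eaf-cha/eaf-cha.txt                    (per-speaker precision / recall / F)
#   outputs/compare/eaf-cha/eaf-cha-confusion-matrix.csv   (speaker-type confusion counts)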