from numpy import mod
import pandas as pd
from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager
from ChildProject.metrics import segments_to_grid, conf_matrix, segments_to_annotation
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


def _plot_normalized_confusion(ax, counts, totals, xlabel, ylabel, labels):
    """Draw a row-normalized confusion heatmap on ``ax``.

    Each row of ``counts`` is divided by the matching entry of ``totals``
    before plotting, and both axes are labelled with ``labels``.
    """
    # NOTE(review): an entry of `totals` equal to zero yields NaN/inf cells;
    # this mirrors the original computation and is left untouched.
    confusion = counts / totals[:, None]
    sns.heatmap(confusion, annot=True, fmt='.2f', ax=ax, cmap='Reds')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.xaxis.set_ticklabels(labels)
    ax.yaxis.set_ticklabels(labels)


def compare_vandam(set1: str, set2: str) -> None:
    """Compare two annotation sets of the ``inputs/vandam-data`` project.

    For every speaker type, detection precision / recall / F-measure of
    ``set2`` against ``set1`` are appended to
    ``outputs/compare/<set1>-<set2>/<set1>-<set2>.txt`` (slashes removed from
    the set names). A figure holding two normalized confusion matrices (one
    normalized by ``set1`` totals, one by ``set2`` totals) is then saved next
    to it as ``<set1>-<set2>-confusion-matrix.jpg``.

    Args:
        set1: Name of the annotation set used as the reference.
        set2: Name of the annotation set used as the hypothesis.
    """
    speakers = ['CHI', 'OCH', 'FEM', 'MAL']

    project = ChildProject('inputs/vandam-data')
    am = AnnotationManager(project)
    am.read()

    # Keep only the portions covered by both annotation sets.
    intersection = AnnotationManager.intersection(am.annotations, [set1, set2])

    # Output directory; set names may contain "/" which is stripped.
    pair_name = set1.replace("/", "") + "-" + set2.replace("/", "")
    dirName = "outputs/compare/" + pair_name
    try:
        Path(dirName).mkdir(parents=True)
        print("Directory " , dirName , " Created ")
    except FileExistsError:
        print("Directory " , dirName , " already exists")

    # The collapsed segments do not depend on the speaker, so they are
    # computed once and reused for the metrics and for the confusion matrix.
    segments = am.get_collapsed_segments(intersection)

    # Append mode: results from successive runs accumulate in the same file.
    # The context manager guarantees the file is flushed and closed.
    with open("{0}/{1}.txt".format(dirName, pair_name), "a") as metrics_file:
        for speaker in speakers:
            speaker_segments = segments[segments['speaker_type'] == speaker]

            ref = segments_to_annotation(
                speaker_segments[speaker_segments['set'] == set1], 'speaker_type'
            )
            hyp = segments_to_annotation(
                speaker_segments[speaker_segments['set'] == set2], 'speaker_type'
            )

            # NOTE(review): metrics are only computed when this file runs as a
            # script; the reason is not visible here, so the guard is kept
            # as-is. Confirm before removing.
            if __name__ == '__main__':
                from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure

                metric = DetectionPrecisionRecallFMeasure()
                detail = metric.compute_components(ref, hyp)
                precision, recall, f = metric.compute_metrics(detail)

                metric_output = "precision: {0} / recall : {1} / f: {2}\n".format(precision, recall, f)
                metrics_file.write(speaker + ": " + metric_output)
                print("Metrics [precision & recall & f] saved! for {0}".format(speaker))

    # Rasterize both sets onto a common grid, from 0 to the last offset.
    last_offset = segments['segment_offset'].max()
    set1_segm = segments_to_grid(
        segments[segments['set'] == set1], 0, last_offset, 100, 'speaker_type', speakers
    )
    set2_segm = segments_to_grid(
        segments[segments['set'] == set2], 0, last_offset, 100, 'speaker_type', speakers
    )

    # Tick labels: the speakers plus an extra 'none' entry (built as a new
    # list instead of mutating `speakers`).
    labels = speakers + ['none']

    confusion_counts = conf_matrix(set1_segm, set2_segm)

    plt.rcParams.update({'font.size': 12})
    plt.rc('xtick', labelsize=10)
    plt.rc('ytick', labelsize=10)
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(6.4 * 2, 4.8))

    # Left: normalized by set1 totals. Right: transposed, normalized by set2.
    _plot_normalized_confusion(
        axes[0], confusion_counts, np.sum(set1_segm, axis=0), set2, set1, labels
    )
    _plot_normalized_confusion(
        axes[1], np.transpose(confusion_counts), np.sum(set2_segm, axis=0), set1, set2, labels
    )

    # bbox_inches belongs to savefig; it used to be passed to str.format,
    # which silently ignored it.
    plt.savefig(
        "{0}/{1}-confusion-matrix.jpg".format(dirName, pair_name),
        bbox_inches='tight',
    )
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    print("Confusion matrix saved for {0} and {1}!".format(set1, set2))


compare_vandam('eaf', 'cha')
compare_vandam('eaf', 'cha/aligned')
compare_vandam('cha', 'cha/aligned')