|
@@ -4,8 +4,6 @@ from ChildProject.projects import ChildProject
|
|
|
from ChildProject.annotations import AnnotationManager
|
|
|
from ChildProject.metrics import segments_to_annotation
|
|
|
|
|
|
-from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure
|
|
|
-
|
|
|
import matplotlib.pyplot as plt
|
|
|
import numpy as np
|
|
|
import os
|
|
@@ -25,107 +23,110 @@ def confusion(segments, prob):
|
|
|
def drop(segments, prob):
|
|
|
return segments.sample(frac = 1-prob)
|
|
|
|
|
|
-if not os.path.exists('scores.csv'):
|
|
|
- path = sys.argv[1]
|
|
|
- project = ChildProject(path)
|
|
|
- am = AnnotationManager(project)
|
|
|
- am.read()
|
|
|
+if __name__ == '__main__':
|
|
|
+ if not os.path.exists('scores.csv'):
|
|
|
+ from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure
|
|
|
+
|
|
|
+ path = sys.argv[1]
|
|
|
+ project = ChildProject(path)
|
|
|
+ am = AnnotationManager(project)
|
|
|
+ am.read()
|
|
|
+
|
|
|
+ intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
|
|
|
+ segments = am.get_collapsed_segments(intersection)
|
|
|
+ segments = segments[segments['speaker_type'].isin(speakers)]
|
|
|
+
|
|
|
+ conf50 = segments[segments['set'] == 'vtc'].copy()
|
|
|
+ conf50 = confusion(conf50, 0.5)
|
|
|
+ conf50['set'] = 'vtc (conf 50%)'
|
|
|
+
|
|
|
+ conf75 = segments[segments['set'] == 'vtc'].copy()
|
|
|
+ conf75 = confusion(conf75, 0.75)
|
|
|
+ conf75['set'] = 'vtc (conf 75%)'
|
|
|
+
|
|
|
+ drop50 = segments[segments['set'] == 'vtc'].copy()
|
|
|
+ drop50 = drop(drop50, 0.5)
|
|
|
+ drop50['set'] = 'vtc (drop 50%)'
|
|
|
+
|
|
|
+ drop75 = segments[segments['set'] == 'vtc'].copy()
|
|
|
+ drop75 = drop(drop75, 0.75)
|
|
|
+ drop75['set'] = 'vtc (drop 75%)'
|
|
|
+
|
|
|
+ segments = pd.concat([segments, conf50, conf75, drop50, drop75])
|
|
|
+
|
|
|
+ metric = DetectionPrecisionRecallFMeasure()
|
|
|
+
|
|
|
+ scores = []
|
|
|
+ for speaker in speakers:
|
|
|
+ ref = segments_to_annotation(segments[(segments['set'] == 'vtc') & (segments['speaker_type'] == speaker)], 'speaker_type')
|
|
|
+
|
|
|
+ for s in sets:
|
|
|
+ hyp = segments_to_annotation(segments[(segments['set'] == s) & (segments['speaker_type'] == speaker)], 'speaker_type')
|
|
|
+ detail = metric.compute_components(ref, hyp)
|
|
|
+ precision, recall, f = metric.compute_metrics(detail)
|
|
|
+
|
|
|
+ scores.append({
|
|
|
+ 'set': s,
|
|
|
+ 'speaker': speaker,
|
|
|
+ 'recall': recall,
|
|
|
+ 'precision': precision,
|
|
|
+ 'f': f
|
|
|
+ })
|
|
|
|
|
|
- intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
|
|
|
- segments = am.get_collapsed_segments(intersection)
|
|
|
- segments = segments[segments['speaker_type'].isin(speakers)]
|
|
|
+ scores = pd.DataFrame(scores)
|
|
|
+ scores.to_csv('scores.csv', index = False)
|
|
|
|
|
|
- conf50 = segments[segments['set'] == 'vtc'].copy()
|
|
|
- conf50 = confusion(conf50, 0.5)
|
|
|
- conf50['set'] = 'vtc (conf 50%)'
|
|
|
+ scores = pd.read_csv('scores.csv')
|
|
|
|
|
|
- conf75 = segments[segments['set'] == 'vtc'].copy()
|
|
|
- conf75 = confusion(conf75, 0.75)
|
|
|
- conf75['set'] = 'vtc (conf 75%)'
|
|
|
+ plt.rcParams.update({'font.size': 12})
|
|
|
+ plt.rc('xtick', labelsize = 10)
|
|
|
+ plt.rc('ytick', labelsize = 10)
|
|
|
|
|
|
- drop50 = segments[segments['set'] == 'vtc'].copy()
|
|
|
- drop50 = drop(drop50, 0.5)
|
|
|
- drop50['set'] = 'vtc (drop 50%)'
|
|
|
+ print(scores)
|
|
|
|
|
|
- drop75 = segments[segments['set'] == 'vtc'].copy()
|
|
|
- drop75 = drop(drop75, 0.75)
|
|
|
- drop75['set'] = 'vtc (drop 75%)'
|
|
|
+ styles = {
|
|
|
+ 'recall': 's',
|
|
|
+ 'precision': 'D',
|
|
|
+ 'f': 'o'
|
|
|
+ }
|
|
|
|
|
|
- segments = pd.concat([segments, conf50, conf75, drop50, drop75])
|
|
|
+ labels = {
|
|
|
+ 'recall': 'recall',
|
|
|
+ 'precision': 'precision',
|
|
|
+ 'f': 'F-measure'
|
|
|
+ }
|
|
|
|
|
|
- metric = DetectionPrecisionRecallFMeasure()
|
|
|
+ plt.figure(figsize = (6.4*1, 4.8*1+0.25*4.8))
|
|
|
|
|
|
- scores = []
|
|
|
for speaker in speakers:
|
|
|
- ref = segments_to_annotation(segments[(segments['set'] == 'vtc') & (segments['speaker_type'] == speaker)], 'speaker_type')
|
|
|
-
|
|
|
- for s in sets:
|
|
|
- hyp = segments_to_annotation(segments[(segments['set'] == s) & (segments['speaker_type'] == speaker)], 'speaker_type')
|
|
|
- detail = metric.compute_components(ref, hyp)
|
|
|
- precision, recall, f = metric.compute_metrics(detail)
|
|
|
-
|
|
|
- scores.append({
|
|
|
- 'set': s,
|
|
|
- 'speaker': speaker,
|
|
|
- 'recall': recall,
|
|
|
- 'precision': precision,
|
|
|
- 'f': f
|
|
|
- })
|
|
|
-
|
|
|
- scores = pd.DataFrame(scores)
|
|
|
- scores.to_csv('scores.csv', index = False)
|
|
|
-
|
|
|
-scores = pd.read_csv('scores.csv')
|
|
|
-
|
|
|
-plt.rcParams.update({'font.size': 12})
|
|
|
-plt.rc('xtick', labelsize = 10)
|
|
|
-plt.rc('ytick', labelsize = 10)
|
|
|
-
|
|
|
-print(scores)
|
|
|
-
|
|
|
-styles = {
|
|
|
- 'recall': 's',
|
|
|
- 'precision': 'D',
|
|
|
- 'f': 'o'
|
|
|
-}
|
|
|
-
|
|
|
-labels = {
|
|
|
- 'recall': 'recall',
|
|
|
- 'precision': 'precision',
|
|
|
- 'f': 'F-measure'
|
|
|
-}
|
|
|
-
|
|
|
-plt.figure(figsize = (6.4*1, 4.8*1+0.25*4.8))
|
|
|
-
|
|
|
-for speaker in speakers:
|
|
|
- i = speakers.index(speaker)
|
|
|
- ax = plt.subplot(2, 2, i+1)
|
|
|
- ax.set_xlim(-0.5,len(sets)-0.5)
|
|
|
- ax.set_ylim(0, 1)
|
|
|
-
|
|
|
- if i >= 2:
|
|
|
- ax.set_xticks(range(len(sets)))
|
|
|
- ax.set_xticklabels(sets, rotation = 45, horizontalalignment = 'right')
|
|
|
- else:
|
|
|
- ax.set_xticklabels(['' for i in range(len(sets))])
|
|
|
-
|
|
|
- if i%2 == 1:
|
|
|
- ax.set_yticklabels(['' for i in range(6)])
|
|
|
-
|
|
|
- ax.set_xlabel(speaker)
|
|
|
-
|
|
|
- _scores = scores[scores['speaker'] == speaker]
|
|
|
- for metric in ['recall', 'precision', 'f']:
|
|
|
- ax.scatter(
|
|
|
- x = _scores['set'].apply(lambda s: sets.index(s)),
|
|
|
- y = _scores[metric],
|
|
|
- label = labels[metric],
|
|
|
- s = 15,
|
|
|
- marker = styles[metric]
|
|
|
- )
|
|
|
-
|
|
|
-ax = plt.subplot(2, 2, 2)
|
|
|
-ax.legend(loc = "upper right", borderaxespad = 0.1, bbox_to_anchor=(1, 1.25), ncol = 3)
|
|
|
-
|
|
|
-plt.subplots_adjust(wspace = 0.15)
|
|
|
-plt.savefig('Fig4.pdf', bbox_inches = 'tight')
|
|
|
+ i = speakers.index(speaker)
|
|
|
+ ax = plt.subplot(2, 2, i+1)
|
|
|
+ ax.set_xlim(-0.5,len(sets)-0.5)
|
|
|
+ ax.set_ylim(0, 1)
|
|
|
+
|
|
|
+ if i >= 2:
|
|
|
+ ax.set_xticks(range(len(sets)))
|
|
|
+ ax.set_xticklabels(sets, rotation = 45, horizontalalignment = 'right')
|
|
|
+ else:
|
|
|
+ ax.set_xticklabels(['' for i in range(len(sets))])
|
|
|
+
|
|
|
+ if i%2 == 1:
|
|
|
+ ax.set_yticklabels(['' for i in range(6)])
|
|
|
+
|
|
|
+ ax.set_xlabel(speaker)
|
|
|
+
|
|
|
+ _scores = scores[scores['speaker'] == speaker]
|
|
|
+ for metric in ['recall', 'precision', 'f']:
|
|
|
+ ax.scatter(
|
|
|
+ x = _scores['set'].apply(lambda s: sets.index(s)),
|
|
|
+ y = _scores[metric],
|
|
|
+ label = labels[metric],
|
|
|
+ s = 15,
|
|
|
+ marker = styles[metric]
|
|
|
+ )
|
|
|
+
|
|
|
+ ax = plt.subplot(2, 2, 2)
|
|
|
+ ax.legend(loc = "upper right", borderaxespad = 0.1, bbox_to_anchor=(1, 1.25), ncol = 3)
|
|
|
+
|
|
|
+ plt.subplots_adjust(wspace = 0.15)
|
|
|
+ plt.savefig('Fig4.pdf', bbox_inches = 'tight')
|