3 years ago · 9c39475ffd
--- a/scripts/plots.py
+++ b/scripts/plots.py
@@ -0,0 +1,77 @@
 
				+from ChildProject.projects import ChildProject
			
 
				+from ChildProject.annotations import AnnotationManager
			
 
				+from ChildProject.metrics import gamma, segments_to_grid
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+from sklearn.metrics import confusion_matrix
			
 
				+from sklearn.preprocessing import normalize
			
 
				+
			
 
				+import seaborn as sns
			
 
				+import matplotlib.pyplot as plt
			
 
				+
			
 
				+speakers = ['CHI', 'OCH', 'FEM', 'MAL']
			
 
				+
			
 
				+project = ChildProject('.')
			
 
				+am = AnnotationManager(project)
			
 
				+am.read()
			
 
				+
			
 
				+intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
			
 
				+segments = am.get_collapsed_segments(intersection)
			
 
				+segments = segments[segments['speaker_type'].isin(speakers)]
			
 
				+segments.sort_values(['segment_onset', 'segment_offset']).to_csv('test.csv', index = False)
			
 
				+#print(gamma(segments, column = 'speaker_type'))
			
 
				+
			
 
				+print('creating grids')
			
 
				+vtc = segments_to_grid(segments[segments['set'] == 'vtc'], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
			
 
				+its = segments_to_grid(segments[segments['set'] == 'its'], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
			
 
				+print('done creating grids')
			
 
				+
			
 
				+speakers.extend(['overlap', 'none'])
			
 
				+
			
 
				+def get_pick(row):
			
 
				+    for cat in reversed(speakers):
			
 
				+        if row[cat]:
			
 
				+            return cat
			
 
				+
			
 
				+def conf_matrix(horizontal, vertical, categories):
			
 
				+    vertical = pd.DataFrame(vertical, columns = categories)
			
 
				+    vertical['pick'] = vertical.apply(
			
 
				+        get_pick,
			
 
				+        axis = 1
			
 
				+    )
			
 
				+    vertical = vertical['pick'].values
			
 
				+
			
 
				+    horizontal = pd.DataFrame(horizontal, columns = categories)
			
 
				+    horizontal['pick'] = horizontal.apply(
			
 
				+        get_pick,
			
 
				+        axis = 1
			
 
				+    )
			
 
				+    horizontal = horizontal['pick'].values
			
 
				+
			
 
				+    confusion = confusion_matrix(vertical, horizontal, labels = categories)
			
 
				+    confusion = normalize(confusion, axis = 1, norm = 'l1')
			
 
				+
			
 
				+    return confusion
			
 
				+
			
 
				+plt.rcParams.update({'font.size': 12})
			
 
				+plt.rc('xtick', labelsize = 10)
			
 
				+plt.rc('ytick', labelsize = 10)
			
 
				+
			
 
				+fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize=(6.4*2, 4.8))
			
 
				+
			
 
				+confusion = conf_matrix(its, vtc, speakers)
			
 
				+sns.heatmap(confusion, annot = True, fmt = '.2f', ax = axes[0], cmap = 'Reds')
			
 
				+axes[0].set_xlabel('its')
			
 
				+axes[0].set_ylabel('vtc')
			
 
				+axes[0].xaxis.set_ticklabels(speakers)
			
 
				+axes[0].yaxis.set_ticklabels(speakers)
			
 
				+
			
 
				+confusion = conf_matrix(vtc, its, speakers)
			
 
				+sns.heatmap(confusion, annot = True, fmt = '.2f', ax = axes[1], cmap = 'Reds')
			
 
				+axes[1].set_xlabel('vtc')
			
 
				+axes[1].set_ylabel('its')
			
 
				+axes[1].xaxis.set_ticklabels(speakers)
			
 
				+axes[1].yaxis.set_ticklabels(speakers)
			
 
				+
			
 
				+plt.savefig('Fig5.pdf', bbox_inches = 'tight')
			
--- a/scripts/recall.py
+++ b/scripts/recall.py
@@ -0,0 +1,84 @@
 
				+from ChildProject.projects import ChildProject
			
 
				+from ChildProject.annotations import AnnotationManager
			
 
				+from ChildProject.metrics import segments_to_annotation
			
 
				+
			
 
				+from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+from sklearn.metrics import confusion_matrix
			
 
				+from sklearn.preprocessing import normalize
			
 
				+
			
 
				+import random
			
 
				+
			
 
				+import seaborn as sns
			
 
				+import matplotlib.pyplot as plt
			
 
				+
			
 
				+speakers = ['CHI', 'OCH', 'FEM', 'MAL']
			
 
				+sets = ['its', 'vtc (conf 50%)', 'vtc (drop 50%)', 'vtc (conf 75%)', 'vtc (drop 75%)']
			
 
				+
			
 
				+project = ChildProject('.')
			
 
				+am = AnnotationManager(project)
			
 
				+am.read()
			
 
				+
			
 
				+def confusion(segments, prob):
			
 
				+    segments['speaker_type'] = segments['speaker_type'].apply(
			
 
				+        lambda s: random.choice(speakers) if random.random() < prob else s
			
 
				+    )
			
 
				+    return segments
			
 
				+
			
 
				+def drop(segments, prob):
			
 
				+    return segments.sample(frac = 1-prob)
			
 
				+
			
 
				+intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
			
 
				+segments = am.get_collapsed_segments(intersection)
			
 
				+segments = segments[segments['speaker_type'].isin(speakers)]
			
 
				+segments.sort_values(['segment_onset', 'segment_offset']).to_csv('test.csv', index = False)
			
 
				+
			
 
				+conf50 = segments[segments['set'] == 'vtc'].copy()
			
 
				+conf50 = confusion(conf50, 0.5)
			
 
				+conf50['set'] = 'vtc (conf 50%)'
			
 
				+
			
 
				+conf75 = segments[segments['set'] == 'vtc'].copy()
			
 
				+conf75 = confusion(conf75, 0.75)
			
 
				+conf75['set'] = 'vtc (conf 75%)'
			
 
				+
			
 
				+drop50 = segments[segments['set'] == 'vtc'].copy()
			
 
				+drop50 = drop(drop50, 0.5)
			
 
				+drop50['set'] = 'vtc (drop 50%)'
			
 
				+
			
 
				+drop75 = segments[segments['set'] == 'vtc'].copy()
			
 
				+drop75 = drop(drop75, 0.75)
			
 
				+drop75['set'] = 'vtc (drop 75%)'
			
 
				+
			
 
				+segments = pd.concat([segments, conf50, conf75, drop50, drop75])
			
 
				+
			
 
				+metric = DetectionPrecisionRecallFMeasure()
			
 
				+
			
 
				+scores = []
			
 
				+for speaker in speakers:
			
 
				+    ref = segments_to_annotation(segments[(segments['set'] == 'vtc') & (segments['speaker_type'] == speaker)], 'speaker_type')
			
 
				+
			
 
				+    for s in sets:
			
 
				+        hyp = segments_to_annotation(segments[(segments['set'] == s) & (segments['speaker_type'] == speaker)], 'speaker_type')
			
 
				+        detail = metric.compute_components(ref, hyp)
			
 
				+        precision, recall, f = metric.compute_metrics(detail)
			
 
				+
			
 
				+        scores.append({
			
 
				+            'set': s,
			
 
				+            'speaker': speaker,
			
 
				+            'recall': recall,
			
 
				+            'precision': precision,
			
 
				+            'f': f
			
 
				+        })
			
 
				+
			
 
				+scores = pd.DataFrame(scores)
			
 
				+scores.to_csv('scores.csv', index = False)
			
 
				+
			
 
				+plt.rcParams.update({'font.size': 12})
			
 
				+plt.rc('xtick', labelsize = 10)
			
 
				+plt.rc('ytick', labelsize = 10)
			
 
				+
			
 
				+fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize=(6.4*2, 4.8*2))
			
 
				+
			
 
				+plt.savefig('Fig4.pdf', bbox_inches = 'tight')