Lucas Gautheron 2 years ago
commit bc59a5544e
6 changed files with 132 additions and 129 deletions
  1. Fig4.pdf (+1 -1)
  2. Fig5.pdf (+1 -1)
  3. code/confusion_matrix.py (+30 -28)
  4. code/recall.py (+98 -97)
  5. main.pdf (+1 -1)
  6. main.tex (+1 -1)

+ 1 - 1
Fig4.pdf

@@ -1 +1 @@
-.git/annex/objects/pK/F9/MD5E-s19446--8961c70a86f2331f9cb9057e89840a84.pdf/MD5E-s19446--8961c70a86f2331f9cb9057e89840a84.pdf
+.git/annex/objects/Qq/g4/MD5E-s19440--0e343483be93bc292ef16b5c6c194fd4.pdf/MD5E-s19440--0e343483be93bc292ef16b5c6c194fd4.pdf

+ 1 - 1
Fig5.pdf

@@ -1 +1 @@
-.git/annex/objects/6x/P0/MD5E-s17506--e7dc37318548bd2669009ccf5128c9ef.pdf/MD5E-s17506--e7dc37318548bd2669009ccf5128c9ef.pdf
+.git/annex/objects/v0/Jf/MD5E-s15636--6e365c33a40b5f870ccd610b355c6b15.pdf/MD5E-s15636--6e365c33a40b5f870ccd610b355c6b15.pdf

+ 30 - 28
code/confusion_matrix.py

@@ -17,42 +17,44 @@ import sys
 speakers = ['CHI', 'OCH', 'FEM', 'MAL']
 
 path = sys.argv[1]
-project = ChildProject(path)
-am = AnnotationManager(project)
-am.read()
 
-intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
-segments = am.get_collapsed_segments(intersection)
-segments = segments[segments['speaker_type'].isin(speakers)]
+if __name__ == '__main__':
+    project = ChildProject(path)
+    am = AnnotationManager(project)
+    am.read()
 
-vtc = segments_to_grid(segments[segments['set'] == 'vtc'], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
-its = segments_to_grid(segments[segments['set'] == 'its'], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
+    intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
+    segments = am.get_collapsed_segments(intersection)
+    segments = segments[segments['speaker_type'].isin(speakers)]
 
-speakers.extend(['overlap', 'none'])
+    vtc = segments_to_grid(segments[segments['set'] == 'vtc'], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
+    its = segments_to_grid(segments[segments['set'] == 'its'], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
 
-confusion_counts = conf_matrix(its, vtc, speakers)
+    speakers.extend(['none'])
 
-plt.rcParams.update({'font.size': 12})
-plt.rc('xtick', labelsize = 10)
-plt.rc('ytick', labelsize = 10)
+    confusion_counts = conf_matrix(vtc, its)
 
-fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize=(6.4*2, 4.8))
+    plt.rcParams.update({'font.size': 12})
+    plt.rc('xtick', labelsize = 10)
+    plt.rc('ytick', labelsize = 10)
 
-confusion = normalize(confusion_counts, axis = 1, norm = 'l1')
+    fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize=(6.4*2, 4.8))
 
-sns.heatmap(confusion, annot = True, fmt = '.2f', ax = axes[0], cmap = 'Reds')
-axes[0].set_xlabel('its')
-axes[0].set_ylabel('vtc')
-axes[0].xaxis.set_ticklabels(speakers)
-axes[0].yaxis.set_ticklabels(speakers)
+    confusion = confusion_counts/np.sum(vtc, axis = 0)[:,None]
 
-confusion_counts = np.transpose(confusion_counts)
-confusion = normalize(confusion_counts, axis = 1, norm = 'l1')
+    sns.heatmap(confusion, annot = True, fmt = '.2f', ax = axes[0], cmap = 'Reds')
+    axes[0].set_xlabel('its')
+    axes[0].set_ylabel('vtc')
+    axes[0].xaxis.set_ticklabels(speakers)
+    axes[0].yaxis.set_ticklabels(speakers)
 
-sns.heatmap(confusion, annot = True, fmt = '.2f', ax = axes[1], cmap = 'Reds')
-axes[1].set_xlabel('vtc')
-axes[1].set_ylabel('its')
-axes[1].xaxis.set_ticklabels(speakers)
-axes[1].yaxis.set_ticklabels(speakers)
+    confusion_counts = np.transpose(confusion_counts)
+    confusion = confusion_counts/np.sum(its, axis = 0)[:,None]
 
-plt.savefig('Fig5.pdf', bbox_inches = 'tight')
+    sns.heatmap(confusion, annot = True, fmt = '.2f', ax = axes[1], cmap = 'Reds')
+    axes[1].set_xlabel('vtc')
+    axes[1].set_ylabel('its')
+    axes[1].xaxis.set_ticklabels(speakers)
+    axes[1].yaxis.set_ticklabels(speakers)
+
+    plt.savefig('Fig5.pdf', bbox_inches = 'tight')
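To make the new normalization explicit, here is a minimal numpy sketch of what conf_matrix followed by the row-wise division computes. The toy grids, their (frames x speakers) shape, and the speaker ordering are illustrative assumptions only, not a claim about the ChildProject API:

import numpy as np

# Toy grids, rows = time frames, columns = speakers in the order
# (CHI, OCH, FEM, MAL, none). Frame 1 has two overlapping speakers
# in vtc; its carries exactly one label per frame.
vtc = np.array([
    [1, 0, 0, 0, 0],
    [1, 0, 1, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0, 0, 0, 1],
])
its = np.array([
    [1, 0, 0, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 0, 1],
    [0, 0, 0, 0, 1],
])

# Co-occurrence counts: counts[i, j] = number of frames labelled
# speaker i by vtc and speaker j by its.
counts = vtc.T @ its

# Row-normalize by each vtc speaker's total frame count, as in the
# updated script. Rows sum to 1 here because its assigns exactly one
# label per frame; the transposed matrix, normalized by the its
# totals instead, can have rows summing above 1 wherever vtc frames
# carry overlapping speakers.
confusion = counts / np.sum(vtc, axis=0)[:, None]
print(confusion)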

+ 98 - 97
code/recall.py

@@ -4,8 +4,6 @@ from ChildProject.projects import ChildProject
 from ChildProject.annotations import AnnotationManager
 from ChildProject.metrics import segments_to_annotation
 
-from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure
-
 import matplotlib.pyplot as plt
 import numpy as np
 import os
@@ -25,107 +23,110 @@ def confusion(segments, prob):
 def drop(segments, prob):
     return segments.sample(frac = 1-prob)
 
-if not os.path.exists('scores.csv'):
-    path = sys.argv[1]
-    project = ChildProject(path)
-    am = AnnotationManager(project)
-    am.read()
+if __name__ == '__main__':
+    if not os.path.exists('scores.csv'):
+        from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure
+
+        path = sys.argv[1]
+        project = ChildProject(path)
+        am = AnnotationManager(project)
+        am.read()
+
+        intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
+        segments = am.get_collapsed_segments(intersection)
+        segments = segments[segments['speaker_type'].isin(speakers)]
+
+        conf50 = segments[segments['set'] == 'vtc'].copy()
+        conf50 = confusion(conf50, 0.5)
+        conf50['set'] = 'vtc (conf 50%)'
+
+        conf75 = segments[segments['set'] == 'vtc'].copy()
+        conf75 = confusion(conf75, 0.75)
+        conf75['set'] = 'vtc (conf 75%)'
+
+        drop50 = segments[segments['set'] == 'vtc'].copy()
+        drop50 = drop(drop50, 0.5)
+        drop50['set'] = 'vtc (drop 50%)'
+
+        drop75 = segments[segments['set'] == 'vtc'].copy()
+        drop75 = drop(drop75, 0.75)
+        drop75['set'] = 'vtc (drop 75%)'
+
+        segments = pd.concat([segments, conf50, conf75, drop50, drop75])
+
+        metric = DetectionPrecisionRecallFMeasure()
+
+        scores = []
+        for speaker in speakers:
+            ref = segments_to_annotation(segments[(segments['set'] == 'vtc') & (segments['speaker_type'] == speaker)], 'speaker_type')
+
+            for s in sets:
+                hyp = segments_to_annotation(segments[(segments['set'] == s) & (segments['speaker_type'] == speaker)], 'speaker_type')
+                detail = metric.compute_components(ref, hyp)
+                precision, recall, f = metric.compute_metrics(detail)
+
+                scores.append({
+                    'set': s,
+                    'speaker': speaker,
+                    'recall': recall,
+                    'precision': precision,
+                    'f': f
+                })
 
-    intersection = AnnotationManager.intersection(am.annotations, ['vtc', 'its'])
-    segments = am.get_collapsed_segments(intersection)
-    segments = segments[segments['speaker_type'].isin(speakers)]
+        scores = pd.DataFrame(scores)
+        scores.to_csv('scores.csv', index = False)
 
-    conf50 = segments[segments['set'] == 'vtc'].copy()
-    conf50 = confusion(conf50, 0.5)
-    conf50['set'] = 'vtc (conf 50%)'
+    scores = pd.read_csv('scores.csv')
 
-    conf75 = segments[segments['set'] == 'vtc'].copy()
-    conf75 = confusion(conf75, 0.75)
-    conf75['set'] = 'vtc (conf 75%)'
+    plt.rcParams.update({'font.size': 12})
+    plt.rc('xtick', labelsize = 10)
+    plt.rc('ytick', labelsize = 10)
 
-    drop50 = segments[segments['set'] == 'vtc'].copy()
-    drop50 = drop(drop50, 0.5)
-    drop50['set'] = 'vtc (drop 50%)'
+    print(scores)
 
-    drop75 = segments[segments['set'] == 'vtc'].copy()
-    drop75 = drop(drop75, 0.75)
-    drop75['set'] = 'vtc (drop 75%)'
+    styles = {
+        'recall': 's',
+        'precision': 'D',
+        'f': 'o'
+    }
 
-    segments = pd.concat([segments, conf50, conf75, drop50, drop75])
+    labels = {
+        'recall': 'recall',
+        'precision': 'precision',
+        'f': 'F-measure'
+    }
 
-    metric = DetectionPrecisionRecallFMeasure()
+    plt.figure(figsize = (6.4*1, 4.8*1+0.25*4.8))
 
-    scores = []
     for speaker in speakers:
-        ref = segments_to_annotation(segments[(segments['set'] == 'vtc') & (segments['speaker_type'] == speaker)], 'speaker_type')
-
-        for s in sets:
-            hyp = segments_to_annotation(segments[(segments['set'] == s) & (segments['speaker_type'] == speaker)], 'speaker_type')
-            detail = metric.compute_components(ref, hyp)
-            precision, recall, f = metric.compute_metrics(detail)
-
-            scores.append({
-                'set': s,
-                'speaker': speaker,
-                'recall': recall,
-                'precision': precision,
-                'f': f
-            })
-
-    scores = pd.DataFrame(scores)
-    scores.to_csv('scores.csv', index = False)
-
-scores = pd.read_csv('scores.csv')
-
-plt.rcParams.update({'font.size': 12})
-plt.rc('xtick', labelsize = 10)
-plt.rc('ytick', labelsize = 10)
-
-print(scores)
-
-styles = {
-    'recall': 's',
-    'precision': 'D',
-    'f': 'o'
-}
-
-labels = {
-    'recall': 'recall',
-    'precision': 'precision',
-    'f': 'F-measure'
-}
-
-plt.figure(figsize = (6.4*1, 4.8*1+0.25*4.8))
-
-for speaker in speakers:
-    i = speakers.index(speaker)
-    ax = plt.subplot(2, 2, i+1)
-    ax.set_xlim(-0.5,len(sets)-0.5)
-    ax.set_ylim(0, 1)
-
-    if i >= 2:
-        ax.set_xticks(range(len(sets)))
-        ax.set_xticklabels(sets, rotation = 45, horizontalalignment = 'right')
-    else:
-        ax.set_xticklabels(['' for i in range(len(sets))])
-
-    if i%2 == 1:
-        ax.set_yticklabels(['' for i in range(6)])
-
-    ax.set_xlabel(speaker)
-
-    _scores = scores[scores['speaker'] == speaker]
-    for metric in ['recall', 'precision', 'f']:
-        ax.scatter(
-            x = _scores['set'].apply(lambda s: sets.index(s)),
-            y = _scores[metric],
-            label = labels[metric],
-            s = 15,
-            marker = styles[metric]
-        )
-
-ax = plt.subplot(2, 2, 2)
-ax.legend(loc = "upper right", borderaxespad = 0.1, bbox_to_anchor=(1, 1.25), ncol = 3)
-
-plt.subplots_adjust(wspace = 0.15)
-plt.savefig('Fig4.pdf', bbox_inches = 'tight')
+        i = speakers.index(speaker)
+        ax = plt.subplot(2, 2, i+1)
+        ax.set_xlim(-0.5,len(sets)-0.5)
+        ax.set_ylim(0, 1)
+
+        if i >= 2:
+            ax.set_xticks(range(len(sets)))
+            ax.set_xticklabels(sets, rotation = 45, horizontalalignment = 'right')
+        else:
+            ax.set_xticklabels(['' for i in range(len(sets))])
+
+        if i%2 == 1:
+            ax.set_yticklabels(['' for i in range(6)])
+
+        ax.set_xlabel(speaker)
+
+        _scores = scores[scores['speaker'] == speaker]
+        for metric in ['recall', 'precision', 'f']:
+            ax.scatter(
+                x = _scores['set'].apply(lambda s: sets.index(s)),
+                y = _scores[metric],
+                label = labels[metric],
+                s = 15,
+                marker = styles[metric]
+            )
+
+    ax = plt.subplot(2, 2, 2)
+    ax.legend(loc = "upper right", borderaxespad = 0.1, bbox_to_anchor=(1, 1.25), ncol = 3)
+
+    plt.subplots_adjust(wspace = 0.15)
+    plt.savefig('Fig4.pdf', bbox_inches = 'tight')
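For reference, the pyannote.metrics calls used in recall.py (compute_components followed by compute_metrics) can be exercised in isolation. A self-contained sketch, with hypothetical reference and hypothesis segments for a single speaker class:

from pyannote.core import Annotation, Segment
from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure

# Hypothetical reference (ground truth) for one speaker class.
ref = Annotation()
ref[Segment(0, 10)] = 'CHI'
ref[Segment(15, 20)] = 'CHI'

# Hypothetical hypothesis: shifted and truncated versions of the above.
hyp = Annotation()
hyp[Segment(0, 8)] = 'CHI'
hyp[Segment(16, 22)] = 'CHI'

metric = DetectionPrecisionRecallFMeasure()
detail = metric.compute_components(ref, hyp)
precision, recall, f = metric.compute_metrics(detail)
print(precision, recall, f)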

+ 1 - 1
main.pdf

@@ -1 +1 @@
-.git/annex/objects/zf/F2/MD5E-s369128--69e5e8dfc29aba8765cd8ad2777105bf.pdf/MD5E-s369128--69e5e8dfc29aba8765cd8ad2777105bf.pdf
+.git/annex/objects/2M/kf/MD5E-s367487--425211da1dc40679d1b9947d8358e114.pdf/MD5E-s367487--425211da1dc40679d1b9947d8358e114.pdf

+ 1 - 1
main.tex

@@ -421,7 +421,7 @@ It should be noted that these measures are most useful in the absence of ground
 \includegraphics[width=\textwidth]{Fig5.pdf}
 
 \caption{\label{fig:confusion}\textbf{Example of diarization performance comparison using confusion matrices}
-LENA's annotations (its) of the public VanDam corpus \citep{vandam-day} are compared to the VTC's. The first coefficient of the left side matrix should be read as: ``41\% of CHI segments from the VTC are labelled as CHI by the LENA's''. The first coefficient of the right side matrix should be read as: ``71\% of the CHI segments of the LENA are labelled as CHI by the VTC''. It can also be seen that the LENA does not produce overlapping speech segments, i.e. it cannot disambiguate two overlapping speakers.
+LENA's annotations (its) of the public VanDam corpus \citep{vandam-day} are compared to the VTC's. The first coefficient of the left-hand matrix should be read as: ``38\% of CHI segments from the VTC are also labelled as CHI by the LENA''. The first coefficient of the right-hand matrix should be read as: ``82\% of the CHI segments from the LENA are also labelled as CHI by the VTC''. The sum of each row of the right-hand plot may exceed one, since the VTC, unlike the LENA, can detect overlapping speakers.
 }
 
 \end{figure*}
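In equation form, and assuming (consistently with code/confusion_matrix.py) that $A$ and $B$ are binary frame-by-speaker grids for the VTC and the LENA respectively, the left-hand matrix is:

% A = vtc grid, B = its grid; t indexes frames, i and j speakers.
\[
M_{ij} = \frac{\sum_t A_{ti}\, B_{tj}}{\sum_t A_{ti}}
\]
% The right-hand matrix swaps the roles of A and B; its rows may sum
% to more than one because a single its frame can overlap several
% vtc speakers at once.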