
added confusion matrices to compare.py

Martin Frébourg 2 years ago
parent
commit
ac2aafc5d1

+ 13 - 8
code/compare.py

@@ -1,3 +1,4 @@
+from numpy import mod
 import pandas as pd
 from ChildProject.projects import ChildProject
 from ChildProject.annotations import AnnotationManager
@@ -9,11 +10,12 @@ def compare_vandam(set1: str, set2: str) :
     speakers = ['CHI', 'OCH', 'FEM', 'MAL']
     project = ChildProject('vandam-data')
     am = AnnotationManager(project)
-    am.read()
+    #am.read()
 
     #get segments that intercept between two annotations
     intersection = AnnotationManager.intersection(am.annotations, [set1, set2])
     
+    #output directory
     dirName = "outputs/compare/" + set1.replace("/","") + "-" + set2.replace("/","")
     try:
         # Create target Directory
@@ -22,6 +24,7 @@ def compare_vandam(set1: str, set2: str) :
     except FileExistsError:
         print("Directory " , dirName ,  " already exists")
     
+    #opens output file
     file= open("{0}/{1}-{2}.txt".format(dirName, set1.replace("/",""), set2.replace("/","")),"a")
     
     for speaker in speakers: 
@@ -30,28 +33,30 @@ def compare_vandam(set1: str, set2: str) :
         segments = am.get_collapsed_segments(intersection)
         segments = segments[segments['speaker_type'].isin(pd.Series(speaker))]
 
-       # set1_segm = segments_to_grid(segments[segments['set'] == set1], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
-       # set2_segm = segments_to_grid(segments[segments['set'] == set2], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
-
-
         ref = segments_to_annotation(segments[segments['set'] == set1], 'speaker_type')
         hyp = segments_to_annotation(segments[segments['set'] == set2], 'speaker_type')
 
         if __name__ == '__main__':
+
+            #compute metrics
             from pyannote.metrics.detection import DetectionPrecisionRecallFMeasure
             metric = DetectionPrecisionRecallFMeasure()
 
             detail = metric.compute_components(ref, hyp)
             precision, recall, f = metric.compute_metrics(detail)
             
-                       
-            # metric_output = str(f'{precision:.2f}/{recall:.2f}/{f:.2f}')
+            #saves metrics to output file
             metric_output = "precision: {0} / recall : {1} / f: {2}\n".format(precision, recall, f)
             file.write(speaker + ": " + metric_output)
             print("Metrics [precision & recall & f] saved! for {0}".format(speaker))
     file.close
-    
 
+    #generates segments
+    set1_segm = segments_to_grid(segments[segments['set'] == set1], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
+    set2_segm = segments_to_grid(segments[segments['set'] == set2], 0, segments['segment_offset'].max(), 100, 'speaker_type', speakers)
+    matrix_df = pd.DataFrame(conf_matrix(set1_segm, set2_segm))
+    matrix_df.to_csv("{0}/{1}-{2}-confusion-matrix.csv".format(dirName, set1.replace("/",""), set2.replace("/","")), mode = "x", index=False)
+    print("Confusion matrix saved for {0} and {1}!".format(set1, set2))
 
 compare_vandam('eaf', 'cha')
 compare_vandam('eaf', 'cha/aligned')
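
For readers skimming the diff, here is a minimal standalone sketch of the new confusion-matrix step, assuming `segments_to_grid` and `conf_matrix` come from `ChildProject.metrics` with the signatures used above (their import is not visible in this hunk), and that `segments` is the collapsed-intersection DataFrame built earlier in `compare_vandam`:

```python
import pandas as pd
from ChildProject.metrics import segments_to_grid, conf_matrix  # assumed import path

def save_confusion_matrix(segments: pd.DataFrame, set1: str, set2: str,
                          speakers: list, out_path: str) -> None:
    """Discretize both annotation sets onto a common 100 ms grid and write
    their confusion matrix to CSV (mirrors the code added in this commit)."""
    range_offset = segments['segment_offset'].max()

    # one row per time bin; columns follow `speakers` (the library may append
    # extra categories, e.g. for unlabelled bins)
    set1_grid = segments_to_grid(segments[segments['set'] == set1],
                                 0, range_offset, 100, 'speaker_type', speakers)
    set2_grid = segments_to_grid(segments[segments['set'] == set2],
                                 0, range_offset, 100, 'speaker_type', speakers)

    # counts of bins labelled as category i in set1 and category j in set2
    matrix = conf_matrix(set1_grid, set2_grid)
    pd.DataFrame(matrix).to_csv(out_path, index=False)
```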

+ 1 - 0
outputs/compare/eaf-cha/eaf-cha-confusion-matrix.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/02/qX/MD5E-s74--22ae56390035e28008fa9cdc4ef4f910.csv/MD5E-s74--22ae56390035e28008fa9cdc4ef4f910.csv

+ 1 - 0
outputs/compare/eaf-chaaligned/eaf-chaaligned-confusion-matrix.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/pX/51/MD5E-s76--45d91b7c921302f3916c697f755a8c9c.csv/MD5E-s76--45d91b7c921302f3916c697f755a8c9c.csv

+ 1 - 1
outputs/compare/eaf-chaaligned/eaf-chaaligned.txt

@@ -1 +1 @@
-../../../.git/annex/objects/7P/kV/MD5E-s358--335ad1305df050e9e94b3e922edcf960.txt/MD5E-s358--335ad1305df050e9e94b3e922edcf960.txt
+../../../.git/annex/objects/6X/8m/MD5E-s0--d41d8cd98f00b204e9800998ecf8427e.txt/MD5E-s0--d41d8cd98f00b204e9800998ecf8427e.txt
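
The confusion-matrix CSVs above are stored as git-annex pointers, not the data itself, so they have to be fetched first (e.g. `git annex get outputs/compare`). As a hypothetical convenience, a sketch that reloads a saved matrix and attaches speaker labels, assuming rows and columns follow the `speakers` order used in `compare.py`:

```python
import pandas as pd

# hypothetical helper; assumes the matrix rows/columns follow the speaker
# order used in compare.py (extra categories, if any, keep their original names)
SPEAKERS = ['CHI', 'OCH', 'FEM', 'MAL']

def load_confusion_matrix(path: str) -> pd.DataFrame:
    matrix = pd.read_csv(path)
    labels = SPEAKERS + [str(c) for c in matrix.columns[len(SPEAKERS):]]
    matrix.columns = labels[:len(matrix.columns)]
    matrix.index = labels[:len(matrix)]
    return matrix

print(load_confusion_matrix(
    "outputs/compare/eaf-cha/eaf-cha-confusion-matrix.csv"))
```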