doi
/
zoo-campaign
forked from LAAC-LSCP/zoo-campaign


			
			
				
					
						
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
							import argparse
import numpy as np
import os
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager
from ChildProject.metrics import segments_to_grid, conf_matrix

# Class labels used for both annotation sets once their speakers are remapped;
# they also serve as the tick labels of the final figure.
categories = ['Adult', 'Youngster', 'Junk']

# Open the VanDam dataset and the manager giving access to its annotations.
project = ChildProject('vandam-data')
am = AnnotationManager(project)

# Keep only the annotation index rows that belong to the two sets being
# compared, then fetch their segments as a single dataframe.
in_compared_sets = am.annotations['set'].isin(['vtc', 'zoo'])
annotations = am.annotations[in_compared_sets]
segments = am.get_collapsed_segments(annotations)

# Map VTC speaker types onto the shared 'Adult' / 'Youngster' labels.
# `.copy()` turns the filtered rows into an independent dataframe, so adding
# the 'speaker_age' column below is an unambiguous write on that dataframe
# rather than on a slice of `segments` (this is what triggers pandas'
# SettingWithCopyWarning). Values absent from the mapping are left unchanged
# by `replace`.
vtc_segments = segments.loc[segments['set'] == 'vtc'].copy()
vtc_segments['speaker_age'] = vtc_segments['speaker_type'].replace({
    'MAL': 'Adult',
    'FEM': 'Adult',
    'CHI': 'Youngster',
    'OCH': 'Youngster'
})

# Map the Zooniverse-extracted speaker ages onto the same labels. Only the
# three values listed here are rewritten; any other value (presumably already
# one of `categories` -- verify against the zoo annotations) is kept as is.
zoo_segments = segments.loc[segments['set'] == 'zoo'].copy()
zoo_segments['speaker_age'] = zoo_segments['speaker_age'].replace({
    'Baby': 'Youngster',
    'Child': 'Youngster',
    'Adolescent': 'Adult'
})

# Unitize both sets on the same timeline: 50 ms time steps running from 0 to
# the largest segment offset found across both sets, with one column per entry
# of `categories` indicating whether that class is active in the time step.
# `none=True` requests an extra trailing column (presumably flagging steps
# where no class is active -- confirm against the segments_to_grid docs).
grid_end = segments['segment_offset'].max()
grid_args = (0, grid_end, 50, 'speaker_age', categories)

vtc = segments_to_grid(vtc_segments, *grid_args, none=True)
zoo = segments_to_grid(zoo_segments, *grid_args, none=True)

# Keep only the units that have been classified on Zooniverse, i.e. the rows
# where zoo's trailing column is 0, and drop that trailing column from both
# grids. The mask is taken from `zoo` before either grid is filtered so the
# two grids stay aligned row by row.
classified = zoo[:, -1] == 0
vtc = vtc[classified][:, :-1]
zoo = zoo[classified][:, :-1]

# Confusion counts between the two unitized grids; in the first panel below
# the rows are labelled as vtc classes and the columns as zoo classes.
confusion_counts = conf_matrix(vtc, zoo)

# Global font settings for the figure.
plt.rcParams.update({'font.size': 12})
for tick_group in ('xtick', 'ytick'):
    plt.rc(tick_group, labelsize=10)

# Two side-by-side panels, each the size of a default matplotlib figure.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(6.4*2, 4.8))


def _plot_confusion(ax, matrix, xlabel, ylabel):
    """Draw `matrix` on `ax` as an annotated heatmap labelled with `categories`."""
    sns.heatmap(matrix, annot=True, fmt='.2f', ax=ax, cmap='Reds')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.xaxis.set_ticklabels(categories)
    ax.yaxis.set_ticklabels(categories)


# Left panel: row i of the counts is divided by the column-i total of the vtc
# grid, so each row is normalized by how often vtc reports that class.
vtc_totals = np.sum(vtc, axis=0)
confusion = confusion_counts / vtc_totals[:, None]
_plot_confusion(axes[0], confusion, 'zoo', 'vtc')

# Right panel: same idea with the roles swapped -- transpose the counts so the
# rows correspond to zoo classes, then normalize each row by the zoo totals.
confusion_counts = np.transpose(confusion_counts)
zoo_totals = np.sum(zoo, axis=0)
confusion = confusion_counts / zoo_totals[:, None]
_plot_confusion(axes[1], confusion, 'vtc', 'zoo')

plt.savefig('annotations/comparison.png', bbox_inches='tight')