|
@@ -1,7 +1,7 @@
|
|
|
#!usr/bin/env python
|
|
|
# -*- coding: utf8 -*-
|
|
|
# -----------------------------------------------------------------------------
|
|
|
-# File: compute_turn_annotations.py (as part of project URUMETRICS-CODE)
|
|
|
+# File: compute_annotations.py (as part of project URUMETRICS-CODE)
|
|
|
# Created: 03/08/2022 17:32
|
|
|
# Last Modified: 03/08/2022 17:32
|
|
|
# -----------------------------------------------------------------------------
|
|
@@ -19,24 +19,21 @@
|
|
|
|
|
|
import logging
|
|
|
import os
|
|
|
+from functools import partial
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
from ChildProject.annotations import AnnotationManager
|
|
|
from ChildProject.projects import ChildProject
|
|
|
|
|
|
-from conversational_settings import uru_conversations
|
|
|
+import annotations_functions
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
-def compute_turn_annotations(data_path):
|
|
|
- """
|
|
|
- Computes conversational annotations for the ChildProject in directory data_path
|
|
|
- :param data_path: path to ChildProject dataset
|
|
|
- :type data_path: str
|
|
|
- :return: annotations
|
|
|
- :rtype: pd.DataFrame
|
|
|
- """
|
|
|
+def _annotation_function_wrapper(func, parser_args, **kwargs):
|
|
|
+ return partial(func,parser_args=parser_args, **kwargs)
|
|
|
+
|
|
|
+def get_available_segments(data_path, set_name):
|
|
|
project = ChildProject(data_path)
|
|
|
am = AnnotationManager(project)
|
|
|
am.read()
|
|
@@ -48,29 +45,43 @@ def compute_turn_annotations(data_path):
|
|
|
|
|
|
# Get already existing conversation annotations and only compute annotations for the files that do not already
|
|
|
# have conversational annotations
|
|
|
- if 'conversations' in set(am.annotations['set']):
|
|
|
- conversations_anns = am.annotations[am.annotations['set'] == 'conversations']
|
|
|
+ if set_name in set(am.annotations['set']):
|
|
|
+ conversations_anns = am.annotations[am.annotations['set'] == set_name]
|
|
|
available_vtc_anns = available_vtc_anns[
|
|
|
~available_vtc_anns['recording_filename'].isin(conversations_anns['recording_filename'])]
|
|
|
|
|
|
# Get the segments that are left
|
|
|
data = am.get_segments(available_vtc_anns)
|
|
|
+ return data
|
|
|
+
|
|
|
+def _compute_annotations(data_path, annotation_function):
|
|
|
+ """
|
|
|
+ Computes conversational annotations for the ChildProject in directory data_path
|
|
|
+ :param data_path: path to ChildProject dataset
|
|
|
+ :type data_path: str
|
|
|
+ :return: annotations
|
|
|
+ :rtype: pd.DataFrame
|
|
|
+ """
|
|
|
+
|
|
|
+ data = get_available_segments(data_path, set_name='conversations')
|
|
|
if not len(data):
|
|
|
return pd.DataFrame()
|
|
|
|
|
|
data = data[~data['speaker_type'].isnull()]
|
|
|
|
|
|
- interactional_sequences = []
|
|
|
+ annotations = []
|
|
|
data_grouped = data.groupby('recording_filename')
|
|
|
+
|
|
|
for data_grouped_name, data_grouped_line in data_grouped:
|
|
|
- df_interactional_sequences = uru_conversations.get_interactional_sequences(data_grouped_line).to_dataframe()
|
|
|
- interactional_sequences.append(df_interactional_sequences)
|
|
|
+ df_annotations = annotation_function(recording_filename=data_grouped_name, segments=data_grouped_line,
|
|
|
+ project_path = data_path)
|
|
|
+ annotations.append(df_annotations)
|
|
|
|
|
|
- output = pd.concat(interactional_sequences)
|
|
|
+ output = pd.concat(annotations, axis=0)
|
|
|
return output
|
|
|
|
|
|
|
|
|
-def save_annotations(save_path, annotations):
|
|
|
+def save_annotations(save_path, annotations, annotation_type):
|
|
|
"""
|
|
|
Save the computed annotations
|
|
|
:param save_path: path where to save the annotations
|
|
@@ -85,9 +96,9 @@ def save_annotations(save_path, annotations):
|
|
|
|
|
|
annotations_grouped = annotations.groupby('raw_filename')
|
|
|
for annotation_group_name, annotation_group_data in annotations_grouped:
|
|
|
- output_filename = 'CONV_{}'.format(annotation_group_name.replace('.rttm', ''))
|
|
|
+ output_filename = '{}'.format(annotation_group_name.replace('.rttm', '_{}.csv'.format(annotation_type.upper())))
|
|
|
|
|
|
- full_save_path = os.path.join(save_path, '{}.csv'.format(output_filename))
|
|
|
+ full_save_path = os.path.join(save_path, output_filename)
|
|
|
if os.path.exists(full_save_path):
|
|
|
logger.warning('File {} already exists! If you want to recompute annotations for this file, '
|
|
|
'please delete it first!'.format(full_save_path))
|
|
@@ -112,48 +123,58 @@ def save_annotations(save_path, annotations):
|
|
|
logger.info('Saved to {}.'.format(full_save_path))
|
|
|
|
|
|
|
|
|
-def main(save_path):
|
|
|
- data_path = '.'
|
|
|
+def main(annotation_type, save_path, unknown_args):
|
|
|
+ data_path = ''#
|
|
|
|
|
|
# Check if running the script from the root of the data set
|
|
|
expected_annotation_path = os.path.join(data_path, 'annotations')
|
|
|
+ expected_recordings_path = os.path.join(data_path, 'recordings')
|
|
|
|
|
|
- assert os.path.exists(expected_annotation_path), \
|
|
|
- ValueError('Expected annotation ({}) path not found. Are you sure to be running this '
|
|
|
- 'command from the root of the data set?'.format(expected_annotation_path))
|
|
|
+ assert os.path.exists(expected_annotation_path) and os.path.exists(expected_recordings_path), \
|
|
|
+ ValueError('Expected annotation ({}) or recording path ({}) not found. Are you sure to be running this '
|
|
|
+ 'command from the root of the data set?'.format(expected_annotation_path, expected_recordings_path))
|
|
|
|
|
|
assert os.path.exists(os.path.abspath(save_path)), IOError('Path {} does not exist!'.format(save_path))
|
|
|
|
|
|
- annotations = compute_turn_annotations(data_path)
|
|
|
+ assert hasattr(annotations_functions, '{}_annotations'.format(annotation_type.lower())), \
|
|
|
+ ValueError('Annotation function {}_annotations not found.'.format(annotation_type.lower()))
|
|
|
+
|
|
|
+ annotation_function = getattr(annotations_functions, '{}_annotations'.format(annotation_type.lower()))
|
|
|
+ annotations = _compute_annotations(data_path,
|
|
|
+ annotation_function=_annotation_function_wrapper(
|
|
|
+ func=annotation_function,
|
|
|
+ parser_args=unknown_args))
|
|
|
|
|
|
if not len(annotations):
|
|
|
logger.warning('Apparently nothing needs to be computed!')
|
|
|
return
|
|
|
|
|
|
- save_annotations(save_path, annotations)
|
|
|
+ save_annotations(save_path, annotations, annotation_type)
|
|
|
|
|
|
|
|
|
def _parse_args(argv):
|
|
|
import argparse
|
|
|
|
|
|
parser = argparse.ArgumentParser(description='Compute acoustic annotations.')
|
|
|
+ parser.add_argument('--annotation-type', required=True,
|
|
|
+ help='Which type of annotations should be computed.')
|
|
|
parser.add_argument('--save-path', required=True,
|
|
|
help='Path were the annotations should be saved.')
|
|
|
- args = parser.parse_args(argv)
|
|
|
+ args, unknown_args = parser.parse_known_args(argv)
|
|
|
|
|
|
- return vars(args)
|
|
|
+ return vars(args), unknown_args
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
import sys
|
|
|
|
|
|
pgrm_name, argv = sys.argv[0], sys.argv[1:]
|
|
|
- args = _parse_args(argv)
|
|
|
+ args, unknown_args = _parse_args(argv)
|
|
|
|
|
|
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
try:
|
|
|
- main(**args)
|
|
|
+ main(unknown_args=unknown_args, **args)
|
|
|
sys.exit(0)
|
|
|
except Exception as e:
|
|
|
logger.exception(e)
|