""" This file will test the differents modules in compute_annotations folder You must have the pip package pytest installed """ import os import sys import pytest import shutil import pandas as pd from ChildProject.projects import ChildProject from ChildProject.annotations import AnnotationManager fpath = os.path.join(os.path.dirname(__file__),'..', 'compute_annotations') sys.path.append(fpath) try: sys.modules.pop('utils') #need to do this to avoid utils conflict of importation except: pass import compute_annotations.utils as ca_utils import compute_annotations.annotations_functions as af import compute_annotations.compute_derived_annotations as cda import compute_annotations.conversational_settings as cs import compute_annotations.utils_annotations as uan import compute_annotations.utils_audio as uau sys.path.remove(fpath) DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it TEST_DATASET_PATH = os.path.join('tests','test_dataset') #dataset to use for tests changing it def set_up_dataset(): if os.path.exists(TEST_DATASET_PATH): shutil.rmtree(TEST_DATASET_PATH) shutil.copytree(DATASET_PATH, TEST_DATASET_PATH) ################ utils ################### #no test, decorators ########################################## ######## annotations_functions ########### #wrapper around conversations.get_interactional_sequences #def test_conversations_annotations(): # pass # no test for now, see utils_annotations section #def test_acoustic_annotations(): # pass ########################################## ##### compute_derived_annotations ######## @pytest.mark.parametrize('rec_av,remove,rec_rm,empty',[ (False, False, False, True), #rec not needed, annot already there, so should be empty (False, True, False, False), #rec not needed, annot removed, so should return segments (True, True, False, False), #rec needed and present, annot removed, so should return segments (True, True, True, True), #rec needed but absent, should be empty ]) def test_get_available_segments(rec_av,remove,rec_rm, empty): set_up_dataset() if rec_rm : os.remove(os.path.join(TEST_DATASET_PATH,'recordings','raw','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav')) if remove : AnnotationManager(ChildProject(TEST_DATASET_PATH)).remove_set('acoustic') t_segments = pd.read_csv(os.path.join(TEST_DATASET_PATH,'annotations','vtc','converted','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv')) df = cda.get_available_segments(TEST_DATASET_PATH, 'acoustic', ['vtc'],rec_av) print(df.columns) print(df.dtypes) if not empty: pd.testing.assert_frame_equal(t_segments,df[['segment_onset','segment_offset','raw_filename','speaker_type']],check_like=True) else: assert df.shape[0] == 0 #dependent on _get_available_segments #avoid testing acoustic as we don't have a piece of audio to analyze @pytest.mark.parametrize('set_name,function',[ ('conversations', af.conversations_annotations), ]) def test_compute_annotations(set_name, function): set_up_dataset() to_drop = ['raw_filename', 'set', 'time_seek', 'range_onset', 'range_offset', 'format', 'filter', 'annotation_filename', 'imported_at', 'package_version', 'error', 'merged_from',] am = AnnotationManager(ChildProject(TEST_DATASET_PATH)) truth = am.get_segments(am.annotations[am.annotations['set'] == set_name]).drop(columns=to_drop) truth = truth.astype(dtype={ "segment_onset" : 'Int64', "segment_offset" : 'Int64', "speaker_type" : 'string', "recording_filename" : 'string', "segment_duration" : 'Int64', "unit_index" : 'Int64', "inter_seq_index" : 'Int64', "conv_turn_index" : 'Int64', "fmt_inter_seq" : 'string', "is_start_unit" : 'boolean', "is_end_unit" : 'boolean', "is_prompt_to" : 'Int64', "is_response_to" : 'Int64', "is_self_prompt_to" : 'Int64', "is_self_response_to": 'Int64', }) am.remove_set(set_name) df = cda._compute_annotations(TEST_DATASET_PATH,set_name,function,['vtc'],False).drop(columns=to_drop) df = df.astype(dtype=truth.dtypes) pd.testing.assert_frame_equal(truth.reset_index(drop=True),df.reset_index(drop=True), check_like=True) def test_save_annotations(): set_up_dataset() am = AnnotationManager(ChildProject(TEST_DATASET_PATH)) annots = am.annotations annots= annots[annots['set'] == 'acoustic'] segs = am.get_segments(annots) path = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw') cda.save_annotations(path, segs, 'TEST') existing = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw', 'ACOUSTIC_VTC_20220103.csv') new = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw', 'TEST_ACOUSTIC_VTC_20220103.csv') pd.testing.assert_frame_equal(pd.read_csv(existing),pd.read_csv(new), check_like=True) ########################################## ######## conversational_settings ######### #just a list of settings to use for conversation #no test to conduct ########################################## ########## utils_annotations ############# #acoustic annotation require the audio to be present #to integrate with a proper audio used #TODO find a suitable, short audio to put into #the test folder, to run the analysis on ########################################## ############# utils_audio ################ #audio analysis tool #TODO find a suitable, short audio to put into #the test folder, to run the analysis on ##########################################