123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177 |
- """
- This file tests the different modules in the compute_annotations folder
- You must have the pip package pytest installed
- """
- import os
- import sys
- import pytest
- import shutil
- import pandas as pd
- from ChildProject.projects import ChildProject
- from ChildProject.annotations import AnnotationManager
- fpath = os.path.join(os.path.dirname(__file__),'..', 'compute_annotations')
- sys.path.append(fpath)
# Forget any already-imported top-level 'utils' module to avoid an import
# conflict on that name. pop() with a default is a no-op when the module was
# never imported, so no exception handler is needed.
sys.modules.pop('utils', None)
- import compute_annotations.utils as ca_utils
- import compute_annotations.annotations_functions as af
- import compute_annotations.compute_derived_annotations as cda
- import compute_annotations.conversational_settings as cs
- import compute_annotations.utils_annotations as uan
- import compute_annotations.utils_audio as uau
- sys.path.remove(fpath)
# Pre-existing dataset, used by tests that do not modify it.
DATASET_PATH = os.path.join('tests', 'existing_dataset')
# Working copy of the dataset, used by tests that modify it.
TEST_DATASET_PATH = os.path.join('tests', 'test_dataset')


def set_up_dataset():
    """Recreate the working dataset as a fresh copy of the pre-existing one.

    Whatever currently sits at ``TEST_DATASET_PATH`` is deleted first, then the
    whole tree under ``DATASET_PATH`` is copied to that location.
    """
    stale_copy_exists = os.path.exists(TEST_DATASET_PATH)
    if stale_copy_exists:
        shutil.rmtree(TEST_DATASET_PATH)
    shutil.copytree(DATASET_PATH, TEST_DATASET_PATH)
- ################ utils ###################
- #no test, decorators
- ##########################################
- ######## annotations_functions ###########
- #wrapper around conversations.get_interactional_sequences
- #def test_conversations_annotations():
- # pass
- # no test for now, see utils_annotations section
- #def test_acoustic_annotations():
- # pass
- ##########################################
- ##### compute_derived_annotations ########
-
@pytest.mark.parametrize(
    'rec_av,remove,rec_rm,empty',
    [
        # recording not needed, annotations already there -> expect no segments
        (False, False, False, True),
        # recording not needed, annotations removed -> expect the segments
        (False, True, False, False),
        # recording needed and present, annotations removed -> expect the segments
        (True, True, False, False),
        # recording needed but absent -> expect no segments
        (True, True, True, True),
    ],
)
def test_get_available_segments(rec_av, remove, rec_rm, empty):
    """Check ``cda.get_available_segments`` on a fresh copy of the dataset.

    Parameters
    ----------
    rec_av : bool
        Forwarded as the last argument of ``get_available_segments``
        (per the case comments: whether the recording is required).
    remove : bool
        When true, the 'acoustic' annotation set is removed beforehand.
    rec_rm : bool
        When true, the raw recording file is deleted beforehand.
    empty : bool
        Whether the returned dataframe is expected to have no rows; otherwise
        it must match the converted 'vtc' segments file.
    """
    set_up_dataset()

    recording_path = os.path.join(
        TEST_DATASET_PATH,
        'recordings',
        'raw',
        '14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav',
    )
    if rec_rm:
        os.remove(recording_path)
    if remove:
        project = ChildProject(TEST_DATASET_PATH)
        AnnotationManager(project).remove_set('acoustic')

    vtc_csv_path = os.path.join(
        TEST_DATASET_PATH,
        'annotations',
        'vtc',
        'converted',
        '14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv',
    )
    expected_segments = pd.read_csv(vtc_csv_path)

    available = cda.get_available_segments(TEST_DATASET_PATH, 'acoustic', ['vtc'], rec_av)
    print(available.columns)
    print(available.dtypes)

    if empty:
        assert available.shape[0] == 0
        return

    compared_columns = ['segment_onset', 'segment_offset', 'raw_filename', 'speaker_type']
    pd.testing.assert_frame_equal(
        expected_segments,
        available[compared_columns],
        check_like=True,
    )
-
- #dependent on _get_available_segments
- #avoid testing acoustic as we don't have a piece of audio to analyze
@pytest.mark.parametrize(
    'set_name,function',
    [
        ('conversations', af.conversations_annotations),
    ],
)
def test_compute_annotations(set_name, function):
    """Recompute an annotation set and compare it with the stored one.

    The segments currently stored for ``set_name`` are loaded as the reference,
    the set is removed from the working dataset, and the result of
    ``cda._compute_annotations`` (derived from 'vtc') must equal the reference
    once the columns listed in ``ignored_columns`` are dropped on both sides.
    """
    set_up_dataset()

    # Columns left out of the comparison on both the reference and the result.
    ignored_columns = [
        'raw_filename',
        'set',
        'time_seek',
        'range_onset',
        'range_offset',
        'format',
        'filter',
        'annotation_filename',
        'imported_at',
        'package_version',
        'error',
        'merged_from',
    ]

    # Target dtypes for the reference frame, grouped by dtype.
    int_columns = (
        'segment_onset',
        'segment_offset',
        'segment_duration',
        'unit_index',
        'inter_seq_index',
        'conv_turn_index',
        'is_prompt_to',
        'is_response_to',
        'is_self_prompt_to',
        'is_self_response_to',
    )
    string_columns = ('speaker_type', 'recording_filename', 'fmt_inter_seq')
    boolean_columns = ('is_start_unit', 'is_end_unit')
    reference_dtypes = {
        **dict.fromkeys(int_columns, 'Int64'),
        **dict.fromkeys(string_columns, 'string'),
        **dict.fromkeys(boolean_columns, 'boolean'),
    }

    manager = AnnotationManager(ChildProject(TEST_DATASET_PATH))

    # The reference must be read before the set is removed from the dataset.
    stored_annotations = manager.annotations[manager.annotations['set'] == set_name]
    truth = manager.get_segments(stored_annotations)
    truth = truth.drop(columns=ignored_columns)
    truth = truth.astype(dtype=reference_dtypes)
    manager.remove_set(set_name)

    computed = cda._compute_annotations(TEST_DATASET_PATH, set_name, function, ['vtc'], False)
    computed = computed.drop(columns=ignored_columns)
    # Align the result's dtypes with the reference before comparing.
    computed = computed.astype(dtype=truth.dtypes)

    pd.testing.assert_frame_equal(
        truth.reset_index(drop=True),
        computed.reset_index(drop=True),
        check_like=True,
    )
-
def test_save_annotations():
    """Check that ``cda.save_annotations`` writes the expected raw CSV.

    The segments of the 'acoustic' set are saved with the 'TEST' label into the
    set's raw folder; the resulting ``TEST_ACOUSTIC_VTC_20220103.csv`` must have
    the same content as the existing ``ACOUSTIC_VTC_20220103.csv``.
    """
    set_up_dataset()

    manager = AnnotationManager(ChildProject(TEST_DATASET_PATH))
    all_annotations = manager.annotations
    acoustic_annotations = all_annotations[all_annotations['set'] == 'acoustic']
    segments = manager.get_segments(acoustic_annotations)

    raw_dir = os.path.join(TEST_DATASET_PATH, 'annotations', 'acoustic', 'raw')
    cda.save_annotations(raw_dir, segments, 'TEST')

    reference_csv = os.path.join(raw_dir, 'ACOUSTIC_VTC_20220103.csv')
    written_csv = os.path.join(raw_dir, 'TEST_ACOUSTIC_VTC_20220103.csv')

    pd.testing.assert_frame_equal(
        pd.read_csv(reference_csv),
        pd.read_csv(written_csv),
        check_like=True,
    )
-
- ##########################################
- ######## conversational_settings #########
-
- #just a list of settings to use for conversation
- #no test to conduct
- ##########################################
- ########## utils_annotations #############
- #acoustic annotations require the audio to be present,
- #so these tests can only be added once a proper audio file is available
- #TODO find a suitable, short audio file to put into
- #the test folder, to run the analysis on
- ##########################################
- ############# utils_audio ################
-
- #audio analysis tool
-
- #TODO find a suitable, short audio to put into
- #the test folder, to run the analysis on
- ##########################################
-
|