LAAC-LSCP
/
URUMETRICS-CODE


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
							"""
This file tests the different modules in the compute_annotations folder.
You must have the pytest pip package installed.
"""
import os
import sys
import pytest
import shutil
import pandas as pd

from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager

fpath = os.path.join(os.path.dirname(__file__),'..', 'compute_annotations')
sys.path.append(fpath)

# Forget any already-imported top-level 'utils' module so it cannot conflict
# with the compute_annotations imports that follow. Passing a default to
# pop() makes this a no-op when 'utils' was never imported, without having
# to swallow unrelated exceptions.
sys.modules.pop('utils', None)

import compute_annotations.utils as ca_utils
import compute_annotations.annotations_functions as af
import compute_annotations.compute_derived_annotations as cda
import compute_annotations.conversational_settings as cs
import compute_annotations.utils_annotations as uan
import compute_annotations.utils_audio as uau

sys.path.remove(fpath)

DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
TEST_DATASET_PATH = os.path.join('tests','test_dataset') #dataset to use for tests changing it

def set_up_dataset():
    """Reset TEST_DATASET_PATH to a fresh copy of DATASET_PATH.

    Any directory tree already present at TEST_DATASET_PATH is deleted
    first, so each caller starts from the same on-disk state.
    """
    stale_copy_present = os.path.exists(TEST_DATASET_PATH)
    if stale_copy_present:
        shutil.rmtree(TEST_DATASET_PATH)

    shutil.copytree(DATASET_PATH, TEST_DATASET_PATH)

################ utils ###################

#no test, decorators

##########################################

######## annotations_functions ###########

#wrapper around conversations.get_interactional_sequences
#def test_conversations_annotations():
#    pass

# no test for now, see utils_annotations section
#def test_acoustic_annotations():
#    pass

##########################################

##### compute_derived_annotations ########
    
@pytest.mark.parametrize('rec_av,remove,rec_rm,empty', [
    # rec not needed, annot already there, so should be empty
    (False, False, False, True),
    # rec not needed, annot removed, so should return segments
    (False, True, False, False),
    # rec needed and present, annot removed, so should return segments
    (True, True, False, False),
    # rec needed but absent, should be empty
    (True, True, True, True),
])
def test_get_available_segments(rec_av,remove,rec_rm, empty):
    """Check cda.get_available_segments against the converted vtc segments.

    rec_av is forwarded as the last argument of get_available_segments,
    remove tells the test to delete the 'acoustic' set beforehand, rec_rm
    tells it to delete the raw recording beforehand, and empty states
    whether the returned frame is expected to have no rows.
    """
    set_up_dataset()

    if rec_rm:
        recording = os.path.join(TEST_DATASET_PATH,'recordings','raw','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav')
        os.remove(recording)
    if remove:
        project = ChildProject(TEST_DATASET_PATH)
        AnnotationManager(project).remove_set('acoustic')

    reference_csv = os.path.join(TEST_DATASET_PATH,'annotations','vtc','converted','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv')
    expected = pd.read_csv(reference_csv)

    available = cda.get_available_segments(TEST_DATASET_PATH, 'acoustic', ['vtc'],rec_av)
    print(available.columns)
    print(available.dtypes)

    if empty:
        assert available.shape[0] == 0
        return

    # only these columns of the result are compared with the reference csv
    compared_columns = ['segment_onset','segment_offset','raw_filename','speaker_type']
    pd.testing.assert_frame_equal(expected, available[compared_columns], check_like=True)
        
#dependent on _get_available_segments
#avoid testing acoustic as we don't have a piece of audio to analyze
@pytest.mark.parametrize('set_name,function', [
    ('conversations', af.conversations_annotations),
])
def test_compute_annotations(set_name, function):
    """Recompute an annotation set and compare it with the stored one.

    The segments already stored for set_name are loaded as the reference,
    the set is then removed from the test dataset, and the output of
    cda._compute_annotations for that set must match the reference.
    """
    set_up_dataset()

    # columns dropped from both frames before they are compared
    ignored_columns = [
        'raw_filename',
        'set',
        'time_seek',
        'range_onset',
        'range_offset',
        'format',
        'filter',
        'annotation_filename',
        'imported_at',
        'package_version',
        'error',
        'merged_from',
    ]
    # dtypes applied to the reference; the recomputed frame is cast to match
    reference_dtypes = {
        "segment_onset": 'Int64',
        "segment_offset": 'Int64',
        "speaker_type": 'string',
        "recording_filename": 'string',
        "segment_duration": 'Int64',
        "unit_index": 'Int64',
        "inter_seq_index": 'Int64',
        "conv_turn_index": 'Int64',
        "fmt_inter_seq": 'string',
        "is_start_unit": 'boolean',
        "is_end_unit": 'boolean',
        "is_prompt_to": 'Int64',
        "is_response_to": 'Int64',
        "is_self_prompt_to": 'Int64',
        "is_self_response_to": 'Int64',
    }

    manager = AnnotationManager(ChildProject(TEST_DATASET_PATH))

    # the reference must be read before the set is removed below
    stored_rows = manager.annotations[manager.annotations['set'] == set_name]
    expected = manager.get_segments(stored_rows).drop(columns=ignored_columns)
    expected = expected.astype(dtype=reference_dtypes)

    manager.remove_set(set_name)

    recomputed = cda._compute_annotations(TEST_DATASET_PATH,set_name,function,['vtc'],False)
    recomputed = recomputed.drop(columns=ignored_columns)
    recomputed = recomputed.astype(dtype=expected.dtypes)

    pd.testing.assert_frame_equal(
        expected.reset_index(drop=True),
        recomputed.reset_index(drop=True),
        check_like=True,
    )
    

def test_save_annotations():
    """Save the 'acoustic' segments again and compare with the stored csv.

    cda.save_annotations is called on the raw 'acoustic' folder with the
    segments of the 'acoustic' set and the string 'TEST'; the test then
    expects TEST_ACOUSTIC_VTC_20220103.csv in that folder to hold the same
    data as the existing ACOUSTIC_VTC_20220103.csv.
    """
    set_up_dataset()

    manager = AnnotationManager(ChildProject(TEST_DATASET_PATH))
    index = manager.annotations
    acoustic_rows = index[index['set'] == 'acoustic']
    acoustic_segments = manager.get_segments(acoustic_rows)

    raw_dir = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw')
    cda.save_annotations(raw_dir, acoustic_segments, 'TEST')

    # both files live in the same raw folder
    reference = pd.read_csv(os.path.join(raw_dir, 'ACOUSTIC_VTC_20220103.csv'))
    written = pd.read_csv(os.path.join(raw_dir, 'TEST_ACOUSTIC_VTC_20220103.csv'))

    pd.testing.assert_frame_equal(reference, written, check_like=True)
    

##########################################


######## conversational_settings #########
    
#just a list of settings to use for conversation
#no test to conduct

##########################################


########## utils_annotations #############

#acoustic annotations require the audio to be present
#to be integrated once a proper audio file is available

#TODO find a suitable, short audio to put into
#the test folder, to run the analysis on

##########################################

############# utils_audio ################
    
#audio analysis tool
    
#TODO find a suitable, short audio to put into
#the test folder, to run the analysis on

##########################################