LAAC-LSCP
/
URUMETRICS-CODE


			
			
				
					
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
							"""
This file tests the different modules in the generate_messages folder.
You must have the pip package pytest installed.
"""

import os
import sys
import pytest
import shutil
import pandas as pd
import yaml
import datetime

# Append the 'generate_messages' folder, located one level above this file's
# directory, to the module search path before the code under test is imported.
# NOTE(review): presumably needed so modules inside that folder can import each
# other by bare name - confirm against the package layout.
fpath = os.path.join(os.path.dirname(__file__),'..', 'generate_messages')
sys.path.append(fpath)

import generate_messages.messages as msg

# Dataset used by tests that modify it; rebuilt from DATASET_PATH by set_up_dataset().
TEST_DATASET_PATH = os.path.join('tests', 'test_dataset')
# Pre-existing dataset, used as-is by tests that do not change it.
DATASET_PATH = os.path.join('tests', 'existing_dataset')

# Message definition file stored inside the pre-existing dataset.
YAML_PATH = os.path.join(DATASET_PATH, 'extra', 'messages', 'definition', 'metrics_messages.yaml')
# Decode explicitly as UTF-8 instead of relying on the locale's default encoding:
# the expected messages in this file contain accented characters, and the
# definitions are presumably stored the same way (verify if the file ever changes
# encoding). Without this, the comparison fails on platforms with a non-UTF-8 default.
with open(YAML_PATH, 'r', encoding='utf-8') as in_yaml:
    # Parsed once at import time and shared by the parametrized tests.
    YAML = yaml.load(in_yaml, Loader=yaml.FullLoader)
    
def set_up_dataset():
    """Recreate TEST_DATASET_PATH as a fresh copy of DATASET_PATH.

    Any existing tree at TEST_DATASET_PATH is removed first, then the whole
    DATASET_PATH tree is copied to that location.
    """
    stale_copy_present = os.path.exists(TEST_DATASET_PATH)
    if stale_copy_present:
        # copytree needs the destination to be absent
        shutil.rmtree(TEST_DATASET_PATH)
    shutil.copytree(DATASET_PATH, TEST_DATASET_PATH)

########## generate_messages #############
    
# Metric column names that msg.get_metrics is expected to report;
# test_get_metrics compares them after sorting, so the order here is not significant.
list_of_metrics_label = [
    'avg_cry_voc_dur_chi',
    'avg_non_can_voc_dur_chi',
    'non_can_voc_chi_ph',
    'voc_chi_ph',
    'mean_pitch_range_chi',
    'avg_pr_pm_fem',
    'mlus_fem',
    'avg_can_voc_dur_chi',
    'sc_fem_ph',
    'chi_adu_turn_transitions',
    'mean_mean_pitch_fem',
    'avg_voc_dur_fem',
    'lp_n',
    'voc_dur_fem_ph',
    'cp_n',
    'cry_voc_dur_chi_ph',
    'duration_acoustic',
    'cp_dur',
    'mean_pitch_range_fem',
    'wc_fem_ph',
    'duration_vcm',
    'avg_voc_dur_chi',
    'mean_mean_pitch_chi',
    'voc_dur_chi_ph',
    'voc_fem_ph',
    'mluw_fem',
    'can_voc_chi_ph',
    'mlup_fem',
    'duration_alice_vtc',
    'avg_sr_pm_fem',
    'lp_dur',
    'duration_conversations',
    'duration_vtc',
    'avg_wr_pm_fem',
    'can_voc_dur_chi_ph',
    'pc_fem_ph',
    'cry_voc_chi_ph',
    'non_can_voc_dur_chi_ph',
]

def test_get_metrics():
    """Check msg.get_metrics against the stored reference output.

    The returned column names must match list_of_metrics_label (ignoring
    order) and the returned frame must equal tests/data/truth/get_metrics.csv.
    """
    metrics_csv = os.path.join(DATASET_PATH, 'extra', 'metrics', 'metrics.csv')
    met_rec, cols = msg.get_metrics(DATASET_PATH, metrics_csv)

    # Both sides are sorted, so only the set of names is checked, not their order.
    assert sorted(cols) == sorted(list_of_metrics_label)

    truth_csv = os.path.join('tests', 'data', 'truth', 'get_metrics.csv')
    expected = pd.read_csv(truth_csv)
    pd.testing.assert_frame_equal(met_rec, expected)

 
@pytest.mark.parametrize('key,yml,date,evol,truth', [
    (
        [False, False],
        YAML,
        '20221101',
        [('chi_adu_turn_transitions', 0.0, False), ('voc_chi_ph', -6.0, False)],
        'Gracias por enviarnos el audio. Qué bueno que hayas tomado un rato para conversar con tu hijo/a. Comunicate con <<nombre-bebe>> en todas las oportunidades que tengas – esto le hará muy bien ahora y en el futuro. La próxima vez probá practicar que haya más ida y vuelta entre ustedes, por ejemplo, haciéndole preguntas a <<nombre-bebe>>.',
    ),
    (
        [False, True],
        YAML,
        '20221118',
        [('voc_chi_ph', 0.0, False), ('chi_adu_turn_transitions', 6.0, True)],
        'En tu último audio, hubo más ida y vuelta entre ustedes. ¡Muy bien! La próxima vez probá dejar más espacio para que <<nombre-bebe>> hable, por ejemplo, haciéndole preguntas. Comunicate con <<nombre-bebe>> en todas las oportunidades que tengas – le ayudarás a desarrollar más su capacidad de expresarse.',
    ),
    (
        [True, True],
        YAML,
        '20221114',
        [('chi_adu_turn_transitions', 3.0, True), ('voc_chi_ph', 6.0, True)],
        'En tu último audio, hubo más ida y vuelta entre ustedes y <<nombre-bebe>> habló más que en el audio anterior. ¡Eso es excelente! Comunicate con <<nombre-bebe>> en todas las oportunidades que tengas – estarás contribuyendo a desarrollar más su cerebro.',
    ),
    (
        [True, False],
        YAML,
        '20221201',
        [('voc_chi_ph', 9.0, True), ('chi_adu_turn_transitions', -6.0, False)],
        'En tu último audio, <<nombre-bebe>> habló más que en el audio anterior. ¡Muy bien! La próxima vez probá practicar que haya más ida y vuelta entre ustedes, por ejemplo, haciéndole preguntas a <<nombre-bebe>>. Comunicate con <<nombre-bebe>> en todas las oportunidades que tengas – le ayudarás a estimular sus habilidades sociales.',
    ),
])
def test_fill_template(key,yml,evol,truth, date):
    """Check that msg.fill_template returns the expected message text.

    Parameters (supplied by the parametrize table, matched by name):
        key:   list of two booleans, forwarded as the first argument.
        yml:   the loaded message definitions (always the module-level YAML).
        evol:  list of (metric name, value, boolean) tuples.
        truth: the exact message string expected back.
        date:  message date as a 'YYYYMMDD' string.
    """
    # The period bounds come from the module-level YAML and use ISO-style dates.
    bounds_format = '%Y-%m-%d'
    period_start = datetime.datetime.strptime(YAML['start_date'], bounds_format)
    period_end = datetime.datetime.strptime(YAML['end_date'], bounds_format)
    message_date = datetime.datetime.strptime(date, '%Y%m%d')

    filled = msg.fill_template(key, yml, evol, message_date, period_start, period_end)
    assert truth == filled
  
#dependent on get_metrics - fill_template
@pytest.mark.parametrize('date,dateThrow', [
    ('20220117', False),
    ('20220207', False),
    ('2022-01-01', True),
])
def test_build_messages(date, dateThrow):
    """Check msg.build_messages for valid dates and for a badly formatted date.

    Parameters (supplied by the parametrize table):
        date:      date string handed to build_messages.
        dateThrow: True when build_messages is expected to raise ValueError
                   for this date; False when its output is compared with
                   tests/data/mockup_messages/messages_<date>.csv.
    """
    if dateThrow:
        # wrong date format will throw ValueError
        with pytest.raises(ValueError):
            msg.build_messages(pd.DataFrame(), [], YAML_PATH, date)
        return

    mockup_dir = os.path.join('tests', 'data', 'mockup_messages')

    metrics = pd.read_csv(os.path.join(mockup_dir, 'metrics.csv'))
    # Every metrics column except the two identifier columns.
    columns = list(set(metrics.columns) - {'recording_filename', 'child_id'})

    recordings = pd.read_csv(os.path.join(mockup_dir, 'recordings.csv'))

    # Columns present on both sides get a '_drop' suffix on the recordings
    # copy; those copies are then removed.
    merged = pd.merge(metrics, recordings, on='recording_filename', suffixes=('', '_drop'))
    redundant = [name for name in merged.columns if 'drop' in name]
    merged.drop(redundant, axis=1, inplace=True)

    def _latest_start(rows):
        # Single row of the group with the greatest start_time.
        return rows.sort_values(by='start_time', ascending=False).head(n=1)

    # When a child has several rows for the same date, keep only one of them.
    by_child_and_day = merged.groupby(['child_id', 'date_iso'], as_index=False)
    metrics_recordings = by_child_and_day.apply(_latest_start)

    messages = msg.build_messages(metrics_recordings, columns, YAML_PATH, date)
    truth_file = os.path.join(mockup_dir, 'messages_{}.csv'.format(date))
    truth = pd.read_csv(truth_file, sep=';')

    # check_like=True: row/column order is ignored in the comparison.
    pd.testing.assert_frame_equal(messages, truth, check_like=True)
   

     
#dependent on build_messages - get_metrics - fill_template
def test_generate_messages():
    """Run msg.generate_messages on a fresh dataset copy for two dates.

    The mockup recordings/children/annotations metadata are copied into the
    test dataset, messages are generated for each date in turn, and each
    generated CSV is compared with the matching mockup truth file.
    """
    set_up_dataset()
    data_path = os.path.join('tests', 'data', 'mockup_messages')
    metadata_path = os.path.join(TEST_DATASET_PATH, 'metadata')
    # Overwrite the dataset metadata with the mockup versions.
    for csv_name in ('recordings.csv', 'children.csv', 'annotations.csv'):
        shutil.copyfile(os.path.join(data_path, csv_name), os.path.join(metadata_path, csv_name))

    metrics_file = os.path.join(data_path, 'metrics.csv')
    generated_dir = os.path.join(TEST_DATASET_PATH, 'extra', 'messages', 'generated')
    dates = ('20220117', '20220207')

    # Generate and immediately read back each date's file, in chronological order.
    produced = {}
    for day in dates:
        msg.generate_messages(TEST_DATASET_PATH, metrics_file, YAML_PATH, day)
        produced[day] = pd.read_csv(os.path.join(generated_dir, 'messages_{}.csv'.format(day)), sep=';')

    expected = {}
    for day in dates:
        expected[day] = pd.read_csv(os.path.join(data_path, 'messages_{}.csv'.format(day)), sep=';')

    # Compare only after both dates have been generated.
    for day in dates:
        pd.testing.assert_frame_equal(produced[day], expected[day], check_like=True)
    
##########################################