LAAC-LSCP
/
URUMETRICS-CODE


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
							"""
This file tests the different modules in import_data.
You must have the pip package pytest installed.
"""

import os
import sys
import pytest
import shutil
import pandas as pd

# Make the '../import_data' directory (relative to this file) importable, so
# that its modules can be imported below as top-level modules.
fpath = os.path.join(os.path.dirname(__file__), '..', 'import_data')
sys.path.append(fpath)

# Forget any already-imported module named 'utils' so that the 'import utils'
# below is resolved again instead of reusing the cached module (avoids an
# import conflict between modules sharing that name).
# pop() with a default does nothing when no such module is cached, which
# removes the need for a bare 'except' that would hide unrelated errors.
sys.modules.pop('utils', None)

import utils as id_utils
import prepare_data_set as pds
import import_recordings as ir
import import_annotations as ia
import custom_converters as cc


# All paths below are relative, so they are resolved against the current
# working directory of the test run.

# Input path for the get_raw_filename test.
CHILDREN_FILE = os.path.join('dataset-test', 'metadata', 'children.csv')

# Dataset that already exists; used by the tests that do not change it.
DATASET_PATH = os.path.join('tests', 'existing_dataset')
_EXISTING_METADATA_DIR = os.path.join(DATASET_PATH, 'metadata')
RECS_PATH = os.path.join(_EXISTING_METADATA_DIR, 'recordings.csv')
CHILDREN_PATH = os.path.join(_EXISTING_METADATA_DIR, 'children.csv')

# Dataset created from scratch by a test.
NEW_DATASET_PATH = os.path.join('tests', 'new_dataset')

# Path used for the "missing input" test cases.
NON_EXISTING_PATH = 'non_existing'

# Dataset to use for the tests that change it.
TEST_DATASET_PATH = os.path.join('tests', 'test_dataset')
_TEST_METADATA_DIR = os.path.join(TEST_DATASET_PATH, 'metadata')
TEST_RECS_META = os.path.join(_TEST_METADATA_DIR, 'recordings.csv')
TEST_RECS_PATH = os.path.join(TEST_DATASET_PATH, 'recordings', 'raw')
TEST_CHI_META = os.path.join(_TEST_METADATA_DIR, 'children.csv')

# Directory holding the recordings that tests copy into the test dataset.
INPUT_RECS = os.path.join('tests', 'data', 'recs')

def set_up_dataset():
    """Reset TEST_DATASET_PATH to a fresh copy of DATASET_PATH.

    Any copy left over at TEST_DATASET_PATH is deleted first, then the
    reference dataset is copied there again.
    """
    stale_copy_present = os.path.exists(TEST_DATASET_PATH)
    if stale_copy_present:
        print('exists')
        shutil.rmtree(TEST_DATASET_PATH)

    shutil.copytree(DATASET_PATH, TEST_DATASET_PATH)
    
    
################### utils ################

@pytest.mark.parametrize(
    'file,result',
    [
        (CHILDREN_FILE, "children"),
    ],
)
def test_get_raw_filename(file, result):
    """Check that id_utils.get_raw_filename(file) returns the expected name."""
    raw_name = id_utils.get_raw_filename(file)
    assert raw_name == result
    
# Files expected from walking DATASET_PATH with the "csv" and "rttm"
# extensions, given relative to DATASET_PATH and sorted.
_DATASET_CSV_RTTM_FILES = sorted([
    'annotations/vtc/raw/VTC_20220103.rttm',
    'annotations/vtc/raw/VTC_20220124.rttm',
    'metadata/annotations.csv',
    'metadata/children.csv',
    'metadata/recordings.csv',
    'annotations/vtc/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv',
    'annotations/alice/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv',
    'annotations/vcm/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv',
])


@pytest.mark.parametrize(
    'path,ext,full_path,result',
    [
        # full_path=True: every expected file is prefixed with the walked path
        (DATASET_PATH, ["csv", "rttm"], True,
         sorted(os.path.join(DATASET_PATH, relative)
                for relative in _DATASET_CSV_RTTM_FILES)),
        # full_path=False: expected files are relative to the walked path
        (DATASET_PATH, ["csv", "rttm"], False,
         list(_DATASET_CSV_RTTM_FILES)),
        # empty extension list: the expected result holds every file of the
        # metadata directory, whatever its extension
        (os.path.join(DATASET_PATH, 'metadata'), [], False,
         sorted([
             'annotations.csv',
             'children.csv',
             'recordings.csv',
             'readme.md',
         ])),
        # path that does not exist: nothing is expected
        (NON_EXISTING_PATH, ["csv", "rttm"], False,
         []),
        # extension given as a plain string: nothing is expected
        (DATASET_PATH, "invented", True,
         []),
    ],
)
def test_walk_dir(path, ext, full_path, result):
    """Check that id_utils.walk_dir(path, ext, full_path) returns `result`."""
    found = id_utils.walk_dir(path, ext, full_path)
    print(found)
    assert found == result
    
##########################################
    
############# prepare_data_set ###########

# Expected os.walk() listing, as (root, directories, files) tuples, of a new
# empty dataset created at NEW_DATASET_PATH.
_ANNOTATION_SETS = ['vtc', 'acoustic', 'vcm', 'conversations', 'alice']


def _placeholder_dir(relative):
    """Return the entry of a directory that only holds a '.gitkeep' file."""
    return (os.path.join(NEW_DATASET_PATH, relative), [], ['.gitkeep'])


def _raw_parent(relative):
    """Return the entries of a directory whose only content is a 'raw' placeholder directory."""
    return [
        (os.path.join(NEW_DATASET_PATH, relative), ['raw'], []),
        _placeholder_dir(relative + '/raw'),
    ]


CREATION_TREE = [
    (NEW_DATASET_PATH, ['metadata', 'extra', 'annotations', 'recordings'], []),
    _placeholder_dir('metadata'),
    _placeholder_dir('extra'),
    (os.path.join(NEW_DATASET_PATH, 'annotations'), list(_ANNOTATION_SETS), ['.gitkeep']),
]
# each annotation set contributes its own directory followed by its 'raw' directory
for _annotation_set in _ANNOTATION_SETS:
    CREATION_TREE.extend(_raw_parent('annotations/' + _annotation_set))
CREATION_TREE.extend(_raw_parent('recordings'))

def test_create_child_project_directories():
    """Create an empty dataset at NEW_DATASET_PATH and compare it to CREATION_TREE.

    The comparison does not depend on the order in which os.walk() lists
    directories and files: that order comes from the file system and is
    arbitrary, so an exact list comparison can fail on a correct tree.
    """
    def _canonical(walk_entries):
        # Normalise path separators and sort every level so that only the
        # content of the tree is compared, not its listing order.
        return sorted(
            (os.path.normpath(root), sorted(dirs), sorted(files))
            for root, dirs, files in walk_entries
        )

    try:
        pds.create_child_project_directories(NEW_DATASET_PATH)
        tree = list(os.walk(NEW_DATASET_PATH))
    finally:
        # Always remove the generated dataset, even when creation or walking
        # raised, so that a failure leaves nothing behind for the next run.
        shutil.rmtree(NEW_DATASET_PATH, ignore_errors=True)

    assert _canonical(tree) == _canonical(CREATION_TREE)
    
##########################################
    
########## import_recordings #############

@pytest.mark.parametrize(
    'path,result',
    [
        # existing metadata file: the frame read from that file is expected
        (RECS_PATH, pd.read_csv(RECS_PATH)),
        # missing file: an empty frame with the recordings columns is expected
        (NON_EXISTING_PATH, pd.DataFrame(columns=[
            'experiment',
            'experiment_stage',
            'child_id',
            'date_iso',
            'start_time',
            'recording_device_type',
            'recording_filename',
            'session_id',
        ])),
    ],
)
def test_get_recordings(path, result):
    """Check that ir._get_recordings(path) returns a frame equal to `result`."""
    loaded = ir._get_recordings(path)
    pd.testing.assert_frame_equal(loaded, result)
    
    
@pytest.mark.parametrize(
    'path,result',
    [
        # existing metadata file: the frame read from that file is expected
        (CHILDREN_PATH, pd.read_csv(CHILDREN_PATH)),
        # missing file: an empty frame with the children columns is expected
        (NON_EXISTING_PATH, pd.DataFrame(columns=[
            'experiment',
            'child_id',
            'child_dob',
        ])),
    ],
)
def test_get_children(path, result):
    """Check that ir._get_children(path) returns a frame equal to `result`."""
    loaded = ir._get_children(path)
    pd.testing.assert_frame_equal(loaded, result)


# NOTE(review): this path starts with 'test' while the other fixtures live
# under 'tests' - confirm whether this is intentional.
INCORRECT_RECS_PATH = os.path.join('test','data','incorrect-recs')


@pytest.mark.parametrize(
    'path,rec,result',
    [
        # well-formed file name: the complete metadata dictionary is expected
        (RECS_PATH, "14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav", {
            'experiment': 'test',
            'experiment_stage': 'Audio-1-familia-14T-CP-2020-02',
            'child_id': 'chi_14T',
            'date_iso': '2022-01-03',
            'start_time': '00:00:00',
            'recording_device_type': 'lena',
            'recording_filename': '14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav',
            'session_id': 'chi_14T_20220103',
            'duration': 0,
            'imported_at': '2022-10-26 14:49:10',
        }),
        # '-' instead of '_' between the date and the time
        (INCORRECT_RECS_PATH, "14T_Audio-1-familia-14T-CP-2020-02_20220103-000000.wav", False),
        # date part '20221403'
        (INCORRECT_RECS_PATH, "14T_Audio-1-familia-14T-CP-2020-02_20221403_000000.wav", False),
        # time part '520000'
        (INCORRECT_RECS_PATH, "14T_Audio-1-familia-14T-CP-2020-02_20220103_520000.wav", False),
    ],
)
def test_build_recording_metadata(path, rec, result):
    """Check ir._build_recording_metadata(path, rec, 'test', 'lena') against `result`.

    `result` is either the expected metadata dictionary or False. The
    'imported_at' value changes on every run, so the value returned by the
    call is used for that key in the expected dictionary.
    """
    metadata = ir._build_recording_metadata(path,rec,'test', 'lena')
    if result:
        # Build a copy rather than editing `result`: pytest hands the
        # parametrized object to the test without copying it, so an in-place
        # change would persist in the shared parameter.
        expected = dict(result, imported_at=metadata['imported_at'])
    else:
        expected = result
    assert metadata == expected
    
    
#dependent on _get_recordings - _build_recording_metadata
def test_import_recordings():
    """Import one new recording and compare recordings.csv with the truth file.

    The 'imported_at' column is dropped from both frames before comparing.
    """
    truth = os.path.join('tests', 'data', 'truth', 'new_recs_import.csv')
    new_recording = os.path.join(
        INPUT_RECS, '23T_Audio-3-familia-23T-lectura-01_20220124_000000.wav')

    set_up_dataset()

    # add a recording to the dataset, then run the import on it
    shutil.copy2(new_recording, TEST_RECS_PATH)
    ir.import_recordings(TEST_DATASET_PATH, 'test', 'lena')

    produced = pd.read_csv(TEST_RECS_META).drop(columns=['imported_at'])
    expected = pd.read_csv(truth).drop(columns=['imported_at'])
    pd.testing.assert_frame_equal(produced, expected, check_like=True)
    
    
#dependent on _get_recordings - _get_children - import_recordings
def test_import_children():
    """Import one new recording, then the children, and compare children.csv with the truth file."""
    truth = os.path.join('tests', 'data', 'truth', 'new_children_import.csv')
    new_recording = os.path.join(
        INPUT_RECS, '23T_Audio-3-familia-23T-lectura-01_20220124_000000.wav')

    set_up_dataset()

    # add a recording to the dataset and import it
    shutil.copy2(new_recording, TEST_RECS_PATH)
    ir.import_recordings(TEST_DATASET_PATH, 'test', 'lena')

    # then import the child information
    ir.import_children(TEST_DATASET_PATH, 'test')

    produced = pd.read_csv(TEST_CHI_META)
    expected = pd.read_csv(truth)
    pd.testing.assert_frame_equal(produced, expected, check_like=True)
    
#COMMENTED OUT: as of now this is just import_recordings followed by import_children, so it is already tested
#dependent on import_recordings - import_children
#def test_data_importation():
#    truth_r = os.path.join('tests','data','truth','new_recs_import.csv')
#    truth_c = os.path.join('tests','data','truth','new_children_import.csv')
#    
#    #copy a new rec into the dataset
#    shutil.copy2(os.path.join(INPUT_RECS,'23T_Audio-3-familia-23T-lectura-01_20220124_000000.wav'),TEST_RECS_PATH) 
#    
#    ir.test_data_importation(TEST_DATASET_PATH,'test','lena')
#    
#    pd.testing.assert_frame_equal(pd.read_csv(TEST_RECS_META).drop(columns=['imported_at']),pd.read_csv(truth_r).drop(columns=['imported_at']),check_like=True)
#    pd.testing.assert_frame_equal(pd.read_csv(TEST_CHI_META),pd.read_csv(truth_c),check_like=True)
    
#####################################################
    
################# import_annotations ################
    
    
def test_filter_missing_annotation_files():
    """Placeholder in the import_annotations section; no checks are implemented yet."""

def test_check_importation():
    """Placeholder in the import_annotations section; no checks are implemented yet."""

def test_ia_get_recordings():
    """Placeholder in the import_annotations section; no checks are implemented yet."""

def test_build_raw_filename():
    """Placeholder in the import_annotations section; no checks are implemented yet."""

def test_import_annotation():
    """Placeholder in the import_annotations section; no checks are implemented yet."""
    
def test_import_annotations():
    """Placeholder in the import_annotations section; no checks are implemented yet."""

###################################################
    

############## custom_converters ##################
    
def test_filteredCsvConverter():
    """Placeholder in the custom_converters section; no checks are implemented yet."""
    

###################################################