test_importData.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. """
  2. This file tests the different modules in import_data.
  3. You must have the pip package pytest installed
  4. """
  5. import os
  6. import sys
  7. import pytest
  8. import shutil
  9. import pandas as pd
  10. fpath = os.path.join(os.path.dirname(__file__),'..', 'import_data')
  11. sys.path.append(fpath)
  12. #from import_data.utils import get_raw_filename
  13. #from utils import get_raw_filename
  14. try:
  15. sys.modules.pop('utils') #need to do this to avoid utils conflict of importation
  16. except:
  17. pass
  18. import utils as id_utils
  19. import prepare_data_set as pds
  20. import import_recordings as ir
  21. import import_annotations as ia
  22. import custom_converters as cc
  23. CHILDREN_FILE = os.path.join('dataset-test','metadata','children.csv')
  24. DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
  25. RECS_PATH = os.path.join(DATASET_PATH,'metadata','recordings.csv')
  26. CHILDREN_PATH = os.path.join(DATASET_PATH,'metadata','children.csv')
  27. NEW_DATASET_PATH = os.path.join('tests','new_dataset') #dataset created by test from scratch
  28. NON_EXISTING_PATH = 'non_existing'
  29. TEST_DATASET_PATH = os.path.join('tests','test_dataset') #dataset to use for tests changing it
  30. TEST_RECS_META = os.path.join(TEST_DATASET_PATH,'metadata','recordings.csv')
  31. TEST_RECS_PATH = os.path.join(TEST_DATASET_PATH,'recordings','raw')
  32. TEST_CHI_META = os.path.join(TEST_DATASET_PATH,'metadata','children.csv')
  33. INPUT_RECS = os.path.join('tests','data','recs')
  34. def set_up_dataset():
  35. if os.path.exists(TEST_DATASET_PATH):
  36. print('exists')
  37. shutil.rmtree(TEST_DATASET_PATH)
  38. shutil.copytree(DATASET_PATH, TEST_DATASET_PATH)
  39. ################### utils ################
  40. @pytest.mark.parametrize('file,result',
  41. [(CHILDREN_FILE, "children"),])
  42. def test_get_raw_filename(file, result):
  43. assert id_utils.get_raw_filename(file) == result
  44. #assert get_raw_filename(file) == result
  45. @pytest.mark.parametrize('path,ext,full_path,result',
  46. [(DATASET_PATH, ["csv","rttm"], True,
  47. sorted([os.path.join(DATASET_PATH,'annotations/vtc/raw/VTC_20220103.rttm'),
  48. os.path.join(DATASET_PATH,'annotations/vtc/raw/VTC_20220124.rttm'),
  49. os.path.join(DATASET_PATH,'metadata/annotations.csv'),
  50. os.path.join(DATASET_PATH,'metadata/children.csv'),
  51. os.path.join(DATASET_PATH,'metadata/recordings.csv'),
  52. os.path.join(DATASET_PATH,'annotations/vtc/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv'),
  53. os.path.join(DATASET_PATH,'annotations/alice/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv'),
  54. os.path.join(DATASET_PATH,'annotations/vcm/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv'),
  55. ])),
  56. (DATASET_PATH, ["csv","rttm"], False,
  57. sorted(['annotations/vtc/raw/VTC_20220103.rttm',
  58. 'annotations/vtc/raw/VTC_20220124.rttm',
  59. 'metadata/annotations.csv',
  60. 'metadata/children.csv',
  61. 'metadata/recordings.csv',
  62. 'annotations/vtc/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv',
  63. 'annotations/alice/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv',
  64. 'annotations/vcm/converted/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv',
  65. ])),
  66. (os.path.join(DATASET_PATH,'metadata'), [], False,
  67. sorted(['annotations.csv',
  68. 'children.csv',
  69. 'recordings.csv',
  70. 'readme.md',
  71. ])),
  72. (NON_EXISTING_PATH, ["csv","rttm"], False,
  73. []),
  74. (DATASET_PATH, "invented", True,
  75. []),
  76. ])
  77. def test_walk_dir(path, ext, full_path, result):
  78. r = id_utils.walk_dir(path, ext, full_path)
  79. print(r)
  80. assert r == result
  81. ##########################################
  82. ############# prepare_data_set ###########
  83. #tree when creating a new empty dataset from the NEW_DATASET_PATH var
  84. CREATION_TREE = [(NEW_DATASET_PATH, ['metadata', 'extra', 'annotations', 'recordings'], []),
  85. (os.path.join(NEW_DATASET_PATH,'metadata'), [], ['.gitkeep']),
  86. (os.path.join(NEW_DATASET_PATH,'extra'), [], ['.gitkeep']),
  87. (os.path.join(NEW_DATASET_PATH,'annotations'), ['vtc', 'acoustic', 'vcm', 'conversations', 'alice'], ['.gitkeep']),
  88. (os.path.join(NEW_DATASET_PATH,'annotations/vtc'), ['raw'], []),
  89. (os.path.join(NEW_DATASET_PATH,'annotations/vtc/raw'), [], ['.gitkeep']),
  90. (os.path.join(NEW_DATASET_PATH,'annotations/acoustic'), ['raw'], []),
  91. (os.path.join(NEW_DATASET_PATH,'annotations/acoustic/raw'), [], ['.gitkeep']),
  92. (os.path.join(NEW_DATASET_PATH,'annotations/vcm'), ['raw'], []),
  93. (os.path.join(NEW_DATASET_PATH,'annotations/vcm/raw'), [], ['.gitkeep']),
  94. (os.path.join(NEW_DATASET_PATH,'annotations/conversations'), ['raw'], []),
  95. (os.path.join(NEW_DATASET_PATH,'annotations/conversations/raw'),[], ['.gitkeep']),
  96. (os.path.join(NEW_DATASET_PATH,'annotations/alice'), ['raw'], []),
  97. (os.path.join(NEW_DATASET_PATH,'annotations/alice/raw'), [], ['.gitkeep']),
  98. (os.path.join(NEW_DATASET_PATH,'recordings'), ['raw'], []),
  99. (os.path.join(NEW_DATASET_PATH,'recordings/raw'), [], ['.gitkeep']),
  100. ]
  101. def test_create_child_project_directories():
  102. pds.create_child_project_directories(NEW_DATASET_PATH)
  103. tree = list(os.walk(NEW_DATASET_PATH))
  104. shutil.rmtree(NEW_DATASET_PATH)
  105. assert tree == CREATION_TREE
  106. ##########################################
  107. ########## import_recordings #############
  108. @pytest.mark.parametrize('path,result',
  109. [(RECS_PATH, pd.read_csv(RECS_PATH)),
  110. (NON_EXISTING_PATH, pd.DataFrame(columns = ['experiment', 'experiment_stage', 'child_id', 'date_iso', 'start_time',
  111. 'recording_device_type', 'recording_filename', 'session_id'])),])
  112. def test_get_recordings(path,result):
  113. recs = ir._get_recordings(path)
  114. pd.testing.assert_frame_equal(recs,result)
  115. @pytest.mark.parametrize('path,result',
  116. [(CHILDREN_PATH, pd.read_csv(CHILDREN_PATH)),
  117. (NON_EXISTING_PATH, pd.DataFrame(columns = ['experiment', 'child_id', 'child_dob'])),])
  118. def test_get_children(path,result):
  119. childn = ir._get_children(path)
  120. pd.testing.assert_frame_equal(childn,result)
  121. INCORRECT_RECS_PATH = os.path.join('test','data','incorrect-recs')
  122. @pytest.mark.parametrize('path,rec,result',
  123. [(RECS_PATH, "14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav", {'experiment': 'test', 'experiment_stage': 'Audio-1-familia-14T-CP-2020-02', 'child_id': 'chi_14T', 'date_iso': '2022-01-03', 'start_time': '00:00:00', 'recording_device_type': 'lena', 'recording_filename': '14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav', 'session_id': 'chi_14T_20220103', 'duration': 0, 'imported_at': '2022-10-26 14:49:10'}),
  124. (INCORRECT_RECS_PATH, "14T_Audio-1-familia-14T-CP-2020-02_20220103-000000.wav", False),
  125. (INCORRECT_RECS_PATH, "14T_Audio-1-familia-14T-CP-2020-02_20221403_000000.wav", False),
  126. (INCORRECT_RECS_PATH, "14T_Audio-1-familia-14T-CP-2020-02_20220103_520000.wav", False),])
  127. def test_build_recording_metadata(path, rec, result):
  128. metadata = ir._build_recording_metadata(path,rec,'test', 'lena')
  129. if result:
  130. result['imported_at'] = metadata['imported_at'] #forced to get the imported at value for consistency
  131. assert metadata == result
  132. #dependent on _get_recordings - _build_recording_metadata
  133. def test_import_recordings():
  134. truth = os.path.join('tests','data','truth','new_recs_import.csv')
  135. set_up_dataset()
  136. #copy a new rec into the dataset
  137. shutil.copy2(os.path.join(INPUT_RECS,'23T_Audio-3-familia-23T-lectura-01_20220124_000000.wav'),TEST_RECS_PATH)
  138. #os.path.join(TEST_RECS_PATH,'23T_Audio-3-familia-23T-lectura-01_20220124_000000.wav'))
  139. ir.import_recordings(TEST_DATASET_PATH,'test','lena')
  140. pd.testing.assert_frame_equal(pd.read_csv(TEST_RECS_META).drop(columns=['imported_at']),pd.read_csv(truth).drop(columns=['imported_at']),check_like=True)
  141. #dependent on _get_recordings - _get_children - import_recordings
  142. def test_import_children():
  143. truth = os.path.join('tests','data','truth','new_children_import.csv')
  144. set_up_dataset()
  145. #copy a new rec into the dataset
  146. shutil.copy2(os.path.join(INPUT_RECS,'23T_Audio-3-familia-23T-lectura-01_20220124_000000.wav'),TEST_RECS_PATH)
  147. ir.import_recordings(TEST_DATASET_PATH,'test','lena')
  148. #now import the child info
  149. ir.import_children(TEST_DATASET_PATH, 'test')
  150. pd.testing.assert_frame_equal(pd.read_csv(TEST_CHI_META),pd.read_csv(truth),check_like=True)
  151. #COMMENTED OUT : as of now, is just import_recordings followed by import_children, so already tested
  152. #dependent on import_recordings - import_children
  153. #def test_data_importation():
  154. # truth_r = os.path.join('tests','data','truth','new_recs_import.csv')
  155. # truth_c = os.path.join('tests','data','truth','new_children_import.csv')
  156. #
  157. # #copy a new rec into the dataset
  158. # shutil.copy2(os.path.join(INPUT_RECS,'23T_Audio-3-familia-23T-lectura-01_20220124_000000.wav'),TEST_RECS_PATH)
  159. #
  160. # ir.test_data_importation(TEST_DATASET_PATH,'test','lena')
  161. #
  162. # pd.testing.assert_frame_equal(pd.read_csv(TEST_RECS_META).drop(columns=['imported_at']),pd.read_csv(truth_r).drop(columns=['imported_at']),check_like=True)
  163. # pd.testing.assert_frame_equal(pd.read_csv(TEST_CHI_META),pd.read_csv(truth_c),check_like=True)
  164. #####################################################
  165. ################# import_annotations ################
  166. def test_filter_missing_annotation_files():
  167. pass
  168. def test_check_importation():
  169. pass
  170. def test_ia_get_recordings():
  171. pass
  172. def test_build_raw_filename():
  173. pass
  174. def test_import_annotation():
  175. pass
  176. def test_import_annotations():
  177. pass
  178. ###################################################
  179. ############## custom_converters ##################
  180. def test_filteredCsvConverter():
  181. pass
  182. ###################################################