Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

test_computeAnnotations.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
"""
This file tests the different modules in the compute_annotations folder.
You must have the pip package pytest installed.
"""
  5. import os
  6. import sys
  7. import pytest
  8. import shutil
  9. import pandas as pd
  10. from ChildProject.projects import ChildProject
  11. from ChildProject.annotations import AnnotationManager
  12. fpath = os.path.join(os.path.dirname(__file__),'..', 'compute_annotations')
  13. sys.path.append(fpath)
  14. try:
  15. sys.modules.pop('utils') #need to do this to avoid utils conflict of importation
  16. except:
  17. pass
  18. import compute_annotations.utils as ca_utils
  19. import compute_annotations.annotations_functions as af
  20. import compute_annotations.compute_derived_annotations as cda
  21. import compute_annotations.conversational_settings as cs
  22. import compute_annotations.utils_annotations as uan
  23. import compute_annotations.utils_audio as uau
  24. sys.path.remove(fpath)
  25. DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
  26. TEST_DATASET_PATH = os.path.join('tests','test_dataset') #dataset to use for tests changing it
  27. def set_up_dataset():
  28. if os.path.exists(TEST_DATASET_PATH):
  29. shutil.rmtree(TEST_DATASET_PATH)
  30. shutil.copytree(DATASET_PATH, TEST_DATASET_PATH)
  31. ################ utils ###################
  32. #no test, decorators
  33. ##########################################
  34. ######## annotations_functions ###########
  35. #wrapper around conversations.get_interactional_sequences
  36. #def test_conversations_annotations():
  37. # pass
  38. # no test for now, see utils_annotations section
  39. #def test_acoustic_annotations():
  40. # pass
  41. ##########################################
  42. ##### compute_derived_annotations ########
  43. @pytest.mark.parametrize('rec_av,remove,rec_rm,empty',[
  44. (False, False, False, True), #rec not needed, annot already there, so should be empty
  45. (False, True, False, False), #rec not needed, annot removed, so should return segments
  46. (True, True, False, False), #rec needed and present, annot removed, so should return segments
  47. (True, True, True, True), #rec needed but absent, should be empty
  48. ])
  49. def test_get_available_segments(rec_av,remove,rec_rm, empty):
  50. set_up_dataset()
  51. if rec_rm : os.remove(os.path.join(TEST_DATASET_PATH,'recordings','raw','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav'))
  52. if remove :
  53. AnnotationManager(ChildProject(TEST_DATASET_PATH)).remove_set('acoustic')
  54. t_segments = pd.read_csv(os.path.join(TEST_DATASET_PATH,'annotations','vtc','converted','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv'))
  55. df = cda.get_available_segments(TEST_DATASET_PATH, 'acoustic', ['vtc'],rec_av)
  56. print(df.columns)
  57. print(df.dtypes)
  58. if not empty:
  59. pd.testing.assert_frame_equal(t_segments,df[['segment_onset','segment_offset','raw_filename','speaker_type']],check_like=True)
  60. else:
  61. assert df.shape[0] == 0
  62. #dependent on _get_available_segments
  63. #avoid testing acoustic as we don't have a piece of audio to analyze
  64. @pytest.mark.parametrize('set_name,function',[
  65. ('conversations', af.conversations_annotations),
  66. ])
  67. def test_compute_annotations(set_name, function):
  68. set_up_dataset()
  69. to_drop = ['raw_filename',
  70. 'set',
  71. 'time_seek',
  72. 'range_onset',
  73. 'range_offset',
  74. 'format',
  75. 'filter',
  76. 'annotation_filename',
  77. 'imported_at',
  78. 'package_version',
  79. 'error',
  80. 'merged_from',]
  81. am = AnnotationManager(ChildProject(TEST_DATASET_PATH))
  82. truth = am.get_segments(am.annotations[am.annotations['set'] == set_name]).drop(columns=to_drop)
  83. truth = truth.astype(dtype={
  84. "segment_onset" : 'Int64',
  85. "segment_offset" : 'Int64',
  86. "speaker_type" : 'string',
  87. "recording_filename" : 'string',
  88. "segment_duration" : 'Int64',
  89. "unit_index" : 'Int64',
  90. "inter_seq_index" : 'Int64',
  91. "conv_turn_index" : 'Int64',
  92. "fmt_inter_seq" : 'string',
  93. "is_start_unit" : 'boolean',
  94. "is_end_unit" : 'boolean',
  95. "is_prompt_to" : 'Int64',
  96. "is_response_to" : 'Int64',
  97. "is_self_prompt_to" : 'Int64',
  98. "is_self_response_to": 'Int64',
  99. })
  100. am.remove_set(set_name)
  101. df = cda._compute_annotations(TEST_DATASET_PATH,set_name,function,['vtc'],False).drop(columns=to_drop)
  102. df = df.astype(dtype=truth.dtypes)
  103. pd.testing.assert_frame_equal(truth.reset_index(drop=True),df.reset_index(drop=True), check_like=True)
  104. def test_save_annotations():
  105. set_up_dataset()
  106. am = AnnotationManager(ChildProject(TEST_DATASET_PATH))
  107. annots = am.annotations
  108. annots= annots[annots['set'] == 'acoustic']
  109. segs = am.get_segments(annots)
  110. path = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw')
  111. cda.save_annotations(path, segs, 'TEST')
  112. existing = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw', 'ACOUSTIC_VTC_20220103.csv')
  113. new = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw', 'TEST_ACOUSTIC_VTC_20220103.csv')
  114. pd.testing.assert_frame_equal(pd.read_csv(existing),pd.read_csv(new), check_like=True)
  115. ##########################################
  116. ######## conversational_settings #########
  117. #just a list of settings to use for conversation
  118. #no test to conduct
  119. ##########################################
  120. ########## utils_annotations #############
#acoustic annotations require the audio to be present,
#so these tests need a proper audio file to run on
#TODO find a suitable, short audio to put into
#the test folder, to run the analysis on
  125. ##########################################
  126. ############# utils_audio ################
  127. #audio analysis tool
  128. #TODO find a suitable, short audio to put into
  129. #the test folder, to run the analysis on
  130. ##########################################