Browse Source

tests for metrics and computed annotations

Loann Peurey 1 year ago
parent
commit
b22e84b20e

+ 1 - 0
.gitignore

@@ -169,3 +169,4 @@ Thumbs.db
 
 #test_dataset
 tests/test_dataset/
+tests/metrics.csv

+ 26 - 4
compute_annotations/compute_derived_annotations.py

@@ -37,6 +37,19 @@ def _annotation_function_wrapper(func, parser_args, **kwargs):
 
 
 def get_available_segments(project_path, set_name, base_sets, raw_recording_available=False):
+    """
+    Get the annotation segments that will be used to construct new annotations for a set.
+    This returns the segments in the sets used (base_sets) to compute the new annotations (for set_name).
+    This will exclude segments for which the annotations already exist in the target set.
+    :param project_path: path to the dataset
+    :type project_path: str
+    :param set_name: set to which we will add new annotation files. segments of base sets are excluded if they already have annotation files in the target set
+    :type set_name: str
+    :param base_sets: sets from which to get the segments
+    :type base_sets: list[str]
+    :param raw_recording_available: if True, exclude annotations for which the actual recording is not present (for when the process requires the audio)
+    :type raw_recording_available: bool
+    """
     project = ChildProject(project_path)
     am = AnnotationManager(project)
     am.read()
@@ -70,9 +83,17 @@ def get_available_segments(project_path, set_name, base_sets, raw_recording_avai
 
 def _compute_annotations(project_path, annotation_type, annotation_function, base_sets, raw_recording_available):
     """
-    Computes conversational annotations for the ChildProject in directory project_path
+    Computes annotations for the ChildProject in directory project_path, of a specific set, from a list of sets
     :param project_path: path to ChildProject dataset
     :type project_path: str
+    :param annotation_type: name of the set to compute for
+    :type annotation_type: str
+    :param annotation_function: callable that creates the annotations (stored in annotations_functions)
+    :type annotation_function: callable
+    :param base_sets: sets that are required to compute the new annotations
+    :type base_sets: list[str]
+    :param raw_recording_available: is the actual recording file needed
+    :type raw_recording_available: bool
     :return: annotations
     :rtype: pd.DataFrame
     """
@@ -101,12 +122,12 @@ def _compute_annotations(project_path, annotation_type, annotation_function, bas
 def save_annotations(save_path, annotations, annotation_type):
     """
     Save the computed annotations
-    :param save_path: path where to save the annotations
+    :param save_path: path where to save the annotations (use annotation raw folder)
     :type save_path: str
-    :param save_name: name of the file
-    :type save_name: str
     :param annotations: annotations to be saved
     :type annotations: pd.DataFrame
+    :param annotation_type: annotation type, only used to name the raw file
+    :type annotation_type: str
     :return: None
     :rtype: None
     """
@@ -119,6 +140,7 @@ def save_annotations(save_path, annotations, annotation_type):
         if os.path.exists(full_save_path):
             logger.warning('File {} already exists! If you want to recompute annotations for this file, '
                            'please delete it first!'.format(full_save_path))
+            #TODO: currently writes files until it reaches one that already exists and then returns, leaving an unknown number of files written and the rest unwritten; maybe this should be 'continue' for now.
             return
 
         annotation_group_data = annotation_group_data.drop(columns=

+ 46 - 0
generate_messages/messages.py

@@ -36,6 +36,14 @@ def _read_yaml(yaml_path):
 
 
 def get_metrics(project_path, metrics_file):
+    """
+    given a dataset and an output metrics file
+    return a merge of the metrics and recording info, as well as a list of all the metrics labels
+    :param project_path: path to the dataset
+    :type project_path: str
+    :param metrics_file: path to the metrics csv file
+    :type metrics_file: str
+    """
     project = ChildProject(project_path)
     am = AnnotationManager(project)
     am.read()
@@ -58,6 +66,22 @@ def get_metrics(project_path, metrics_file):
 
 
 def fill_template(template_key, messages, metrics_evolution):
+    """
+    given the full list of templates, a template key and a measure of the evolution of the metric,
+    returns the wanted template filled with the correct evolution indication
+    :param template_key: template to consider, contained in the full list in 'messages'
+    :type template_key: str
+    :param messages: dictionary of templates (taken from yaml file)
+    :type messages: dict
+    :param metrics_evolution:
+    :type metrics_evolution:
+    :return: filled template
+    :rtype: str
+    """
+    print(template_key)
+    print(messages)
+    print(metrics_evolution)
+    
     template = messages['_{}_{}'.format(*template_key)]
 
     for positivity_item_index, (positivity_item, _, positivity_direction) in enumerate(metrics_evolution, 1):
@@ -74,6 +98,18 @@ def fill_template(template_key, messages, metrics_evolution):
 
 
 def build_messages(metrics_recordings, metrics_columns, message_file_path, date):
+    """
+    :param metrics_recordings:
+    :type metrics_recordings:
+    :param metrics_columns:
+    :type metrics_columns:
+    :param message_file_path:
+    :type message_file_path:
+    :param date:
+    :type date: str
+    :return:
+    :rtype:
+    """
     try:
         date = datetime.strptime(date, "%Y%m%d").strftime("%Y-%m-%d")
     except:
@@ -129,6 +165,16 @@ def build_messages(metrics_recordings, metrics_columns, message_file_path, date)
 
 
 def generate_messages(project_path, metrics_file, message_definition, date):
+    """
+    :param project_path:
+    :type project_path:
+    :param metrics_file:
+    :type metrics_file:
+    :param message_definition:
+    :type message_definition:
+    :param date: date in format YYYYMMDD for which to generate messages
+    :type date: str
+    """
     message_out_path = os.path.join(project_path, 'extra', 'messages', 'generated', 'messages_{}.csv'.format(date))
 
     message_out_dir = os.path.dirname(message_out_path)

+ 2 - 0
tests/data/truth/get_metrics.csv

@@ -0,0 +1,2 @@
+recording_filename,child_id,duration_acoustic,mean_pitch_range_fem,mean_mean_pitch_chi,mean_mean_pitch_fem,mean_pitch_range_chi,duration_alice_vtc,mlup_fem,mlus_fem,mluw_fem,pc_fem_ph,sc_fem_ph,wc_fem_ph,avg_wr_pm_fem,avg_sr_pm_fem,avg_pr_pm_fem,duration_conversations,chi_adu_turn_transitions,duration_vcm,cp_dur,cp_n,lp_dur,avg_non_can_voc_dur_chi,non_can_voc_dur_chi_ph,non_can_voc_chi_ph,avg_can_voc_dur_chi,can_voc_dur_chi_ph,can_voc_chi_ph,avg_cry_voc_dur_chi,cry_voc_dur_chi_ph,cry_voc_chi_ph,lp_n,duration_vtc,avg_voc_dur_chi,avg_voc_dur_fem,voc_dur_chi_ph,voc_dur_fem_ph,voc_chi_ph,voc_fem_ph,experiment,experiment_stage,date_iso,start_time,recording_device_type,session_id,duration,imported_at
+14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,chi_14T,60000,24.04061291408368,27.81940327111273,26.803605201172505,20.849495032754373,60000,31.30692307692308,15.54,10.296153846153846,24419.4,12121.2,8031.0,415.2094566207961,660.5086208495603,1345.1232870330582,60000,10,60000,0.3742337772141196,0.25,1.0,657.8888888888889,355260.0,540.0,1180.3333333333333,212460.0,180.0,0,0.0,0.0,1.0,60000,788.5,3125.923076923077,567720.0,2438220.0,720.0,780.0,test,Audio-1-familia-14T-CP-2020-02,2022-01-03,00:00:00,unknown,chi_14T_20220103,60000,2022-09-08 18:10:10

+ 33 - 33
tests/existing_dataset/annotations/acoustic/raw/ACOUSTIC_VTC_20220103.csv

@@ -1,33 +1,33 @@
-"segment_onset","segment_offset","speaker_type","recording_filename","mean_pitch_semitone","median_pitch_semitone","p5_pitch_semitone","p95_pitch_semitone","pitch_range_semitone"
-716,1091,"OCH","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.492868065520707,30.138642702884987,20.926579385247212,38.043171039925696,17.116591654678484
-788,1335,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.214643421856756,33.11085884127924,8.643340281901134,37.13355658578659,28.49021630388546
-969,2473,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.298045163512683,32.88547138489501,16.117759395143267,36.702404352700434,20.584644957557167
-1939,2354,"OCH","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",30.428308088316903,34.61281831343323,15.920787817807724,37.36111941428427,21.440331596476543
-3086,4515,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.555790687873063,25.43977444107606,9.468522269335491,38.91392066067757,29.445398391342078
-4703,4887,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",33.291505212128094,33.965485866935126,29.417243345500275,36.839583257883376,7.4223399123831015
-5505,6600,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",25.238233859752953,25.98407828936643,8.463363614308186,36.31506800615161,27.851704391843427
-7341,13185,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.370678664104652,27.750854514520753,8.341604446398945,35.09095086550627,26.749346419107326
-12083,12444,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.956301863760103,28.88535975974367,28.714569368207133,34.608363509762654,5.893794141555521
-12590,13373,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",30.885068902158533,32.25262782345649,15.867153956619152,37.06109220660275,21.1939382499836
-14342,16227,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",27.099425252840522,30.1316705155409,4.900098987854256,36.364327384602554,31.464228396748297
-16544,19425,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.813336544105095,29.79400943252938,6.3135133339207385,35.052483607526085,28.738970273605347
-19011,20491,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.44086771482358,31.30927236046787,14.25502037838229,35.78714023878623,21.532119860403938
-19043,19185,"OCH","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",32.98295998189534,32.584870544859385,32.48382047902467,33.98404020556487,1.5002197265401946
-19565,19676,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",33.23750268763251,32.635081963927476,31.840505612731988,35.47788877572007,3.6373831629880797
-19992,20350,"MAL","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",23.984203441734014,29.978572686536808,10.764177536575712,30.938970827336167,20.174793290760455
-20819,21492,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",25.972110913025837,27.786858031123913,11.930542688627893,34.3079964640147,22.377453775386805
-21371,22532,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",28.63344638844196,28.605046698194336,18.85412445160436,38.6102602617787,19.75613581017434
-22341,22489,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.353878009435352,27.440413582763572,7.962495806926718,37.031037659997224,29.068541853070506
-23011,32954,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",25.77530730000921,26.83477640238156,8.893634420293438,37.358931057908656,28.465296637615218
-31303,31511,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",28.0041265711964,27.788575894455448,27.13021350366272,29.700172038814195,2.569958535151475
-33289,35014,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.25148665734396,28.7330051925927,9.870414118088517,31.4329577243406,21.562543606252085
-34493,34922,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",23.198844317955512,29.349953897341493,6.124320718231403,31.328715670516758,25.204394952285355
-35669,44992,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",24.460095969972958,27.402694042444974,6.9129786644071,35.560134764118054,28.647156099710955
-35675,36011,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",27.16001249135059,31.49551718224296,6.488738913265618,33.956207690162906,27.467468776897288
-45529,48388,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",22.117398461823168,23.810606492436005,3.250495971994742,38.30354993900844,35.053053967013696
-47920,48511,"MAL","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.125592094398705,28.040180683789984,20.538702480546274,32.325854178371884,11.78715169782561
-48406,48742,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",27.901054926890673,28.90405869303639,20.20725812088503,33.8977663740024,13.690508253117372
-49012,49992,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",25.59386252867044,28.882693615369817,7.193311347719939,32.0764799883201,24.883168640600164
-49969,53843,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",23.456682357930745,24.408714876975008,5.961750903820473,36.933893811282246,30.972142907461773
-49994,50402,"MAL","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",16.573881119136608,20.800023048270496,4.888311023727557,24.879314465853824,19.991003442126267
-50785,50991,"MAL","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",13.751972351457697,9.010613171850022,8.798760436188962,27.3952530848442,18.596492648655236
+segment_onset,segment_offset,speaker_type,recording_filename,mean_pitch_semitone,median_pitch_semitone,p5_pitch_semitone,p95_pitch_semitone,pitch_range_semitone
+716,1091,OCH,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.4928680655207,30.138642702885,20.9265793852472,38.0431710399257,17.1165916546785
+788,1335,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.2146434218568,33.1108588412792,8.64334028190113,37.1335565857866,28.4902163038855
+969,2473,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.2980451635127,32.885471384895,16.1177593951433,36.7024043527004,20.5846449575572
+1939,2354,OCH,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,30.4283080883169,34.6128183134332,15.9207878178077,37.3611194142843,21.4403315964765
+3086,4515,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.5557906878731,25.4397744410761,9.46852226933549,38.9139206606776,29.4453983913421
+4703,4887,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,33.2915052121281,33.9654858669351,29.4172433455003,36.8395832578834,7.4223399123831
+5505,6600,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,25.238233859753,25.9840782893664,8.46336361430819,36.3150680061516,27.8517043918434
+7341,13185,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.3706786641047,27.7508545145208,8.34160444639895,35.0909508655063,26.7493464191073
+12083,12444,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.9563018637601,28.8853597597437,28.7145693682071,34.6083635097627,5.89379414155552
+12590,13373,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,30.8850689021585,32.2526278234565,15.8671539566192,37.0610922066027,21.1939382499836
+14342,16227,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,27.0994252528405,30.1316705155409,4.90009898785426,36.3643273846026,31.4642283967483
+16544,19425,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.8133365441051,29.7940094325294,6.31351333392074,35.0524836075261,28.7389702736053
+19011,20491,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.4408677148236,31.3092723604679,14.2550203783823,35.7871402387862,21.5321198604039
+19043,19185,OCH,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,32.9829599818953,32.5848705448594,32.4838204790247,33.9840402055649,1.50021972654019
+19565,19676,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,33.2375026876325,32.6350819639275,31.840505612732,35.4778887757201,3.63738316298808
+19992,20350,MAL,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,23.984203441734,29.9785726865368,10.7641775365757,30.9389708273362,20.1747932907605
+20819,21492,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,25.9721109130258,27.7868580311239,11.9305426886279,34.3079964640147,22.3774537753868
+21371,22532,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,28.633446388442,28.6050466981943,18.8541244516044,38.6102602617787,19.7561358101743
+22341,22489,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.3538780094354,27.4404135827636,7.96249580692672,37.0310376599972,29.0685418530705
+23011,32954,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,25.7753073000092,26.8347764023816,8.89363442029344,37.3589310579087,28.4652966376152
+31303,31511,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,28.0041265711964,27.7885758944554,27.1302135036627,29.7001720388142,2.56995853515147
+33289,35014,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.251486657344,28.7330051925927,9.87041411808852,31.4329577243406,21.5625436062521
+34493,34922,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,23.1988443179555,29.3499538973415,6.1243207182314,31.3287156705168,25.2043949522854
+35669,44992,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,24.460095969973,27.402694042445,6.9129786644071,35.5601347641181,28.647156099711
+35675,36011,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,27.1600124913506,31.495517182243,6.48873891326562,33.9562076901629,27.4674687768973
+45529,48388,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,22.1173984618232,23.810606492436,3.25049597199474,38.3035499390084,35.0530539670137
+47920,48511,MAL,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.1255920943987,28.04018068379,20.5387024805463,32.3258541783719,11.7871516978256
+48406,48742,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,27.9010549268907,28.9040586930364,20.207258120885,33.8977663740024,13.6905082531174
+49012,49992,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,25.5938625286704,28.8826936153698,7.19331134771994,32.0764799883201,24.8831686406002
+49969,53843,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,23.4566823579307,24.408714876975,5.96175090382047,36.9338938112822,30.9721429074618
+49994,50402,MAL,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,16.5738811191366,20.8000230482705,4.88831102372756,24.8793144658538,19.9910034421263
+50785,50991,MAL,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,13.7519723514577,9.01061317185002,8.79876043618896,27.3952530848442,18.5964926486552

+ 108 - 2
tests/test_computeAnnotations.py

@@ -8,6 +8,9 @@ import pytest
 import shutil
 import pandas as pd
 
+from ChildProject.projects import ChildProject
+from ChildProject.annotations import AnnotationManager
+
 fpath = os.path.join(os.path.dirname(__file__),'..', 'compute_annotations')
 sys.path.append(fpath)
 
@@ -25,6 +28,14 @@ import compute_annotations.utils_audio as uau
 
 sys.path.remove(fpath)
 
+DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
+TEST_DATASET_PATH = os.path.join('tests','test_dataset') #dataset to use for tests changing it
+
+def set_up_dataset():
+    if os.path.exists(TEST_DATASET_PATH):
+        shutil.rmtree(TEST_DATASET_PATH)
+    shutil.copytree(DATASET_PATH, TEST_DATASET_PATH)  
+
 ################ utils ###################
 
 #no test, decorators
@@ -33,8 +44,9 @@ sys.path.remove(fpath)
 
 ######## annotations_functions ###########
 
-def test_conversations_annotations():
-    pass
+#wrapper around conversations.get_interactional_sequences
+#def test_conversations_annotations():
+#    pass
 
 # no test for now, see utils_annotations section
 #def test_acoustic_annotations():
@@ -43,6 +55,95 @@ def test_conversations_annotations():
 ##########################################
 
 ##### compute_derived_annotations ########
+    
+@pytest.mark.parametrize('rec_av,remove,rec_rm,empty',[
+    (False, False, False, True), #rec not needed, annot already there, so should be empty
+    (False, True, False, False), #rec not needed, annot removed, so should return segments
+    (True, True, False, False), #rec needed and present, annot removed, so should return segments
+    (True, True, True, True), #rec needed but absent, should be empty
+     ])
+def test_get_available_segments(rec_av,remove,rec_rm, empty):
+    set_up_dataset()
+    if rec_rm : os.remove(os.path.join(TEST_DATASET_PATH,'recordings','raw','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav'))
+    if remove :
+        AnnotationManager(ChildProject(TEST_DATASET_PATH)).remove_set('acoustic')
+        
+    t_segments = pd.read_csv(os.path.join(TEST_DATASET_PATH,'annotations','vtc','converted','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv'))
+    
+    df = cda.get_available_segments(TEST_DATASET_PATH, 'acoustic', ['vtc'],rec_av)
+    print(df.columns)
+    print(df.dtypes)
+    if not empty:
+        pd.testing.assert_frame_equal(t_segments,df[['segment_onset','segment_offset','raw_filename','speaker_type']],check_like=True)
+    else:
+        assert df.shape[0] == 0
+        
+#dependent on get_available_segments
+#avoid testing acoustic as we don't have a piece of audio to analyze
+@pytest.mark.parametrize('set_name,function',[
+    ('conversations', af.conversations_annotations), 
+     ])     
+def test_compute_annotations(set_name, function):
+    set_up_dataset()
+    to_drop = ['raw_filename',
+                'set',
+                'time_seek',
+                'range_onset',
+                'range_offset',
+                'format',
+                'filter',
+                'annotation_filename',
+                'imported_at',
+                'package_version',
+                'error',
+                'merged_from',]
+    
+    am = AnnotationManager(ChildProject(TEST_DATASET_PATH))
+    
+    truth = am.get_segments(am.annotations[am.annotations['set'] == set_name]).drop(columns=to_drop)
+    
+    truth = truth.astype(dtype={
+            "segment_onset"      :   'Int64',
+            "segment_offset"     :   'Int64',
+            "speaker_type"       :   'string',
+            "recording_filename" :   'string',
+            "segment_duration"   :   'Int64',
+            "unit_index"         :   'Int64',
+            "inter_seq_index"    :   'Int64',
+            "conv_turn_index"    :   'Int64',
+            "fmt_inter_seq"      :   'string',
+            "is_start_unit"      :   'boolean',
+            "is_end_unit"        :   'boolean',
+            "is_prompt_to"       :   'Int64',
+            "is_response_to"     :   'Int64',
+            "is_self_prompt_to"  :   'Int64',
+            "is_self_response_to":   'Int64',
+            })
+
+    am.remove_set(set_name)
+    
+    df = cda._compute_annotations(TEST_DATASET_PATH,set_name,function,['vtc'],False).drop(columns=to_drop)
+    df = df.astype(dtype=truth.dtypes)
+    
+    pd.testing.assert_frame_equal(truth.reset_index(drop=True),df.reset_index(drop=True), check_like=True)
+    
+
+def test_save_annotations():
+    set_up_dataset()
+    am = AnnotationManager(ChildProject(TEST_DATASET_PATH))
+    annots = am.annotations
+    annots= annots[annots['set'] == 'acoustic']
+    segs = am.get_segments(annots)
+    
+    path = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw')
+    
+    cda.save_annotations(path, segs, 'TEST')
+    
+    existing = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw', 'ACOUSTIC_VTC_20220103.csv')
+    new = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw', 'TEST_ACOUSTIC_VTC_20220103.csv')
+    
+    pd.testing.assert_frame_equal(pd.read_csv(existing),pd.read_csv(new), check_like=True)
+    
 
 ##########################################
 
@@ -66,6 +167,11 @@ def test_conversations_annotations():
 ##########################################
 
 ############# utils_audio ################
+    
+#audio analysis tool
+    
+#TODO find a suitable, short audio to put into
+#the test folder, to run the analysis on
 
 ##########################################
     

+ 10 - 0
tests/test_computeMetrics.py

@@ -15,11 +15,21 @@ sys.path.append(fpath)
 import compute_metrics.metrics as met
 import compute_metrics.metrics_functions as mf
 
+DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
+
 ############### metrics ##################
 
+def test_get_metrics():
+    met_file = os.path.join('tests','metrics.csv')
+    met.get_metrics(DATASET_PATH, met_file)
+    
+    pd.testing.assert_frame_equal(pd.read_csv(met_file),pd.read_csv(os.path.join(DATASET_PATH,'extra','metrics','metrics.csv')))
+
 ##########################################
 
 ########## metrics_functions #############
+    
+#maybe add a part checking calculation of each metric?
 
 ##########################################
 

+ 27 - 0
tests/test_generateMessages.py

@@ -14,7 +14,34 @@ sys.path.append(fpath)
 
 import generate_messages.messages as msg
 
+TEST_DATASET_PATH = os.path.join('tests','test_dataset') #dataset to use for tests changing it
+DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
+
+def set_up_dataset():
+    if os.path.exists(TEST_DATASET_PATH):
+        shutil.rmtree(TEST_DATASET_PATH)
+    shutil.copytree(DATASET_PATH, TEST_DATASET_PATH) 
+
 ########## generate_messages #############
+    
+list_of_metrics_label = ['avg_cry_voc_dur_chi', 'avg_non_can_voc_dur_chi', 'non_can_voc_chi_ph', 'voc_chi_ph', 'mean_pitch_range_chi', 'avg_pr_pm_fem', 'mlus_fem', 'avg_can_voc_dur_chi', 'sc_fem_ph', 'chi_adu_turn_transitions', 'mean_mean_pitch_fem', 'avg_voc_dur_fem', 'lp_n', 'voc_dur_fem_ph', 'cp_n', 'cry_voc_dur_chi_ph', 'duration_acoustic', 'cp_dur', 'mean_pitch_range_fem', 'wc_fem_ph', 'duration_vcm', 'avg_voc_dur_chi', 'mean_mean_pitch_chi', 'voc_dur_chi_ph', 'voc_fem_ph', 'mluw_fem', 'can_voc_chi_ph', 'mlup_fem', 'duration_alice_vtc', 'avg_sr_pm_fem', 'lp_dur', 'duration_conversations', 'duration_vtc', 'avg_wr_pm_fem', 'can_voc_dur_chi_ph', 'pc_fem_ph', 'cry_voc_chi_ph', 'non_can_voc_dur_chi_ph']
+
+def test_get_metrics():
+    met_rec, cols = msg.get_metrics(DATASET_PATH, os.path.join(DATASET_PATH,'extra','metrics','metrics.csv'))
+    
+    assert sorted(cols) == sorted(list_of_metrics_label)
+    pd.testing.assert_frame_equal(met_rec, pd.read_csv(os.path.join('tests','data','truth','get_metrics.csv')))
+    
+
+def test_fill_template():
+    pass
+    
+    
+def test_build_messages():
+    pass
 
+def test_generate_messages():
+    pass
+    
 ##########################################