1 year ago · b22e84b20e
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,4 @@ Thumbs.db
 
				 
			
 
				 #test_dataset
			
 
				 tests/test_dataset/
			
 
				+tests/metrics.csv
			
--- a/compute_annotations/compute_derived_annotations.py
+++ b/compute_annotations/compute_derived_annotations.py
@@ -37,6 +37,19 @@ def _annotation_function_wrapper(func, parser_args, **kwargs):
 
				 
			
 
				 
			
 
				 def get_available_segments(project_path, set_name, base_sets, raw_recording_available=False):
			
 
				+    """
			
 
				+    Get the annotation segments that will be used to construct new annotations for a set.
			
 
				+    This returns the segments in the sets used (base_sets) to compute the new annotations (for set_name).
			
 
				+    This will exclude segments for which the annotations already exist in the target set.
			
 
				+    :param project_path: path to the dataset
			
 
				+    :type project_path: str
			
 
				+    :param set_name: set to which we will add new annotation files. segments of base sets are excluded if they already have annotation files in the target set
			
 
				+    :type set_name: str
			
 
				+    :param base_sets: sets from which to get the segments
			
 
				+    :type base_sets: list[str]
			
 
				+    :param raw_recording_available: if True, exclude annotations for which the actual recording is not present (for when the process requires the audio)
			
 
				+    :type raw_recording_available: bool
			
 
				+    """
			
 
				     project = ChildProject(project_path)
			
 
				     am = AnnotationManager(project)
			
 
				     am.read()
			
@@ -70,9 +83,17 @@ def get_available_segments(project_path, set_name, base_sets, raw_recording_avai
 
				 
			
 
				 def _compute_annotations(project_path, annotation_type, annotation_function, base_sets, raw_recording_available):
			
 
				     """
			
 
				-    Computes conversational annotations for the ChildProject in directory project_path
			
 
				+    Computes the annotations of a specific set for the ChildProject in directory project_path, from a list of base sets
			
 
				     :param project_path: path to ChildProject dataset
			
 
				     :type project_path: str
			
 
				+    :param annotation_type: name of the set to compute for
			
 
				+    :type annotation_type: str
			
 
				+    :param annotation_function: callable that creates the annotations (stored in annotations_functions)
			
 
				+    :type annotation_function: callable
			
 
				+    :param base_sets: sets that are required to compute the new annotations
			
 
				+    :type base_sets: list[str]
			
 
				+    :param raw_recording_available: whether the actual recording file is needed
			
 
				+    :type raw_recording_available: bool
			
 
				     :return: annotations
			
 
				     :rtype: pd.DataFrame
			
 
				     """
			
@@ -101,12 +122,12 @@ def _compute_annotations(project_path, annotation_type, annotation_function, bas
 
				 def save_annotations(save_path, annotations, annotation_type):
			
 
				     """
			
 
				     Save the computed annotations
			
 
				-    :param save_path: path where to save the annotations
			
 
				+    :param save_path: path where to save the annotations (use annotation raw folder)
			
 
				     :type save_path: str
			
 
				-    :param save_name: name of the file
			
 
				-    :type save_name: str
			
 
				     :param annotations: annotations to be saved
			
 
				     :type annotations: pd.DataFrame
			
 
				+    :param annotation_type: annotation type, only used to name the raw file
			
 
				+    :type annotation_type: str
			
 
				     :return: None
			
 
				     :rtype: None
			
 
				     """
			
@@ -119,6 +140,7 @@ def save_annotations(save_path, annotations, annotation_type):
 
				         if os.path.exists(full_save_path):
			
 
				             logger.warning('File {} already exists! If you want to recompute annotations for this file, '
			
 
				                            'please delete it first!'.format(full_save_path))
			
 
				+            #TODO: currently writes files until one already exists and then returns, leaving an unknown number of files written and the rest not written; maybe this should be 'continue' for now.
			
 
				             return
			
 
				 
			
 
				         annotation_group_data = annotation_group_data.drop(columns=
			
--- a/generate_messages/messages.py
+++ b/generate_messages/messages.py
@@ -36,6 +36,14 @@ def _read_yaml(yaml_path):
 
				 
			
 
				 
			
 
				 def get_metrics(project_path, metrics_file):
			
 
				+    """
			
 
				+    given a dataset and an output metrics file
			
 
				+    return a merge of the metrics and recording info, as well as a list of all the metrics labels
			
 
				+    :param project_path: path to the dataset
			
 
				+    :type project_path: str
			
 
				+    :param metrics_file: path to the metrics csv file
			
 
				+    :type metrics_file: str
			
 
				+    """
			
 
				     project = ChildProject(project_path)
			
 
				     am = AnnotationManager(project)
			
 
				     am.read()
			
@@ -58,6 +66,22 @@ def get_metrics(project_path, metrics_file):
 
				 
			
 
				 
			
 
				 def fill_template(template_key, messages, metrics_evolution):
			
 
				+    """
			
 
				+    given the full list of templates, a template key and a measure of the evolution of the metric,
			
 
				+    returns the wanted template filled with the correct evolution indication
			
 
				+    :param template_key: elements identifying the template to use; they are formatted into the '_{}_{}' key looked up in 'messages'
			
 
				+    :type template_key: str
			
 
				+    :param messages: dictionary of templates (taken from yaml file)
			
 
				+    :type messages: dict
			
 
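				+    :param metrics_evolution: iterable of 3-item tuples (positivity item, a value ignored by the loop over them, positivity direction)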
			
 
				+    :type metrics_evolution:
			
 
				+    :return: filled template
			
 
				+    :rtype: str
			
 
				+    """
			
 
				+    print(template_key)
			
 
				+    print(messages)
			
 
				+    print(metrics_evolution)
			
 
				+    
			
 
				     template = messages['_{}_{}'.format(*template_key)]
			
 
				 
			
 
				     for positivity_item_index, (positivity_item, _, positivity_direction) in enumerate(metrics_evolution, 1):
			
@@ -74,6 +98,18 @@ def fill_template(template_key, messages, metrics_evolution):
 
				 
			
 
				 
			
 
				 def build_messages(metrics_recordings, metrics_columns, message_file_path, date):
			
 
				+    """
			
 
				+    :param metrics_recordings:
			
 
				+    :type metrics_recordings:
			
 
				+    :param metrics_columns:
			
 
				+    :type metrics_columns:
			
 
				+    :param message_file_path:
			
 
				+    :type message_file_path:
			
 
				+    :param date:
			
 
				+    :type date: str
			
 
				+    :return:
			
 
				+    :rtype:
			
 
				+    """
			
 
				     try:
			
 
				         date = datetime.strptime(date, "%Y%m%d").strftime("%Y-%m-%d")
			
 
				     except:
			
@@ -129,6 +165,16 @@ def build_messages(metrics_recordings, metrics_columns, message_file_path, date)
 
				 
			
 
				 
			
 
				 def generate_messages(project_path, metrics_file, message_definition, date):
			
 
				+    """
			
 
				+    :param project_path:
			
 
				+    :type project_path:
			
 
				+    :param metrics_file:
			
 
				+    :type metrics_file:
			
 
				+    :param message_definition:
			
 
				+    :type message_definition:
			
 
				+    :param date: date in format YYYYMMDD for which to generate messages
			
 
				+    :type date: str
			
 
				+    """
			
 
				     message_out_path = os.path.join(project_path, 'extra', 'messages', 'generated', 'messages_{}.csv'.format(date))
			
 
				 
			
 
				     message_out_dir = os.path.dirname(message_out_path)
			
--- a/tests/data/truth/get_metrics.csv
+++ b/tests/data/truth/get_metrics.csv
@@ -0,0 +1,2 @@
 
				+recording_filename,child_id,duration_acoustic,mean_pitch_range_fem,mean_mean_pitch_chi,mean_mean_pitch_fem,mean_pitch_range_chi,duration_alice_vtc,mlup_fem,mlus_fem,mluw_fem,pc_fem_ph,sc_fem_ph,wc_fem_ph,avg_wr_pm_fem,avg_sr_pm_fem,avg_pr_pm_fem,duration_conversations,chi_adu_turn_transitions,duration_vcm,cp_dur,cp_n,lp_dur,avg_non_can_voc_dur_chi,non_can_voc_dur_chi_ph,non_can_voc_chi_ph,avg_can_voc_dur_chi,can_voc_dur_chi_ph,can_voc_chi_ph,avg_cry_voc_dur_chi,cry_voc_dur_chi_ph,cry_voc_chi_ph,lp_n,duration_vtc,avg_voc_dur_chi,avg_voc_dur_fem,voc_dur_chi_ph,voc_dur_fem_ph,voc_chi_ph,voc_fem_ph,experiment,experiment_stage,date_iso,start_time,recording_device_type,session_id,duration,imported_at
			
 
				+14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,chi_14T,60000,24.04061291408368,27.81940327111273,26.803605201172505,20.849495032754373,60000,31.30692307692308,15.54,10.296153846153846,24419.4,12121.2,8031.0,415.2094566207961,660.5086208495603,1345.1232870330582,60000,10,60000,0.3742337772141196,0.25,1.0,657.8888888888889,355260.0,540.0,1180.3333333333333,212460.0,180.0,0,0.0,0.0,1.0,60000,788.5,3125.923076923077,567720.0,2438220.0,720.0,780.0,test,Audio-1-familia-14T-CP-2020-02,2022-01-03,00:00:00,unknown,chi_14T_20220103,60000,2022-09-08 18:10:10
			
--- a/tests/existing_dataset/annotations/acoustic/raw/ACOUSTIC_VTC_20220103.csv
+++ b/tests/existing_dataset/annotations/acoustic/raw/ACOUSTIC_VTC_20220103.csv
@@ -1,33 +1,33 @@
 
				-"segment_onset","segment_offset","speaker_type","recording_filename","mean_pitch_semitone","median_pitch_semitone","p5_pitch_semitone","p95_pitch_semitone","pitch_range_semitone"
			
 
				-716,1091,"OCH","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.492868065520707,30.138642702884987,20.926579385247212,38.043171039925696,17.116591654678484
			
 
				-788,1335,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.214643421856756,33.11085884127924,8.643340281901134,37.13355658578659,28.49021630388546
			
 
				-969,2473,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.298045163512683,32.88547138489501,16.117759395143267,36.702404352700434,20.584644957557167
			
 
				-1939,2354,"OCH","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",30.428308088316903,34.61281831343323,15.920787817807724,37.36111941428427,21.440331596476543
			
 
				-3086,4515,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.555790687873063,25.43977444107606,9.468522269335491,38.91392066067757,29.445398391342078
			
 
				-4703,4887,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",33.291505212128094,33.965485866935126,29.417243345500275,36.839583257883376,7.4223399123831015
			
 
				-5505,6600,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",25.238233859752953,25.98407828936643,8.463363614308186,36.31506800615161,27.851704391843427
			
 
				-7341,13185,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.370678664104652,27.750854514520753,8.341604446398945,35.09095086550627,26.749346419107326
			
 
				-12083,12444,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.956301863760103,28.88535975974367,28.714569368207133,34.608363509762654,5.893794141555521
			
 
				-12590,13373,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",30.885068902158533,32.25262782345649,15.867153956619152,37.06109220660275,21.1939382499836
			
 
				-14342,16227,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",27.099425252840522,30.1316705155409,4.900098987854256,36.364327384602554,31.464228396748297
			
 
				-16544,19425,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.813336544105095,29.79400943252938,6.3135133339207385,35.052483607526085,28.738970273605347
			
 
				-19011,20491,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",29.44086771482358,31.30927236046787,14.25502037838229,35.78714023878623,21.532119860403938
			
 
				-19043,19185,"OCH","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",32.98295998189534,32.584870544859385,32.48382047902467,33.98404020556487,1.5002197265401946
			
 
				-19565,19676,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",33.23750268763251,32.635081963927476,31.840505612731988,35.47788877572007,3.6373831629880797
			
 
				-19992,20350,"MAL","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",23.984203441734014,29.978572686536808,10.764177536575712,30.938970827336167,20.174793290760455
			
 
				-20819,21492,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",25.972110913025837,27.786858031123913,11.930542688627893,34.3079964640147,22.377453775386805
			
 
				-21371,22532,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",28.63344638844196,28.605046698194336,18.85412445160436,38.6102602617787,19.75613581017434
			
 
				-22341,22489,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.353878009435352,27.440413582763572,7.962495806926718,37.031037659997224,29.068541853070506
			
 
				-23011,32954,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",25.77530730000921,26.83477640238156,8.893634420293438,37.358931057908656,28.465296637615218
			
 
				-31303,31511,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",28.0041265711964,27.788575894455448,27.13021350366272,29.700172038814195,2.569958535151475
			
 
				-33289,35014,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.25148665734396,28.7330051925927,9.870414118088517,31.4329577243406,21.562543606252085
			
 
				-34493,34922,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",23.198844317955512,29.349953897341493,6.124320718231403,31.328715670516758,25.204394952285355
			
 
				-35669,44992,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",24.460095969972958,27.402694042444974,6.9129786644071,35.560134764118054,28.647156099710955
			
 
				-35675,36011,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",27.16001249135059,31.49551718224296,6.488738913265618,33.956207690162906,27.467468776897288
			
 
				-45529,48388,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",22.117398461823168,23.810606492436005,3.250495971994742,38.30354993900844,35.053053967013696
			
 
				-47920,48511,"MAL","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",26.125592094398705,28.040180683789984,20.538702480546274,32.325854178371884,11.78715169782561
			
 
				-48406,48742,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",27.901054926890673,28.90405869303639,20.20725812088503,33.8977663740024,13.690508253117372
			
 
				-49012,49992,"CHI","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",25.59386252867044,28.882693615369817,7.193311347719939,32.0764799883201,24.883168640600164
			
 
				-49969,53843,"FEM","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",23.456682357930745,24.408714876975008,5.961750903820473,36.933893811282246,30.972142907461773
			
 
				-49994,50402,"MAL","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",16.573881119136608,20.800023048270496,4.888311023727557,24.879314465853824,19.991003442126267
			
 
				-50785,50991,"MAL","13980101/14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav",13.751972351457697,9.010613171850022,8.798760436188962,27.3952530848442,18.596492648655236
			
 
				+segment_onset,segment_offset,speaker_type,recording_filename,mean_pitch_semitone,median_pitch_semitone,p5_pitch_semitone,p95_pitch_semitone,pitch_range_semitone
			
 
				+716,1091,OCH,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.4928680655207,30.138642702885,20.9265793852472,38.0431710399257,17.1165916546785
			
 
				+788,1335,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.2146434218568,33.1108588412792,8.64334028190113,37.1335565857866,28.4902163038855
			
 
				+969,2473,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.2980451635127,32.885471384895,16.1177593951433,36.7024043527004,20.5846449575572
			
 
				+1939,2354,OCH,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,30.4283080883169,34.6128183134332,15.9207878178077,37.3611194142843,21.4403315964765
			
 
				+3086,4515,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.5557906878731,25.4397744410761,9.46852226933549,38.9139206606776,29.4453983913421
			
 
				+4703,4887,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,33.2915052121281,33.9654858669351,29.4172433455003,36.8395832578834,7.4223399123831
			
 
				+5505,6600,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,25.238233859753,25.9840782893664,8.46336361430819,36.3150680061516,27.8517043918434
			
 
				+7341,13185,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.3706786641047,27.7508545145208,8.34160444639895,35.0909508655063,26.7493464191073
			
 
				+12083,12444,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.9563018637601,28.8853597597437,28.7145693682071,34.6083635097627,5.89379414155552
			
 
				+12590,13373,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,30.8850689021585,32.2526278234565,15.8671539566192,37.0610922066027,21.1939382499836
			
 
				+14342,16227,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,27.0994252528405,30.1316705155409,4.90009898785426,36.3643273846026,31.4642283967483
			
 
				+16544,19425,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.8133365441051,29.7940094325294,6.31351333392074,35.0524836075261,28.7389702736053
			
 
				+19011,20491,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,29.4408677148236,31.3092723604679,14.2550203783823,35.7871402387862,21.5321198604039
			
 
				+19043,19185,OCH,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,32.9829599818953,32.5848705448594,32.4838204790247,33.9840402055649,1.50021972654019
			
 
				+19565,19676,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,33.2375026876325,32.6350819639275,31.840505612732,35.4778887757201,3.63738316298808
			
 
				+19992,20350,MAL,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,23.984203441734,29.9785726865368,10.7641775365757,30.9389708273362,20.1747932907605
			
 
				+20819,21492,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,25.9721109130258,27.7868580311239,11.9305426886279,34.3079964640147,22.3774537753868
			
 
				+21371,22532,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,28.633446388442,28.6050466981943,18.8541244516044,38.6102602617787,19.7561358101743
			
 
				+22341,22489,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.3538780094354,27.4404135827636,7.96249580692672,37.0310376599972,29.0685418530705
			
 
				+23011,32954,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,25.7753073000092,26.8347764023816,8.89363442029344,37.3589310579087,28.4652966376152
			
 
				+31303,31511,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,28.0041265711964,27.7885758944554,27.1302135036627,29.7001720388142,2.56995853515147
			
 
				+33289,35014,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.251486657344,28.7330051925927,9.87041411808852,31.4329577243406,21.5625436062521
			
 
				+34493,34922,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,23.1988443179555,29.3499538973415,6.1243207182314,31.3287156705168,25.2043949522854
			
 
				+35669,44992,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,24.460095969973,27.402694042445,6.9129786644071,35.5601347641181,28.647156099711
			
 
				+35675,36011,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,27.1600124913506,31.495517182243,6.48873891326562,33.9562076901629,27.4674687768973
			
 
				+45529,48388,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,22.1173984618232,23.810606492436,3.25049597199474,38.3035499390084,35.0530539670137
			
 
				+47920,48511,MAL,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,26.1255920943987,28.04018068379,20.5387024805463,32.3258541783719,11.7871516978256
			
 
				+48406,48742,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,27.9010549268907,28.9040586930364,20.207258120885,33.8977663740024,13.6905082531174
			
 
				+49012,49992,CHI,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,25.5938625286704,28.8826936153698,7.19331134771994,32.0764799883201,24.8831686406002
			
 
				+49969,53843,FEM,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,23.4566823579307,24.408714876975,5.96175090382047,36.9338938112822,30.9721429074618
			
 
				+49994,50402,MAL,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,16.5738811191366,20.8000230482705,4.88831102372756,24.8793144658538,19.9910034421263
			
 
				+50785,50991,MAL,14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav,13.7519723514577,9.01061317185002,8.79876043618896,27.3952530848442,18.5964926486552
			
--- a/tests/test_computeAnnotations.py
+++ b/tests/test_computeAnnotations.py
@@ -8,6 +8,9 @@ import pytest
 
				 import shutil
			
 
				 import pandas as pd
			
 
				 
			
 
				+from ChildProject.projects import ChildProject
			
 
				+from ChildProject.annotations import AnnotationManager
			
 
				+
			
 
				 fpath = os.path.join(os.path.dirname(__file__),'..', 'compute_annotations')
			
 
				 sys.path.append(fpath)
			
 
				 
			
@@ -25,6 +28,14 @@ import compute_annotations.utils_audio as uau
 
				 
			
 
				 sys.path.remove(fpath)
			
 
				 
			
 
				+DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
			
 
				+TEST_DATASET_PATH = os.path.join('tests','test_dataset') #dataset to use for tests changing it
			
 
				+
			
 
				+def set_up_dataset():
			
 
				+    if os.path.exists(TEST_DATASET_PATH):
			
 
				+        shutil.rmtree(TEST_DATASET_PATH)
			
 
				+    shutil.copytree(DATASET_PATH, TEST_DATASET_PATH)  
			
 
				+
			
 
				 ################ utils ###################
			
 
				 
			
 
				 #no test, decorators
			
@@ -33,8 +44,9 @@ sys.path.remove(fpath)
 
				 
			
 
				 ######## annotations_functions ###########
			
 
				 
			
 
				-def test_conversations_annotations():
			
 
				-    pass
			
 
				+#wrapper around conversations.get_interactional_sequences
			
 
				+#def test_conversations_annotations():
			
 
				+#    pass
			
 
				 
			
 
				 # no test for now, see utils_annotations section
			
 
				 #def test_acoustic_annotations():
			
@@ -43,6 +55,95 @@ def test_conversations_annotations():
 
				 ##########################################
			
 
				 
			
 
				 ##### compute_derived_annotations ########
			
 
				+    
			
 
				+@pytest.mark.parametrize('rec_av,remove,rec_rm,empty',[
			
 
				+    (False, False, False, True), #rec not needed, annot already there, so should be empty
			
 
				+    (False, True, False, False), #rec not needed, annot removed, so should return segments
			
 
				+    (True, True, False, False), #rec needed and present, annot removed, so should return segments
			
 
				+    (True, True, True, True), #rec needed but absent, should be empty
			
 
				+     ])
			
 
				+def test_get_available_segments(rec_av,remove,rec_rm, empty):
			
 
				+    set_up_dataset()
			
 
				+    if rec_rm : os.remove(os.path.join(TEST_DATASET_PATH,'recordings','raw','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000.wav'))
			
 
				+    if remove :
			
 
				+        AnnotationManager(ChildProject(TEST_DATASET_PATH)).remove_set('acoustic')
			
 
				+        
			
 
				+    t_segments = pd.read_csv(os.path.join(TEST_DATASET_PATH,'annotations','vtc','converted','14T_Audio-1-familia-14T-CP-2020-02_20220103_000000_0_60000.csv'))
			
 
				+    
			
 
				+    df = cda.get_available_segments(TEST_DATASET_PATH, 'acoustic', ['vtc'],rec_av)
			
 
				+    print(df.columns)
			
 
				+    print(df.dtypes)
			
 
				+    if not empty:
			
 
				+        pd.testing.assert_frame_equal(t_segments,df[['segment_onset','segment_offset','raw_filename','speaker_type']],check_like=True)
			
 
				+    else:
			
 
				+        assert df.shape[0] == 0
			
 
				+        
			
 
				+#dependent on get_available_segments
			
 
				+#avoid testing acoustic as we don't have a piece of audio to analyze
			
 
				+@pytest.mark.parametrize('set_name,function',[
			
 
				+    ('conversations', af.conversations_annotations), 
			
 
				+     ])     
			
 
				+def test_compute_annotations(set_name, function):
			
 
				+    set_up_dataset()
			
 
				+    to_drop = ['raw_filename',
			
 
				+                'set',
			
 
				+                'time_seek',
			
 
				+                'range_onset',
			
 
				+                'range_offset',
			
 
				+                'format',
			
 
				+                'filter',
			
 
				+                'annotation_filename',
			
 
				+                'imported_at',
			
 
				+                'package_version',
			
 
				+                'error',
			
 
				+                'merged_from',]
			
 
				+    
			
 
				+    am = AnnotationManager(ChildProject(TEST_DATASET_PATH))
			
 
				+    
			
 
				+    truth = am.get_segments(am.annotations[am.annotations['set'] == set_name]).drop(columns=to_drop)
			
 
				+    
			
 
				+    truth = truth.astype(dtype={
			
 
				+            "segment_onset"      :   'Int64',
			
 
				+            "segment_offset"     :   'Int64',
			
 
				+            "speaker_type"       :   'string',
			
 
				+            "recording_filename" :   'string',
			
 
				+            "segment_duration"   :   'Int64',
			
 
				+            "unit_index"         :   'Int64',
			
 
				+            "inter_seq_index"    :   'Int64',
			
 
				+            "conv_turn_index"    :   'Int64',
			
 
				+            "fmt_inter_seq"      :   'string',
			
 
				+            "is_start_unit"      :   'boolean',
			
 
				+            "is_end_unit"        :   'boolean',
			
 
				+            "is_prompt_to"       :   'Int64',
			
 
				+            "is_response_to"     :   'Int64',
			
 
				+            "is_self_prompt_to"  :   'Int64',
			
 
				+            "is_self_response_to":   'Int64',
			
 
				+            })
			
 
				+
			
 
				+    am.remove_set(set_name)
			
 
				+    
			
 
				+    df = cda._compute_annotations(TEST_DATASET_PATH,set_name,function,['vtc'],False).drop(columns=to_drop)
			
 
				+    df = df.astype(dtype=truth.dtypes)
			
 
				+    
			
 
				+    pd.testing.assert_frame_equal(truth.reset_index(drop=True),df.reset_index(drop=True), check_like=True)
			
 
				+    
			
 
				+
			
 
				+def test_save_annotations():
			
 
				+    set_up_dataset()
			
 
				+    am = AnnotationManager(ChildProject(TEST_DATASET_PATH))
			
 
				+    annots = am.annotations
			
 
				+    annots= annots[annots['set'] == 'acoustic']
			
 
				+    segs = am.get_segments(annots)
			
 
				+    
			
 
				+    path = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw')
			
 
				+    
			
 
				+    cda.save_annotations(path, segs, 'TEST')
			
 
				+    
			
 
				+    existing = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw', 'ACOUSTIC_VTC_20220103.csv')
			
 
				+    new = os.path.join(TEST_DATASET_PATH,'annotations', 'acoustic','raw', 'TEST_ACOUSTIC_VTC_20220103.csv')
			
 
				+    
			
 
				+    pd.testing.assert_frame_equal(pd.read_csv(existing),pd.read_csv(new), check_like=True)
			
 
				+    
			
 
				 
			
 
				 ##########################################
			
 
				 
			
@@ -66,6 +167,11 @@ def test_conversations_annotations():
 
				 ##########################################
			
 
				 
			
 
				 ############# utils_audio ################
			
 
				+    
			
 
				+#audio analysis tool
			
 
				+    
			
 
				+#TODO find a suitable, short audio to put into
			
 
				+#the test folder, to run the analysis on
			
 
				 
			
 
				 ##########################################
			
 
				     
			
--- a/tests/test_computeMetrics.py
+++ b/tests/test_computeMetrics.py
@@ -15,11 +15,21 @@ sys.path.append(fpath)
 
				 import compute_metrics.metrics as met
			
 
				 import compute_metrics.metrics_functions as mf
			
 
				 
			
 
				+DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
			
 
				+
			
 
				 ############### metrics ##################
			
 
				 
			
 
				+def test_get_metrics():
			
 
				+    met_file = os.path.join('tests','metrics.csv')
			
 
				+    met.get_metrics(DATASET_PATH, met_file)
			
 
				+    
			
 
				+    pd.testing.assert_frame_equal(pd.read_csv(met_file),pd.read_csv(os.path.join(DATASET_PATH,'extra','metrics','metrics.csv')))
			
 
				+
			
 
				 ##########################################
			
 
				 
			
 
				 ########## metrics_functions #############
			
 
				+    
			
 
				+#maybe add a part checking calculation of each metric?
			
 
				 
			
 
				 ##########################################
			
 
				 
			
--- a/tests/test_generateMessages.py
+++ b/tests/test_generateMessages.py
@@ -14,7 +14,34 @@ sys.path.append(fpath)
 
				 
			
 
				 import generate_messages.messages as msg
			
 
				 
			
 
				+TEST_DATASET_PATH = os.path.join('tests','test_dataset') #dataset to use for tests changing it
			
 
				+DATASET_PATH = os.path.join('tests','existing_dataset') #dataset already existing, used for tests that don't change it
			
 
				+
			
 
				+def set_up_dataset():
			
 
				+    if os.path.exists(TEST_DATASET_PATH):
			
 
				+        shutil.rmtree(TEST_DATASET_PATH)
			
 
				+    shutil.copytree(DATASET_PATH, TEST_DATASET_PATH) 
			
 
				+
			
 
				 ########## generate_messages #############
			
 
				+    
			
 
				+list_of_metrics_label = ['avg_cry_voc_dur_chi', 'avg_non_can_voc_dur_chi', 'non_can_voc_chi_ph', 'voc_chi_ph', 'mean_pitch_range_chi', 'avg_pr_pm_fem', 'mlus_fem', 'avg_can_voc_dur_chi', 'sc_fem_ph', 'chi_adu_turn_transitions', 'mean_mean_pitch_fem', 'avg_voc_dur_fem', 'lp_n', 'voc_dur_fem_ph', 'cp_n', 'cry_voc_dur_chi_ph', 'duration_acoustic', 'cp_dur', 'mean_pitch_range_fem', 'wc_fem_ph', 'duration_vcm', 'avg_voc_dur_chi', 'mean_mean_pitch_chi', 'voc_dur_chi_ph', 'voc_fem_ph', 'mluw_fem', 'can_voc_chi_ph', 'mlup_fem', 'duration_alice_vtc', 'avg_sr_pm_fem', 'lp_dur', 'duration_conversations', 'duration_vtc', 'avg_wr_pm_fem', 'can_voc_dur_chi_ph', 'pc_fem_ph', 'cry_voc_chi_ph', 'non_can_voc_dur_chi_ph']
			
 
				+
			
 
				+def test_get_metrics():
			
 
				+    met_rec, cols = msg.get_metrics(DATASET_PATH, os.path.join(DATASET_PATH,'extra','metrics','metrics.csv'))
			
 
				+    
			
 
				+    assert sorted(cols) == sorted(list_of_metrics_label)
			
 
				+    pd.testing.assert_frame_equal(met_rec, pd.read_csv(os.path.join('tests','data','truth','get_metrics.csv')))
			
 
				+    
			
 
				+
			
 
				+def test_fill_template():
			
 
				+    pass
			
 
				+    
			
 
				+    
			
 
				+def test_build_messages():
			
 
				+    pass
			
 
				 
			
 
				+def test_generate_messages():
			
 
				+    pass
			
 
				+    
			
 
				 ##########################################