#!/usr/bin/env python3
"""Bulk-import automated annotations (VTC / ALICE / VCM) into a ChildProject dataset.

Builds one importation row per recording — the usual case for automated
annotators — from a copy of the dataset's recordings.csv, then hands the
resulting DataFrame to AnnotationManager.import_annotations().

Assumes each annotation file shares its recording's basename and differs
only by extension (e.g. audio.wav -> audio.rttm).
"""
import os

import pandas as pd

from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager

dataset_path = "."

### !!!! EDIT THIS SECTION !!!! ###
# Exactly one annot_type dict must be active. Every alternative needs the
# "set", "file_extension" AND "format" keys: the script reads all three,
# so a dict without "format" would raise KeyError below.
annot_type = {"set": "vtc", "file_extension": ".rttm", "format": "vtc_rttm"}  # UNCOMMENT FOR VTC ANNOTATIONS
#annot_type = {"set": "alice/output", "file_extension": ".txt", "format": "alice"}  # UNCOMMENT FOR ALICE ANNOTATIONS — TODO confirm format name against ChildProject converters
#annot_type = {"set": "vcm", "file_extension": ".vcm", "format": "vcm_rttm"}  # UNCOMMENT FOR VCM ANNOTATIONS — TODO confirm format name against ChildProject converters
###################################

# Load the project and its annotation manager.
project = ChildProject(dataset_path)
am = AnnotationManager(project)

# Start from a copy of recordings.csv: one importation per recording.
input_frame = project.recordings.copy()

# Drop the recordings.csv columns the importation does not need.
input_frame.drop(
    ['experiment', 'child_id', 'date_iso', 'start_time', 'recording_device_type'],
    axis=1,
    inplace=True,
)

# Annotation file = recording basename + the annotator's extension.
# Vectorized over the one column we need — no row-wise apply(axis=1).
input_frame["raw_filename"] = input_frame["recording_filename"].map(
    lambda name: os.path.splitext(name)[0] + annot_type["file_extension"]
)

input_frame["set"] = annot_type["set"]  # destination annotation set
# NOTE(review): "0" is kept as a string to match the original script;
# ChildProject appears to coerce these to integer milliseconds — confirm.
input_frame["time_seek"] = "0"     # timestamps in the file need no shift
input_frame["range_onset"] = "0"   # import from the start of the audio...
# "duration" must be filled in recordings.csv; otherwise run:
#   child-project compute-durations /path
input_frame["range_offset"] = input_frame["duration"]  # ...to the end of it
input_frame["format"] = annot_type["format"]

am.import_annotations(input_frame)