12345678910111213141516171819202122232425262728293031323334 |
#!/usr/bin/env python3
"""Bulk-import one automated annotation file per recording into a dataset.

The script builds an import table from the dataset's recordings metadata
(one row per recording) and hands it to ``AnnotationManager.import_annotations``.
Each annotation file is expected to have the same name as its audio file,
apart from the extension.
"""
import os

import pandas as pd
from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager

dataset_path = "."

### !!!! EDIT THIS SECTION !!!! ###
# Keep exactly ONE annot_type line uncommented.
# Every option must define "set", "file_extension" AND "format": all three keys
# are read further down, so an option without "format" fails with a KeyError.
annot_type = {"set": "vtc", "file_extension": ".rttm", "format": "vtc_rttm"}  # UNCOMMENT FOR VTC ANNOTATIONS
#annot_type = {"set": "alice/output", "file_extension": ".txt", "format": "alice"}  # UNCOMMENT FOR ALICE ANNOTATIONS
#annot_type = {"set": "vcm", "file_extension": ".vcm", "format": "vcm_rttm"}  # UNCOMMENT FOR VCM ANNOTATIONS
###################################

# load the project
project = ChildProject(dataset_path)
# load the annotation manager for our project
am = AnnotationManager(project)

# Start from a copy of the recordings table: there is one import per recording,
# as is usually the case with automated annotations.
input_frame = project.recordings.copy()
# drop every column that the import does not need
input_frame.drop(
    columns=["experiment", "child_id", "date_iso", "start_time", "recording_device_type"],
    inplace=True,
)

# Make sure the duration of each recording is set in recordings.csv,
# otherwise run: child-project compute-durations /path
# The annotation file name is the audio file name with its extension swapped
# for the annotation extension.
annotation_extension = annot_type["file_extension"]
input_frame["raw_filename"] = input_frame["recording_filename"].map(
    lambda audio_name: os.path.splitext(audio_name)[0] + annotation_extension
)
input_frame["set"] = annot_type["set"]  # set to import to
input_frame["time_seek"] = "0"  # timestamps in the file don't need to be shifted
input_frame["range_onset"] = "0"  # from the start of the audio...
input_frame["range_offset"] = input_frame["duration"]  # ...to the end
input_frame["format"] = annot_type["format"]  # converter used to parse the files

am.import_annotations(input_frame)
|