#!/usr/bin/env python3
"""import-vtc.py -- import automated annotations into a ChildProject dataset.

One annotation file is imported per recording listed in the dataset's
recordings.csv. Each annotation file is expected to have the same name as
its audio recording, apart from the file extension. Edit the ``annot_type``
section below to choose which kind of annotations gets imported.
"""
import os

import pandas as pd
from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager

dataset_path = "."

### !!!! EDIT THIS SECTION !!!! ###
# Keep exactly one annot_type line active. Every option needs the three keys
# "set", "file_extension" and "format", because all three are read below.
annot_type = {"set": "vtc", "file_extension": ".rttm", "format": "vtc_rttm"}  # UNCOMMENT FOR VTC ANNOTATIONS
# annot_type = {"set": "alice/output", "file_extension": ".txt", "format": "alice"}  # UNCOMMENT FOR ALICE ANNOTATIONS
# annot_type = {"set": "vcm", "file_extension": ".vcm", "format": "vcm_rttm"}  # UNCOMMENT FOR VCM ANNOTATIONS
###################################

# Fail early with an explicit message instead of a bare KeyError further down.
missing_keys = {"set", "file_extension", "format"} - set(annot_type)
if missing_keys:
    raise KeyError(
        "annot_type is missing required key(s): " + ", ".join(sorted(missing_keys))
    )

# Load the project.
project = ChildProject(dataset_path)
# Load the annotation manager for our project.
# NOTE(review): kept before the first access to project.recordings, as in the
# original script -- presumably the manager loads the project metadata; confirm.
am = AnnotationManager(project)

# Work on a copy of the dataset's recordings.csv. That suits us because there
# is one importation per recording, as is usually the case with automated
# annotations.
input_frame: pd.DataFrame = project.recordings.copy()

# The import range below ends at each recording's duration, so the durations
# must be present before going any further.
if "duration" not in input_frame.columns or input_frame["duration"].isnull().any():
    raise ValueError(
        "the duration of every recording must be set in recordings.csv; "
        "run `child-project compute-durations /path` first"
    )

# Drop every column that we don't need. errors="ignore": a column that is
# already absent is no reason to abort the import.
input_frame.drop(
    columns=["experiment", "child_id", "date_iso", "start_time", "recording_device_type"],
    inplace=True,
    errors="ignore",
)

# Take the name of the audio file and swap its extension for the annotation
# one (this assumes the annotation file has the same name as the audio apart
# from the extension).
input_frame["raw_filename"] = input_frame["recording_filename"].map(
    lambda audio_name: os.path.splitext(audio_name)[0] + annot_type["file_extension"]
)
input_frame["set"] = annot_type["set"]  # set to import to
input_frame["time_seek"] = "0"  # timestamps in the file don't need to be shifted
input_frame["range_onset"] = "0"  # from the start of the audio...
input_frame["range_offset"] = input_frame["duration"]  # ...to the end
input_frame["format"] = annot_type["format"]

am.import_annotations(input_frame)