LAAC-LSCP
/
longform-hardware-audio-test


			
			
				
					
						
							12345678910111213141516171819202122232425262728293031323334
							#!/usr/bin/env python3
import pandas as pd
import os
from ChildProject.projects import ChildProject
from ChildProject.annotations import AnnotationManager

dataset_path = "."
### !!!! EDIT THIS SECTION !!!! ###
# Keep exactly ONE annot_type line uncommented. Every entry must define the
# three keys "set", "file_extension" and "format": all of them are read below.
annot_type = {"set": "vtc", "file_extension": ".rttm", "format": "vtc_rttm"}  # UNCOMMENT FOR VTC ANNOTATIONS
#annot_type = {"set": "alice/output", "file_extension": ".txt", "format": "alice"}  # UNCOMMENT FOR ALICE ANNOTATIONS
#annot_type = {"set": "vcm", "file_extension": ".vcm", "format": "vcm_rttm"}  # UNCOMMENT FOR VCM ANNOTATIONS
###################################

# Fail early, with an explicit message, if the edited configuration is incomplete
# (otherwise the script would only crash further down with a bare KeyError).
missing_keys = {"set", "file_extension", "format"} - annot_type.keys()
if missing_keys:
    raise KeyError(f"annot_type is missing required key(s): {sorted(missing_keys)}")

# Load the project.
project = ChildProject(dataset_path)
# Load the annotation manager for our project.
# NOTE(review): project.recordings is read right after this call, as in the
# original script; presumably the manager is what loads the metadata - keep this order.
am = AnnotationManager(project)

# Work on a copy of the recordings table of the dataset. That suits us because
# we have one importation per recording, as is usually the case with automated
# annotations.
input_frame = project.recordings.copy()

# Drop every column that we don't need; errors="ignore" keeps the script
# working when one of these columns is absent from recordings.csv.
input_frame = input_frame.drop(
    columns=['experiment', 'child_id', 'date_iso', 'start_time', 'recording_device_type'],
    errors="ignore",
)

# The duration of each recording must be set in recordings.csv, because it is
# used below as the end of the annotated range.
if "duration" not in input_frame.columns:
    raise KeyError(
        "'duration' column not found in the recordings table; "
        "run `child-project compute-durations /path` first"
    )

# Take the name of the audio file and swap its extension for the annotation
# extension (this assumes the annotation file has the same name as the audio,
# apart from the extension).
input_frame["raw_filename"] = input_frame["recording_filename"].map(
    lambda audio_name: os.path.splitext(audio_name)[0] + annot_type["file_extension"]
)
input_frame["set"] = annot_type["set"]  # set to import to
input_frame["time_seek"] = "0"  # timestamps in the file don't need to be shifted
input_frame["range_onset"] = "0"  # from the start of the audio...
input_frame["range_offset"] = input_frame["duration"]  # ...to the end
input_frame["format"] = annot_type["format"]  # converter used to parse the raw files

am.import_annotations(input_frame)