|
@@ -6,6 +6,17 @@ import re
|
|
|
input_dir = "outputs/mfa_align_output"
|
|
|
output_path = "outputs/grid2csv_output"
|
|
|
|
|
|
+speaker_type_dic = {
|
|
|
+ "CHI - words" : "CHI",
|
|
|
+ "CHI - phones" : "CHI",
|
|
|
+ "MOT - words" : "FEM",
|
|
|
+ "MOT - phones" : "FEM",
|
|
|
+ "FAT - words" : "MAL",
|
|
|
+ "FAT - phones" : "MAL",
|
|
|
+ "SIS - words" : "OCH",
|
|
|
+ "SIS - phones" : "OCH"
|
|
|
+ }
|
|
|
+
|
|
|
#selects TextGrid files only
|
|
|
files = Path(input_dir).glob('*.TextGrid')
|
|
|
|
|
@@ -30,8 +41,9 @@ for file in files:
|
|
|
#populates dataframe
|
|
|
df = df.append(pd.Series({'speaker_id': tier.name, 'segment_onset': (interval[0]*1000 + audio_onset),
|
|
|
'segment_offset': (interval[1]*1000 + audio_onset), 'transcription': interval[2]}), ignore_index= True)
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+ #populates speaker_type column; speaker_ids absent from speaker_type_dic map to NaN (written as "NA" by to_csv below)
|
|
|
+ df['speaker_type'] = df['speaker_id'].map(speaker_type_dic)
|
|
|
#exports to csv
|
|
|
df.to_csv("{0}/{1}.csv".format(output_path, file.stem), mode = "x", na_rep= "NA", index= False)
|
|
|
print("----------------SAVED!-----------------")
|