|
@@ -52,14 +52,20 @@ for file in files:
|
|
|
if(audio_onset < corpus_onset): corpus_onset = audio_onset
|
|
|
if(audio_offset > corpus_offset): corpus_offset = audio_offset
|
|
|
|
|
|
+
|
|
|
#loop through all tiers
|
|
|
for tier in grid.get_tiers():
|
|
|
+
|
|
|
+ #skip any tier whose name contains 'phones'
|
|
|
+ if 'phones' in tier.name: continue
|
|
|
+
|
|
|
for interval in tier.get_all_intervals():
|
|
|
|
|
|
+ #conditions to skip this interval: empty, "sil" or "sp" (phone tiers are already skipped at the tier level)
|
|
|
if not interval[2]: continue
|
|
|
- if interval[2] == "sil" :
|
|
|
- continue
|
|
|
-
|
|
|
+ if interval[2] == "sil" : continue
|
|
|
+ if interval[2] == "sp" : continue
|
|
|
+
|
|
|
#populates dataframe
|
|
|
temp_dict = {'speaker_id': tier.name, 'segment_onset': (interval[0]*1000 + audio_onset),
|
|
|
'segment_offset': (interval[1]*1000 + audio_onset), 'transcription': interval[2]}
|
|
@@ -85,5 +91,5 @@ df = pd.concat([df, orig_df_subset])
|
|
|
df.sort_values(by='segment_onset', inplace= True)
|
|
|
|
|
|
#exports to csv
|
|
|
-df.to_csv("{0}/BN32_010007-aligned.csv.csv".format(output_path), mode = "x", na_rep= "NA", index= False)
|
|
|
+df.to_csv("{0}/BN32_010007-aligned.csv".format(output_path), mode = "x", na_rep= "NA", index= False)
|
|
|
print("----------------SAVED!-----------------")
|