|
@@ -0,0 +1,28 @@
|
|
|
|
+import pympi.Praat as pr
|
|
|
|
+import pandas as pd
|
|
|
|
+from pathlib import Path
|
|
|
|
+
|
|
|
|
# Folder scanned for *.TextGrid files and folder receiving one CSV per grid.
input_dir = "outputs/mfa_align_output"
output_path = "outputs/grid2csv_output"

# Header of every exported CSV, in output order. Only speaker_id,
# segment_onset, segment_offset and transcription are filled in by this
# script; the remaining columns are emitted empty (written as "NA").
CSV_COLUMNS = [
    "speaker_id", "segment_offset", "mwu_type", "transcription", "speaker_type",
    "segment_onset", "vcm_type", "addressee", "raw_filename", "ling_type", "lex_type",
]


def grid_to_dataframe(grid):
    """Flatten every interval of every tier of *grid* into one DataFrame.

    Args:
        grid: Object exposing ``get_tiers()``; each tier must expose ``name``
            and ``get_all_intervals()``. Each interval is indexed as
            ``interval[0]``, ``interval[1]``, ``interval[2]`` -- assumed to be
            (start, end, text), TODO confirm for non-interval tiers.

    Returns:
        A DataFrame with the columns of ``CSV_COLUMNS``, one row per interval.
        Onset and offset are the interval bounds multiplied by 1000
        (presumably seconds to milliseconds -- verify against the input
        files). A grid without intervals yields an empty, header-only frame.
    """
    # Collect plain dicts and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on every call.
    rows = [
        {
            "speaker_id": tier.name,
            "segment_onset": interval[0] * 1000,
            "segment_offset": interval[1] * 1000,
            "transcription": interval[2],
        }
        for tier in grid.get_tiers()
        for interval in tier.get_all_intervals()
    ]
    return pd.DataFrame(rows, columns=CSV_COLUMNS)


# Select TextGrid files only.
files = Path(input_dir).glob('*.TextGrid')

# Convert each TextGrid file of the folder to a CSV named after it.
for file in files:
    print(file)
    grid = pr.TextGrid(file)
    df = grid_to_dataframe(grid)

    # Create the destination lazily so nothing is created when there is
    # no input; exist_ok makes this a no-op on later iterations.
    Path(output_path).mkdir(parents=True, exist_ok=True)

    # mode="x" refuses to overwrite an existing CSV (raises FileExistsError).
    df.to_csv(Path(output_path) / "{0}.csv".format(file.stem),
              mode="x", na_rep="NA", index=False)
    print("----------------SAVED!-----------------")
|