Browse Source

created txtgrid converter

Apfelsaft 2 years ago
parent
commit
85fa1f5b6e

+ 28 - 0
code/grid2csv.py

@@ -0,0 +1,28 @@
+import pympi.Praat as pr
+import pandas as pd
+from pathlib import Path
+
+input_dir = "outputs/mfa_align_output"
+output_path = "outputs/grid2csv_output"
+
+# CSV header; columns this script does not fill are written as "NA".
+# NOTE(review): "addresseee" looks like a typo for "addressee" - confirm before renaming.
+columns = ["speaker_id","segment_offset","mwu_type","transcription","speaker_type",
+           "segment_onset","vcm_type","addresseee","raw_filename","ling_type","lex_type"]
+
+# Convert each TextGrid file found in input_dir into one CSV in output_path.
+for file in Path(input_dir).glob('*.TextGrid'):
+    print(file)
+    grid = pr.TextGrid(file)
+    # One row per interval of every tier; start/end are multiplied by 1000
+    # (presumably seconds -> milliseconds; confirm against the TextGrid data).
+    rows = [{'speaker_id': tier.name, 'segment_onset': interval[0]*1000,
+             'segment_offset': interval[1]*1000, 'transcription': interval[2]}
+            for tier in grid.get_tiers() for interval in tier.get_all_intervals()]
+    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
+    # copied the whole frame on every call.
+    df = pd.DataFrame(rows, columns=columns)
+
+    # mode="x" never overwrites: an already existing CSV raises FileExistsError.
+    df.to_csv("{0}/{1}.csv".format(output_path, file.stem), mode = "x", na_rep= "NA", index= False)
+    print("----------------SAVED!-----------------")

+ 0 - 0
code/silence_percent


+ 1 - 0
outputs/grid2csv_output/BN1.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/Z8/Gj/MD5E-s98930--a6109e153c914cecc16ef8a2f8173009.csv/MD5E-s98930--a6109e153c914cecc16ef8a2f8173009.csv

+ 1 - 0
outputs/grid2csv_output/BN2.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/w8/Zf/MD5E-s107325--2a8bd046c0e5b0765d770bfb475addb2.csv/MD5E-s107325--2a8bd046c0e5b0765d770bfb475addb2.csv

outputs/test_results/BN1.TextGrid → outputs/mfa_align_output/BN1.TextGrid


outputs/test_results/BN2.TextGrid → outputs/mfa_align_output/BN2.TextGrid