Browse Source

modified grid2csv for timestamps

Apfelsaft 2 years ago
parent
commit
5beb04f8f5

+ 9 - 2
code/grid2csv.py

@@ -1,6 +1,7 @@
 import pympi.Praat as pr
 import pandas as pd
 from pathlib import Path
+import re
 
 input_dir = "outputs/mfa_align_output"
 output_path = "outputs/grid2csv_output"
@@ -15,6 +16,9 @@ for file in files:
     print(file)
     grid = pr.TextGrid(file)
 
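+    #gets the original onset of the sliced recording: the first number in the file name (e.g. BN-727249-1297003 -> 727249); NOTE(review): an unsliced name such as BN1 would yield 1 here - confirm that is intended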
+    audio_onset = int(re.split("\D+", file.stem)[1])
+
     #initialize header
     df = pd.DataFrame(columns= ["speaker_id","segment_offset","mwu_type","transcription","speaker_type",
                     "segment_onset","vcm_type","addresseee","raw_filename","ling_type","lex_type"])
@@ -22,8 +26,11 @@ for file in files:
     #loop through all tiers
     for tier in grid.get_tiers():
         for interval in tier.get_all_intervals():
-            df = df.append(pd.Series({'speaker_id': tier.name, 'segment_onset': interval[0]*1000,
-                'segment_offset': interval[1]*1000, 'transcription': interval[2]}), ignore_index= True)
+            
+            #populates dataframe
+            df = df.append(pd.Series({'speaker_id': tier.name, 'segment_onset': (interval[0]*1000 + audio_onset),
+                'segment_offset': (interval[1]*1000 + audio_onset), 'transcription': interval[2]}), ignore_index= True)
+            
 
     #exports to csv
     df.to_csv("{0}/{1}.csv".format(output_path, file.stem), mode = "x", na_rep= "NA", index= False)

+ 1 - 0
outputs/grid2csv_output/BN-0-727249.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/mk/k4/MD5E-s8267--3a6a2e3e89de1c47df5983c8bceb2df0.csv/MD5E-s8267--3a6a2e3e89de1c47df5983c8bceb2df0.csv

+ 1 - 0
outputs/grid2csv_output/BN-1297003-1913555.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/94/wQ/MD5E-s9894--4434af116673508bbb5b768ec249ff64.csv/MD5E-s9894--4434af116673508bbb5b768ec249ff64.csv

+ 1 - 0
outputs/grid2csv_output/BN-727249-1297003.csv

@@ -0,0 +1 @@
+../../.git/annex/objects/XP/1V/MD5E-s8235--445ced42c58c97a9b997fd1e4521e4cb.csv/MD5E-s8235--445ced42c58c97a9b997fd1e4521e4cb.csv

+ 1 - 1
outputs/grid2csv_output/BN1.csv

@@ -1 +1 @@
-../../.git/annex/objects/Z8/Gj/MD5E-s98930--a6109e153c914cecc16ef8a2f8173009.csv/MD5E-s98930--a6109e153c914cecc16ef8a2f8173009.csv
+../../.git/annex/objects/0Z/8w/MD5E-s98930--9748dabedbaad29d266788f638b97fb9.csv/MD5E-s98930--9748dabedbaad29d266788f638b97fb9.csv

+ 1 - 1
outputs/grid2csv_output/BN2.csv

@@ -1 +1 @@
-../../.git/annex/objects/w8/Zf/MD5E-s107325--2a8bd046c0e5b0765d770bfb475addb2.csv/MD5E-s107325--2a8bd046c0e5b0765d770bfb475addb2.csv
+../../.git/annex/objects/Pz/6j/MD5E-s107325--64c4a0f948488cae5ee510c5e6129de4.csv/MD5E-s107325--64c4a0f948488cae5ee510c5e6129de4.csv

+ 1 - 0
outputs/mfa_align_output/BN-0-727249.TextGrid

@@ -0,0 +1 @@
+../../.git/annex/objects/kj/0W/MD5E-s17424--25df6ad9cb9771e972619485468f8d92/MD5E-s17424--25df6ad9cb9771e972619485468f8d92

+ 1 - 0
outputs/mfa_align_output/BN-1297003-1913555.TextGrid

@@ -0,0 +1 @@
+../../.git/annex/objects/3G/Qq/MD5E-s20194--f4527c39fa00a25d9dfa6506f1f9ca4b/MD5E-s20194--f4527c39fa00a25d9dfa6506f1f9ca4b

+ 1 - 0
outputs/mfa_align_output/BN-727249-1297003.TextGrid

@@ -0,0 +1 @@
+../../.git/annex/objects/Vk/wp/MD5E-s16678--7cc3f98a47e347b56ebc6912d96badf3/MD5E-s16678--7cc3f98a47e347b56ebc6912d96badf3