Browse Source

Added code to populate the speaker_type column

Apfelsaft 2 years ago
parent
commit
85dafb3768

+ 14 - 2
code/grid2csv.py

@@ -6,6 +6,17 @@ import re
 input_dir = "outputs/mfa_align_output"
 output_path = "outputs/grid2csv_output"
 
+speaker_type_dic = {
+        "CHI - words" : "CHI",
+        "CHI - phones" : "CHI",
+        "MOT - words" : "FEM",
+        "MOT - phones" : "FEM",
+        "FAT - words" : "MAL",
+        "FAT - phones" : "MAL",
+        "SIS - words" : "OCH",
+        "SIS - phones" : "OCH"
+        }
+
 #selects TextGrid files only
 files = Path(input_dir).glob('*.TextGrid')
 
@@ -30,8 +41,9 @@ for file in files:
             #populates dataframe
             df = df.append(pd.Series({'speaker_id': tier.name, 'segment_onset': (interval[0]*1000 + audio_onset),
                 'segment_offset': (interval[1]*1000 + audio_onset), 'transcription': interval[2]}), ignore_index= True)
-            
-
+    
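+    #populates speaker_type column by mapping each tier name in speaker_id through speaker_type_dic (tier names missing from the dict become NaN, exported as "NA")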
+    df['speaker_type'] = df['speaker_id'].map(speaker_type_dic)
     #exports to csv
     df.to_csv("{0}/{1}.csv".format(output_path, file.stem), mode = "x", na_rep= "NA", index= False)
     print("----------------SAVED!-----------------")

+ 1 - 1
outputs/grid2csv_output/BN-0-727249.csv

@@ -1 +1 @@
-../../.git/annex/objects/Xk/pF/MD5E-s99540--b5c532e408d69bc4ef8ba5904fb5e8da.csv/MD5E-s99540--b5c532e408d69bc4ef8ba5904fb5e8da.csv
+../../.git/annex/objects/49/28/MD5E-s101199--f0ef1e67969cef1cedc4d534f65b7860.csv/MD5E-s101199--f0ef1e67969cef1cedc4d534f65b7860.csv

+ 1 - 1
outputs/grid2csv_output/BN-1297003-1913555.csv

@@ -1 +1 @@
-../../.git/annex/objects/KK/3m/MD5E-s116152--89e47de48a10bfbb7b3d9587c3e82f89.csv/MD5E-s116152--89e47de48a10bfbb7b3d9587c3e82f89.csv
+../../.git/annex/objects/2J/Vm/MD5E-s118161--e455d4a37c0927f792dcd1f9c6627603.csv/MD5E-s118161--e455d4a37c0927f792dcd1f9c6627603.csv

+ 1 - 1
outputs/grid2csv_output/BN-727249-1297003.csv

@@ -1 +1 @@
-../../.git/annex/objects/4J/9F/MD5E-s102145--717cdafde043d775a8f189a2b00fac67.csv/MD5E-s102145--717cdafde043d775a8f189a2b00fac67.csv
+../../.git/annex/objects/47/gP/MD5E-s103933--c04d33b0486c0d94a012bdf44edfac4e.csv/MD5E-s103933--c04d33b0486c0d94a012bdf44edfac4e.csv