123456789101112131415161718192021222324252627282930 |
"""Convert every Praat TextGrid in ``input_dir`` into a CSV in ``output_path``.

Each interval of each tier becomes one CSV row holding the tier name, the
interval start/end multiplied by 1000, and the interval text.  All other
columns of the header are left empty and written as ``NA``.
"""
from pathlib import Path

import pandas as pd
import pympi.Praat as pr

input_dir = "outputs/mfa_align_output"
output_path = "outputs/grid2csv_output"

# Header of every exported CSV; the order here is the column order on disk.
# NOTE(review): "addresseee" looks like a typo for "addressee" -- left
# untouched because downstream readers may rely on the current name; confirm.
COLUMNS = ["speaker_id", "segment_offset", "mwu_type", "transcription", "speaker_type",
           "segment_onset", "vcm_type", "addresseee", "raw_filename", "ling_type", "lex_type"]

# Factor applied to interval boundaries (presumably seconds -> milliseconds;
# verify against the TextGrid producer).
TIME_SCALE = 1000


def textgrid_to_dataframe(grid):
    """Return one row per interval of every tier in *grid*.

    Args:
        grid: an object exposing ``get_tiers()``; each tier must expose
            ``name`` and ``get_all_intervals()`` yielding
            ``(begin, end, text)`` tuples (as ``pympi.Praat.TextGrid`` does
            for interval tiers).

    Returns:
        A ``pandas.DataFrame`` with the columns listed in ``COLUMNS``;
        columns that are not filled from the grid contain NaN.
    """
    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = [
        {
            "speaker_id": tier.name,
            "segment_onset": interval[0] * TIME_SCALE,
            "segment_offset": interval[1] * TIME_SCALE,
            "transcription": interval[2],
        }
        for tier in grid.get_tiers()
        for interval in tier.get_all_intervals()
    ]
    return pd.DataFrame(rows, columns=COLUMNS)


# Make sure the destination exists so to_csv does not fail on a fresh checkout.
out_dir = Path(output_path)
out_dir.mkdir(parents=True, exist_ok=True)

# Select TextGrid files only.
files = Path(input_dir).glob('*.TextGrid')

# Loop through each TextGrid file of the folder.
for file in files:
    print(file)
    grid = pr.TextGrid(file)
    df = textgrid_to_dataframe(grid)

    # mode="x" deliberately refuses to overwrite an existing CSV
    # (raises FileExistsError instead).
    df.to_csv(out_dir / (file.stem + ".csv"), mode="x", na_rep="NA", index=False)
    print("----------------SAVED!-----------------")
|