# grid2csv.py

from pathlib import Path

import pandas as pd
import pympi.Praat as pr
  4. input_dir = "outputs/mfa_align_output"
  5. output_path = "outputs/grid2csv_output"
  6. #selects TextGrid files only
  7. files = Path(input_dir).glob('*.TextGrid')
  8. #Loop through each textgrid file of the folder
  9. for file in files:
  10. #open textgrid file
  11. print(file)
  12. grid = pr.TextGrid(file)
  13. #initialize header
  14. df = pd.DataFrame(columns= ["speaker_id","segment_offset","mwu_type","transcription","speaker_type",
  15. "segment_onset","vcm_type","addresseee","raw_filename","ling_type","lex_type"])
  16. #loop through all tiers
  17. for tier in grid.get_tiers():
  18. for interval in tier.get_all_intervals():
  19. df = df.append(pd.Series({'speaker_id': tier.name, 'segment_onset': interval[0]*1000,
  20. 'segment_offset': interval[1]*1000, 'transcription': interval[2]}), ignore_index= True)
  21. #exports to csv
  22. df.to_csv("{0}/{1}.csv".format(output_path, file.stem), mode = "x", na_rep= "NA", index= False)
  23. print("----------------SAVED!-----------------")