# csv2grid_bis.py

  1. import pandas as pd
  2. import pympi.Praat as pr
  3. from pydub import AudioSegment
  4. #open csv as pandas dataframe and clean dataframe
  5. df = pd.read_csv("inputs/vandam-data/annotations/cha/converted/BN32_010007_0_0.csv")
  6. df.dropna(subset = ['transcription'], inplace = True)
  7. df = df[df['transcription'].str.match(r"[a-zA-Z]")]
  8. #corrects transcriptions that start before the end of previous transcription
  9. df = df.assign(segment_onset = df['segment_onset'].clip(lower = df['segment_offset'].shift(periods = 1, fill_value = 0)))
  10. #open recording
  11. recording = AudioSegment.from_wav("inputs/vandam-data/recordings/converted/standard/BN32_010007.wav")
  12. #initiate
  13. slices_length = 100
  14. audio_onset = 0
  15. for a in range(0, len(df), slices_length):
  16. #creates sliced dataframe with 100 rows
  17. b = min(a + slices_length, len(df)-1)
  18. df_sliced = df[a:b]
  19. print(a,b)
  20. #finds the segment offset of the 100th transcription entry and stores it into var
  21. #in milliseconds
  22. audio_offset = df_sliced['segment_offset'].max()
  23. #extracts recording at desired length and exports it to new file
  24. recording_sliced = recording[audio_onset:audio_offset]
  25. recording_sliced.export("outputs/csv2grid_output/BN-{0}-{1}.wav".format(audio_onset, audio_offset), format='wav')
  26. #create textgrid
  27. grid = pr.TextGrid(xmax = (audio_offset-audio_onset)/1000)
  28. #iterate through each row
  29. for speaker, segments in df_sliced.groupby('speaker_id'):
  30. aTier = grid.add_tier(speaker)
  31. for i in segments.index.values:
  32. print(i)
  33. if not segments.loc[i, 'transcription']:
  34. continue
  35. aTier.add_interval(
  36. (segments.loc[i, 'segment_onset'] - audio_onset)/1000,
  37. (segments.loc[i, 'segment_offset'] - audio_onset)/1000,
  38. segments.loc[i, 'transcription'],
  39. False
  40. )
  41. grid.to_file("outputs/csv2grid_output/BN-{0}-{1}.TextGrid".format(audio_onset, audio_offset))
  42. #the end cut for this iteration becomes the starting point for next iteration
  43. audio_onset = audio_offset
  44. #increment row numbers
  45. a += 100
  46. b += 100