import pandas as pd
import pympi.Praat as pr
from pydub import AudioSegment

# open csv as pandas dataframe and clean dataframe
df = pd.read_csv("inputs/vandam-data/annotations/cha/converted/BN32_010007_0_0.csv")
df.dropna(subset=['transcription'], inplace=True)
df = df[df['transcription'].str.match(r"[a-zA-Z]")]

# correct transcriptions that start before the end of the previous transcription
df = df.assign(segment_onset=df['segment_onset'].clip(lower=df['segment_offset'].shift(periods=1, fill_value=0)))

# open recording
recording = AudioSegment.from_wav("inputs/vandam-data/recordings/converted/standard/BN32_010007.wav")

# initialise slicing parameters
slices_length = 100
audio_onset = 0

for a in range(0, len(df), slices_length):
    # slice the dataframe into chunks of at most slices_length rows
    b = min(a + slices_length, len(df))
    df_sliced = df.iloc[a:b]
    print(a, b)

    # latest segment offset in this slice, in milliseconds
    audio_offset = df_sliced['segment_offset'].max()

    # extract the matching stretch of audio and export it to a new file
    recording_sliced = recording[audio_onset:audio_offset]
    recording_sliced.export("outputs/csv2grid_output/BN-{0}-{1}.wav".format(audio_onset, audio_offset), format='wav')

    # create textgrid covering this slice (times in seconds)
    grid = pr.TextGrid(xmax=(audio_offset - audio_onset) / 1000)

    # add one tier per speaker and fill it with that speaker's intervals
    for speaker, segments in df_sliced.groupby('speaker_id'):
        aTier = grid.add_tier(speaker)
        for i in segments.index.values:
            if not segments.loc[i, 'transcription']:
                continue
            aTier.add_interval(
                (segments.loc[i, 'segment_onset'] - audio_onset) / 1000,
                (segments.loc[i, 'segment_offset'] - audio_onset) / 1000,
                segments.loc[i, 'transcription'],
                False
            )

    grid.to_file("outputs/csv2grid_output/BN-{0}-{1}.TextGrid".format(audio_onset, audio_offset))

    # the end cut for this iteration becomes the starting point for the next iteration
    audio_onset = audio_offset