Browse Source

corrects transcription onsets

Apfelsaft 2 years ago
parent
commit
a07accaa2c
1 changed file with 2 additions and 0 deletions
  1. 2 0
      code/csv2grid_bis.py

+ 2 - 0
code/csv2grid_bis.py

@@ -6,6 +6,8 @@ from pydub import AudioSegment
 df = pd.read_csv("inputs/vandam-data/annotations/cha/converted/BN32_010007_0_0.csv")
 df.dropna(subset = ['transcription'], inplace = True)
 df = df[df['transcription'].str.match(r"[a-zA-Z]")]
+# Clamp each segment_onset to the previous row's segment_offset (0 for the first row) so that no transcription starts before the previous one ends
+df = df.assign(segment_onset = df['segment_onset'].clip(lower = df['segment_offset'].shift(periods = 1, fill_value = 0)))
 
 #open recording
 recording = AudioSegment.from_wav("inputs/vandam-data/recordings/converted/standard/BN32_010007.wav")