|
@@ -6,6 +6,8 @@ from pydub import AudioSegment
|
|
|
df = pd.read_csv("inputs/vandam-data/annotations/cha/converted/BN32_010007_0_0.csv")
|
|
|
df.dropna(subset = ['transcription'], inplace = True)
|
|
|
df = df[df['transcription'].str.match(r"[a-zA-Z]")]
|
|
|
+#corrects transcriptions that start before the end of the previous transcription by moving their onset up to that end
|
|
|
+df = df.assign(segment_onset = df['segment_onset'].clip(lower = df['segment_offset'].shift(periods = 1, fill_value = 0)))
|
|
|
|
|
|
#open recording
|
|
|
recording = AudioSegment.from_wav("inputs/vandam-data/recordings/converted/standard/BN32_010007.wav")
|