|
@@ -11,21 +11,18 @@ df = df[df['transcription'].str.match(r"[a-zA-Z]")]
|
|
|
recording = AudioSegment.from_wav("inputs/vandam-data/recordings/converted/standard/BN32_010007.wav")
|
|
|
|
|
|
#initiate
|
|
|
-a = 0
|
|
|
-b = 99
|
|
|
+slices_length = 100
|
|
|
audio_onset = 0
|
|
|
-end = False
|
|
|
-
|
|
|
-while end == False:
|
|
|
-
|
|
|
- #check if b is not out of range
|
|
|
- #if it is, place last row # in b and change boolean to True to stop loop
|
|
|
- if(b > int(df.count()[0])):
|
|
|
- b = int(df.count()[0])
|
|
|
- end = True
|
|
|
|
|
|
+for a in range(0, len(df), slices_length):
|
|
|
#creates sliced dataframe with 100 rows
|
|
|
+    b = min(a + slices_length, len(df))  #slice end is exclusive: clamp to len(df), not len(df)-1, so the last row is kept
|
|
|
df_sliced = df[a:b]
|
|
|
+ print(a,b)
|
|
|
+
|
|
|
+    #finds the largest segment offset among the entries of this slice and stores it into audio_offset
|
|
|
+ #in milliseconds
|
|
|
+ audio_offset = df_sliced['segment_offset'].max()
|
|
|
|
|
|
#finds the segment offset of the 100th transcription entry and stores it into var
|
|
|
#in milliseconds
|