Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

researchcut2segments.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. #!/usr/bin/python3
  2. """
  3. created on Wed Jan 30 2018
  4. author: Christian Olaf Haeusler
  5. To Do:
  6. argparser
  7. filter out the narrator (Erzaehler) when MOVIE = True
  8. """
  9. from collections import defaultdict
  10. import os
  11. from os.path import basename
  12. from os.path import join as opj
  13. from os.path import exists
  14. import re
  15. import sys
  16. import pandas as pd
  17. SEGMENTS_OFFSETS = (
  18. (0.00, 0.00),
  19. (886.00, 0.00),
  20. (1752.08, 0.08), # third segment's start
  21. (2612.16, 0.16),
  22. (3572.20, 0.20),
  23. (4480.28, 0.28),
  24. (5342.36, 0.36),
  25. (6410.44, 0.44), # last segment's start
  26. (7086.00, 0.00)) # movie's last time point
  27. # dictionaries with paired touples containing time (2sec steps) and offset
  28. # in respect to the audiovisual movie (forrestgump_researchcut_ger_mono.mkv)
  29. AUDIO_AV_OFFSETS = {
  30. 0: { 0: 21.33},
  31. 1: { 0: 37.33,
  32. 408: 21.33},
  33. 2: { 0: 69.33,
  34. 199: 61.33},
  35. 3: { 0: 93.33,
  36. 320: 101.33},
  37. 4: { 0: 109.33,
  38. 401: 101.33},
  39. 5: { 0: 141.33},
  40. 6: { 0: 189.31,
  41. 61: 181.31},
  42. 7: { 0: 205.33}}
  43. AUDIO_AO_OFFSETS = {
  44. 0: { 0: 47.02},
  45. 1: { 0: 36.35,
  46. 203: 47.02},
  47. 2: { 0: 87.02,
  48. 199: 92.35},
  49. 3: { 0: 124.35,
  50. 320: 132.35},
  51. 4: { 0: 105.69,
  52. 401: 92.35},
  53. 5: { 0: 137.69,
  54. 364: 167.02},
  55. 6: { 0: 201.67,
  56. 61: 543.00},
  57. 7: { 0:-1422.31}}
  58. def time_stamp_to_msec(t_stamp='01:50:34:01'):
  59. '''
  60. Input:
  61. time stamp (str) in format HH:MM:SS:Frame
  62. Output:
  63. time point in milliseconds (int)
  64. '''
  65. splitted_stamp = t_stamp.split(':')
  66. milliseconds = (int(splitted_stamp[0]) * 60 * 60 * 1000) +\
  67. (int(splitted_stamp[1]) * 60 * 1000) +\
  68. (int(splitted_stamp[2]) * 1000) +\
  69. (int(splitted_stamp[3]) * 40)
  70. return milliseconds
  71. def msec_to_time_stamp(milliseconds=6634040):
  72. '''
  73. Input:
  74. a time point in milliseconds (int)
  75. Output:
  76. a time stamp (str) in format HH:MM:SS:Frame
  77. '''
  78. # convert in case function was called from the command line with the
  79. # timing given as a string
  80. milliseconds = int(milliseconds)
  81. hours = (milliseconds / (60 * 60 * 1000))
  82. minutes = (milliseconds % (60 * 60 * 1000) / (60 * 1000))
  83. seconds = (milliseconds % (60 * 60 * 1000) % (60 * 1000) / 1000)
  84. frame = (milliseconds % (60 * 60 * 1000) % (60 * 1000) % (1000) // 40)
  85. time_stamp = '%02d:%02d:%02d:%02d' % (hours, minutes, seconds, frame)
  86. return time_stamp
  87. def get_run_number(starts, onset):
  88. '''
  89. '''
  90. for start in sorted(starts, reverse=True):
  91. if onset >= start:
  92. run = starts.index(start)
  93. break
  94. return run
  95. def whole_anno_to_segments(seg_starts, run_nr, anno_time):
  96. '''
  97. "The position of an event from a movie annotation with respect to the
  98. cropped fMRI time series can now be determined by substracting the
  99. start time of the respective segment as listed in Table 1"
  100. http://studyforrest.org/annotation_timing.html
  101. '''
  102. seg_time = anno_time - seg_starts[run_nr]
  103. return seg_time
  104. def fix_audio_movie_segments(AUDIO_AV_OFFSETS, run, uncorrected):
  105. '''corrects the segments' audio offsets
  106. in respect to the unsegmented movie
  107. '''
  108. critical_time_points = sorted(AUDIO_AV_OFFSETS[run].keys(), reverse=True)
  109. for crit in critical_time_points:
  110. if uncorrected >= crit * 2.0:
  111. corrected = uncorrected + (AUDIO_AV_OFFSETS[run][crit] / 1000.0)
  112. break
  113. return corrected
  114. def fix_audio_descr_segments(AUDIO_AO_OFFSETS, run, uncorrected):
  115. '''corrects the segments' audio offsets
  116. in respect to the unsegmented audiobook
  117. '''
  118. critical_time_points = sorted(AUDIO_AO_OFFSETS[run].keys(), reverse=True)
  119. for crit in critical_time_points:
  120. if uncorrected >= crit * 2.0:
  121. corrected = uncorrected + (AUDIO_AO_OFFSETS[run][crit] / 1000.0)
  122. break
  123. return corrected
  124. def write_segmented_annos(infilename, stimulus, run_dict, out_dir):
  125. '''
  126. '''
  127. basefilename = basename(infilename)[:-4]
  128. outdir = opj(out_dir, stimulus)
  129. if not exists(outdir):
  130. os.makedirs(outdir)
  131. for run in sorted(run_dict.keys()):
  132. outname = opj(out_dir, stimulus, '{}_run-{}_events.tsv'.format(
  133. basefilename,
  134. run + 1))
  135. pd.DataFrame.from_records(
  136. run_dict[run],
  137. columns=run_dict[run][0].dtype.names).to_csv(
  138. outname,
  139. sep='\t',
  140. index=False,
  141. encoding='utf-8')
  142. #### main program #####
  143. if __name__ == "__main__":
  144. # constants #
  145. infile = sys.argv[1]
  146. annotated_time = sys.argv[2]
  147. target_time = sys.argv[3]
  148. outdir = sys.argv[4]
  149. # with launch_ipdb_on_exception():
  150. # read the annotation file
  151. anno = pd.read_csv(infile, sep='\t', encoding='utf-8').to_records(index=False)
  152. segment_starts = [start for start, offset in SEGMENTS_OFFSETS]
  153. run_events = defaultdict(list)
  154. for row in anno:
  155. # get the run number
  156. run = get_run_number(segment_starts, row['onset'])
  157. # convert the timings of a continuous annotation
  158. # to timings in respect to the start of the corresponding segment
  159. onset_in_seg = whole_anno_to_segments(
  160. segment_starts,
  161. run,
  162. float(row['onset']))
  163. # correct for the stimulus used to annotate the audiotrack
  164. if annotated_time == 'aomovie':
  165. # the files
  166. # forrestgump_researchcut_ad_ger.flac and
  167. # german_dvd_5.1_48000hz_488kb_research_cut_aligned_cutted_narrator_muted_48000Hz.flac
  168. # (that contain the audio description) were originally lagging
  169. # behind for XYZ msec and were shiftet forward
  170. # by one frame (40ms) in respect to the reference file
  171. # forrestgump_researchcut_ger.mkv
  172. # 1st, correct for shifting the narrator (incl. dialogue) 40ms
  173. # to the front before annotating the narrator/dialogue
  174. onset_in_seg += 0.040
  175. # 2nd, correct for the offset between the (unshifted) audio
  176. # description and the audiovisual movie
  177. # -> the offset is varying +/- one frame (40 ms) around 0
  178. onset_in_seg -= 0.000
  179. # 3rd, correct for the offset between whole stimulus
  180. # (audiovisual or audio-only) and its segments
  181. if target_time == 'avmovie':
  182. onset_in_seg = fix_audio_movie_segments(
  183. AUDIO_AV_OFFSETS,
  184. run,
  185. onset_in_seg)
  186. elif target_time == 'aomovie':
  187. onset_in_seg = fix_audio_descr_segments(
  188. AUDIO_AO_OFFSETS,
  189. run,
  190. onset_in_seg)
  191. else:
  192. raise ValueError('Unknown time label %s', target_time)
  193. elif annotated_time == 'avmovie':
  194. # all splendid for now
  195. pass
  196. else:
  197. raise ValueError('%s is an unknown annotation', basename(input_file))
  198. row['onset'] = round(onset_in_seg, 3)
  199. # append that shit
  200. run_events[run].append(row)
  201. write_segmented_annos(infile, target_time, run_events, outdir)