locationsanno2onsets.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343
  1. #!/usr/bin/env python3
  2. '''
  3. created on Tue Feb 18 2020
  4. author: Christian Olaf Haeusler
  5. '''
  6. from collections import defaultdict
  7. from glob import glob
  8. import argparse
  9. import csv
  10. import os.path
  11. import random
# parameters #
# duration that is written next to every onset in the output event files
STIM_LENGTH = 0.2 # in seconds
# fixed seed so that the random jitter of the no-cut events is reproducible
random.seed(1984) # for jittering the no cut events
  15. def parse_arguments():
  16. '''
  17. '''
  18. parser = argparse.ArgumentParser(
  19. description='''converts annotated events to
  20. event files to be used in FSL''')
  21. parser.add_argument('-ind',
  22. default='events/segments/avmovie',
  23. help='''directory that contains the segmented
  24. annotation; e.g. 'events/segments/avmovie' ''')
  25. parser.add_argument('-inp',
  26. default='locations_run-?_events.tsv',
  27. help='''input pattern of the segmented
  28. annotation files ''')
  29. parser.add_argument('-outd',
  30. default='events/onsets',
  31. help='''output directory; e.g. 'events/onsets' ''')
  32. args = parser.parse_args()
  33. inDir = args.ind
  34. inPat = args.inp
  35. outDir = args.outd
  36. return inDir, inPat, outDir
  37. def get_anno_segments(directory, fname_pattern):
  38. '''
  39. '''
  40. path_pattern = os.path.join(directory, fname_pattern)
  41. anno_pathes = glob(path_pattern)
  42. return sorted(anno_pathes)
  43. def get_run_number(path):
  44. '''
  45. '''
  46. fname = os.path.basename(path)
  47. run = fname.split('run-')[1].split('_events')[0]
  48. return run
  49. def read_anno_segment(path):
  50. '''
  51. '''
  52. with open(path, 'r') as csvfile:
  53. all_rows = csv.reader(csvfile, delimiter='\t')
  54. # skip the header
  55. next(all_rows, None)
  56. # put content of files into a list
  57. anno = []
  58. for row in all_rows:
  59. # convert onset from str to float
  60. row[0] = float(row[0])
  61. # convert duration to offset
  62. row[1] = round(row[0] + float(row[1]), 2)
  63. anno.append(row)
  64. return anno
  65. def compute_spatial_switches(row,
  66. prev_major_l,
  67. prev_setting,
  68. prev_locale):
  69. '''
  70. '''
  71. start_t = row[0]
  72. major_l = row[2]
  73. setting = row[3]
  74. locale = row[4]
  75. # GET PATTERN OF LOCATION SWITCHES
  76. # checking which switches are present
  77. spatial_switches = [None, None, None]
  78. # major location switch present in current line?
  79. if major_l != prev_major_l:
  80. spatial_switches[0] = True
  81. else:
  82. spatial_switches[0] = False
  83. # setting switch present in current line?
  84. if setting != prev_setting:
  85. spatial_switches[1] = True
  86. # register that the setting in the previous line has been "learned"
  87. if not prev_setting in learned_settings:
  88. learned_settings.append(prev_setting)
  89. else:
  90. spatial_switches[1] = False
  91. # locale switch present in current line?
  92. if locale != prev_locale:
  93. spatial_switches[2] = True
  94. # register that the locale in the previous line has been "learned"
  95. # create a dictionary key with the setting's name first
  96. learned_locales[prev_setting]
  97. if not prev_locale in learned_locales[prev_setting]:
  98. learned_locales[prev_setting].append(prev_locale)
  99. else:
  100. spatial_switches[2] = False
  101. return spatial_switches
  102. def apply_condition_rules(row, switches, learned_settings, learned_locales):
  103. '''
  104. '''
  105. start_t = row[0]
  106. major_l = row[2]
  107. setting = row[3]
  108. locale = row[4]
  109. # apply rules to compute conditions
  110. # rule for a setting change (i.e. a cut across scenes)
  111. if switches == [True, True, True] or switches == [False, True, True]:
  112. if setting in learned_settings:
  113. cond = 'vse_old'
  114. else:
  115. cond = 'vse_new'
  116. # rule for locale_change
  117. # (the locale (mostly a room) changes within a scene/setting)
  118. elif spatial_switches == [False, False, True]:
  119. cond = 'vlo_ch'
  120. # rule for perspective_changes
  121. elif spatial_switches == [False, False, False]:
  122. if locale in learned_locales[setting]:
  123. cond = 'vpe_old'
  124. else:
  125. cond = 'vpe_new'
  126. else:
  127. raise RuntimeError('cant recognize condition')
  128. return(start_t, cond)
  129. def create_nocut(row):
  130. '''
  131. '''
  132. # the first to functions are used to align the events to the nearest
  133. # movie frame
  134. def msec_to_time_stamp(milliseconds):
  135. '''
  136. Input:
  137. a time point in milliseconds (int)
  138. Output:
  139. a time stamp (str) in format HH:MM:SS:Frame
  140. '''
  141. hours = (milliseconds / (60*60*1000))
  142. minutes = (milliseconds % (60*60*1000) / (60*1000))
  143. seconds = (milliseconds % (60*60*1000) % (60*1000) / 1000)
  144. frame = (milliseconds % (60*60*1000) % (60*1000) % (1000) // 40)
  145. time_stamp = '%02d:%02d:%02d:%02d' % (hours, minutes, seconds, frame)
  146. return time_stamp
  147. def time_stamp_to_msec(t_stamp):
  148. '''
  149. Input:
  150. time stamp (str) in format HH:MM:SS:Frame
  151. Output:
  152. time point in milliseconds (int)
  153. '''
  154. splitted_stamp = t_stamp.split(':')
  155. milliseconds = (int(splitted_stamp[0]) * 60 * 60 * 1000) +\
  156. (int(splitted_stamp[1]) * 60 * 1000) +\
  157. (int(splitted_stamp[2]) * 1000) +\
  158. (int(splitted_stamp[3]) * 40)
  159. return milliseconds
  160. nocut_time = 10
  161. min_shot_length = 2 * nocut_time
  162. shot_length = row[1] - row[0]
  163. if shot_length > min_shot_length:
  164. # needs floor division
  165. # code should work with python 2 and python 3
  166. integer_quotient = int(shot_length / 10)
  167. event_distance = shot_length / float(integer_quotient)
  168. nocut_events = []
  169. for multiplier in range(1, integer_quotient):
  170. time_point = multiplier * event_distance
  171. nocut_event = row[0] + time_point
  172. # add Gaussian Jitter around the event of 1.25 sec)
  173. nocut_event = nocut_event + (random.gauss(0, 1250) / 1000)
  174. # align to next movie frame
  175. nocut_event = msec_to_time_stamp(nocut_event * 1000)
  176. nocut_event = time_stamp_to_msec(nocut_event) / 1000.0
  177. nocut_events.append(nocut_event)
  178. return(nocut_events)
  179. def count_events_conds(events_dict):
  180. '''
  181. '''
  182. all_segments_dict = defaultdict(int)
  183. # print events per condition per run
  184. for run in sorted(events_dict.keys()):
  185. print('\nrun %s:' % run)
  186. for loc in sorted(events_dict[run].keys()):
  187. count = len(events_dict[run][loc])
  188. if count > 0:
  189. print('%s\t%s' % (loc, count))
  190. else:
  191. print('%s\t%s\t###' % (loc, count))
  192. # add the event count of the current run to the dict for the
  193. # whole stimulus
  194. all_segments_dict[loc] += count
  195. print('\n\nwhole stimulus:')
  196. loc_descr_count = [[count, loc] for loc, count in all_segments_dict.items()]
  197. loc_descr_count.sort(key=lambda x: int(x[0]), reverse=True)
  198. for count, loc in loc_descr_count:
  199. print('%s\t%s' % (loc, count))
  200. return None
  201. def write_event_files(conds_dict, out_dir):
  202. '''
  203. '''
  204. print('\nWriting onset files')
  205. # for cond in sorted(t_per_cond.keys()):
  206. # print(cond, len(t_per_cond[cond]))
  207. for run in conds_dict.keys():
  208. for cond in conds_dict[run].keys():
  209. # print('writing onsets for', run, cond)
  210. out_fname = os.path.join(out_dir,
  211. 'run-%i' % run,
  212. cond + '.txt')
  213. path = os.path.dirname(out_fname)
  214. if not os.path.exists(path):
  215. os.makedirs(path)
  216. # write lines in FSL's EV3 format
  217. lines = ['%.3f\t%.1f\t1\n' % (timing, STIM_LENGTH) for timing in conds_dict[run][cond]]
  218. outfile = open(out_fname, 'w')
  219. outfile.writelines(lines)
  220. outfile.close()
# main program #
if __name__ == "__main__":
    inDir, inPat, outDir = parse_arguments()

    # build the name of the output directory from the input directory
    # (the last component of the input directory is appended); handles
    # whether the input has the timing of the audio-description or of the
    # audio-visual movie
    outDir = os.path.join(outDir, os.path.basename(inDir))

    # search for files that contain the desired annotation
    anno_segments = get_anno_segments(inDir, inPat)

    # initialize the dicts for the tags drawn from their columns
    # for run (key) -> condition (key) -> timings (list)
    # NOTE: only the runs 1 to 8 are prepared; a file with another run
    # number raises a KeyError when its events are stored below
    locations_conds = {seg:defaultdict(list) for seg in range(1,9)}

    # state that is carried from row to row and across the segments
    prev_major_l = None
    prev_setting = None
    prev_locale = None

    # NOTE(review): learned_settings, learned_locales and spatial_switches
    # live at module level; helper functions of this file may look them up
    # as globals, so check the helpers before renaming or moving them
    learned_settings = []
    learned_locales = defaultdict(list)

    # loop over the segmented annotation
    for segment in anno_segments:
        run = int(get_run_number(segment))
        anno = read_anno_segment(segment)

        for row in anno:
            # check which spatial switches happen from
            # former to current row (i.e. frame)
            spatial_switches = compute_spatial_switches(row,
                                                        prev_major_l,
                                                        prev_setting,
                                                        prev_locale)

            # translate the switch pattern into a condition and store the
            # onset of the row for that condition
            start_t, cond = apply_condition_rules(row,
                                                  spatial_switches,
                                                  learned_settings,
                                                  learned_locales)

            locations_conds[run][cond].append(start_t)

            # prepare for next row
            prev_major_l = row[2]
            prev_setting = row[3]
            prev_locale = row[4]

            # create the no-cut events from shots longer than the
            # specified interval
            nocut_events = create_nocut(row)
            if nocut_events:
                locations_conds[run]['vno_cut'].extend(nocut_events)

    # COUNTING final conditions
    print('\n\nCONDITIONS count:')
    count_events_conds(locations_conds)

    # write the files
    write_event_files(locations_conds, outDir)