|
@@ -1,12 +1,12 @@
|
|
|
-#!/usr/bin/python
|
|
|
+#!/usr/bin/python3
|
|
|
"""
|
|
|
created on Wed Jan 30 2018
|
|
|
author: Christian Olaf Haeusler
|
|
|
|
|
|
To Do:
|
|
|
argparser
|
|
|
+ filter the narrator when MOVIE = True
|
|
|
"""
|
|
|
-from __future__ import print_function
|
|
|
from collections import defaultdict
|
|
|
import os
|
|
|
from os.path import basename
|
|
@@ -17,12 +17,6 @@ import sys
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
|
-# constants #
|
|
|
-MOVIE = True
|
|
|
-CROPPED = 0 # in sec; is a concatenated time series with cropped volumes used?
|
|
|
-INPUT_FILES = sys.argv[1:]
|
|
|
-OUT_DIR = 'segments'
|
|
|
-
|
|
|
SEGMENTS_OFFSETS = (
|
|
|
(0.00, 0.00),
|
|
|
(886.00, 0.00),
|
|
@@ -34,6 +28,39 @@ SEGMENTS_OFFSETS = (
|
|
|
(6410.44, 0.44), # last segment's start
|
|
|
(7086.00, 0.00)) # movie's last time point
|
|
|
|
|
|
+# dictionaries of paired values: time point (in 2 sec steps) and offset
|
|
|
+# with respect to the audiovisual movie (forrestgump_researchcut_ger_mono.mkv)
|
|
|
+AUDIO_AV_OFFSETS = {
|
|
|
+ 0: { 0: 21.33},
|
|
|
+ 1: { 0: 37.33,
|
|
|
+ 408: 21.33},
|
|
|
+ 2: { 0: 69.33,
|
|
|
+ 199: 61.33},
|
|
|
+ 3: { 0: 93.33,
|
|
|
+ 320: 101.33},
|
|
|
+ 4: { 0: 109.33,
|
|
|
+ 401: 101.33},
|
|
|
+ 5: { 0: 141.33},
|
|
|
+ 6: { 0: 189.31,
|
|
|
+ 61: 181.31},
|
|
|
+ 7: { 0: 205.33}}
|
|
|
+
|
|
|
+AUDIO_AO_OFFSETS = {
|
|
|
+ 0: { 0: 47.02},
|
|
|
+ 1: { 0: 36.35,
|
|
|
+ 203: 47.02},
|
|
|
+ 2: { 0: 87.02,
|
|
|
+ 199: 92.35},
|
|
|
+ 3: { 0: 124.35,
|
|
|
+ 320: 132.35},
|
|
|
+ 4: { 0: 105.69,
|
|
|
+ 401: 92.35},
|
|
|
+ 5: { 0: 137.69,
|
|
|
+ 364: 167.02},
|
|
|
+ 6: { 0: 201.67,
|
|
|
+ 61: 543.00},
|
|
|
+ 7: { 0:-1422.31}}
|
|
|
+
|
|
|
|
|
|
def time_stamp_to_msec(t_stamp='01:50:34:01'):
|
|
|
'''
|
|
@@ -84,60 +111,47 @@ def get_run_number(starts, onset):
|
|
|
return run
|
|
|
|
|
|
|
|
|
-def fix_segment_shift(timing_in_anno, cropped_time):
|
|
|
+def whole_anno_to_segments(seg_starts, run_nr, anno_time):
|
|
|
'''
|
|
|
- the function is not necessary anymore since the correction
|
|
|
- is implicitly done by additionally given offsets in SEGMENTS_OFFSETS
|
|
|
-
|
|
|
-
|
|
|
- fixes the timing of the 8 stimulus movie sigments
|
|
|
- https://github.com/psychoinformatics-de/studyforrest-data-phase2/blob/master/code/stimulus/movie/segment_timing.csv
|
|
|
+ "The position of an event from a movie annotation with respect to the
|
|
|
+ cropped fMRI time series can now be determined by subtracting the
|
|
|
+ start time of the respective segment as listed in Table 1"
|
|
|
+ http://studyforrest.org/annotation_timing.html
|
|
|
'''
|
|
|
- # regular case which will be kept in runs 1 and 2
|
|
|
- timing_in_segment = timing_in_anno
|
|
|
-
|
|
|
- # correct for the accumulating offsets in segments 3 to 8
|
|
|
- for segment_start, offset in sorted(SEGMENTS_OFFSETS, reverse = True):
|
|
|
- # if timing is in a critical segment, correct the timing
|
|
|
- if timing_in_anno >= segment_start + cropped_time:
|
|
|
- timing_in_segment = round(timing_in_anno - offset, 3)
|
|
|
- break
|
|
|
+ seg_time = anno_time - seg_starts[run_nr]
|
|
|
|
|
|
- return timing_in_segment
|
|
|
+ return seg_time
|
|
|
|
|
|
|
|
|
-def fix_audio_timing(uncorrected_audio):
|
|
|
- '''the movie's audiotrack lacks behind the visual frames
|
|
|
- there is an slightly increasing offset (but problably no continuous drift)
|
|
|
- over the movie segments
|
|
|
+def fix_audio_movie_segments(AUDIO_AV_OFFSETS, run, uncorrected):
|
|
|
+ '''corrects the segments' audio offsets
|
|
|
+ with respect to the unsegmented movie
|
|
|
'''
|
|
|
- corrected_audio = uncorrected_audio
|
|
|
- return corrected_audio
|
|
|
+ critical_time_points = sorted(AUDIO_AV_OFFSETS[run].keys(), reverse=True)
|
|
|
+ for crit in critical_time_points:
|
|
|
+ if uncorrected >= crit * 2.0:
|
|
|
+ corrected = uncorrected + (AUDIO_AV_OFFSETS[run][crit] / 1000.0)
|
|
|
+ break
|
|
|
|
|
|
+ return corrected
|
|
|
|
|
|
-def anno_time_to_seg_time(seg_starts, run_nr, anno_time, cropped_time):
|
|
|
- '''
|
|
|
- "The position of an event from a movie annotation with respect to the
|
|
|
- cropped fMRI time series can now be determined by substracting the
|
|
|
- start time of the respective segment as listed in Table 1"
|
|
|
- http://studyforrest.org/annotation_timing.html
|
|
|
|
|
|
- events occur earlier in the cropped stimulus segments.
|
|
|
- hence the cropped ammount is additionally substracted from the anno timing
|
|
|
+def fix_audio_descr_segments(AUDIO_AO_OFFSETS, run, uncorrected):
|
|
|
+ '''corrects the segments' audio offsets
|
|
|
+ with respect to the unsegmented audiobook
|
|
|
'''
|
|
|
- seg_time = round(anno_time - (seg_starts[run_nr] + cropped_time), 2)
|
|
|
+ critical_time_points = sorted(AUDIO_AO_OFFSETS[run].keys(), reverse=True)
|
|
|
+ for crit in critical_time_points:
|
|
|
+ if uncorrected >= crit * 2.0:
|
|
|
+ corrected = uncorrected + (AUDIO_AO_OFFSETS[run][crit] / 1000.0)
|
|
|
+ break
|
|
|
|
|
|
- return seg_time
|
|
|
+ return corrected
|
|
|
|
|
|
|
|
|
-def write_segmented_annos(infilename, movie, cropped, run_dict, out_dir, ):
|
|
|
+def write_segmented_annos(infilename, stimulus, run_dict, out_dir):
|
|
|
'''
|
|
|
'''
|
|
|
- if MOVIE is True:
|
|
|
- stimulus = 'avmovie'
|
|
|
- else:
|
|
|
- stimulus = 'aomovie'
|
|
|
-
|
|
|
basefilename = basename(infilename)[:-4]
|
|
|
outdir = opj(out_dir, stimulus)
|
|
|
if not exists(outdir):
|
|
@@ -153,49 +167,82 @@ def write_segmented_annos(infilename, movie, cropped, run_dict, out_dir, ):
|
|
|
columns=run_dict[run][0].dtype.names).to_csv(
|
|
|
outname,
|
|
|
sep='\t',
|
|
|
- index=False)
|
|
|
+ index=False,
|
|
|
+ encoding='utf-8')
|
|
|
|
|
|
|
|
|
#### main program #####
|
|
|
if __name__ == "__main__":
|
|
|
+ # command line arguments #
|
|
|
+ infile = sys.argv[1]
|
|
|
+ annotated_time = sys.argv[2]
|
|
|
+ target_time = sys.argv[3]
|
|
|
+ outdir = sys.argv[4]
|
|
|
|
|
|
+# with launch_ipdb_on_exception():
|
|
|
# read the annotation file
|
|
|
- for input_file in INPUT_FILES[:1]:
|
|
|
- anno = pd.read_csv(input_file, sep='\t').to_records(index=False)
|
|
|
- segment_starts = [start for start, offset in SEGMENTS_OFFSETS]
|
|
|
-
|
|
|
- run_events = defaultdict(list)
|
|
|
- for row in anno:
|
|
|
- # get the run number
|
|
|
- run = get_run_number(segment_starts, row['onset'])
|
|
|
-
|
|
|
- # SEGMENT SHIFT correction
|
|
|
- # is now implicitly done by func 'anno_time_to_seg_time'
|
|
|
- # using the adjusted segment starts (s. SEGMENTS_OFFSETS)
|
|
|
-# row[0] = fix_segment_shift(row[0], CROPPED)
|
|
|
-# if type(row[1]) == float:
|
|
|
-# row[1] = fix_segment_shift(row[1], CROPPED)
|
|
|
-
|
|
|
- # finally convert the timings of the continouos annotation
|
|
|
- # to timings in respect to the start of the corresponding segment
|
|
|
- onset = anno_time_to_seg_time(
|
|
|
- segment_starts,
|
|
|
- run,
|
|
|
- float(row['onset']),
|
|
|
- CROPPED)
|
|
|
- row['onset'] = onset
|
|
|
-
|
|
|
- # AUDIO TIMING (MOVIE) correction
|
|
|
- # Dialoge im Film kommen 1/2 frame spater als das Hoerspiel,
|
|
|
- # das einem frame (40ms) nach vorn gezogen wurde
|
|
|
- if MOVIE is True:
|
|
|
- pass
|
|
|
-
|
|
|
- # AUDIO TIMING (AUDIOBOOK) correction
|
|
|
- if MOVIE is False:
|
|
|
- pass
|
|
|
-
|
|
|
- # append that shit
|
|
|
- run_events[run].append(row)
|
|
|
-
|
|
|
- write_segmented_annos(input_file, MOVIE, CROPPED, run_events, OUT_DIR)
|
|
|
+ anno = pd.read_csv(infile, sep='\t', encoding='utf-8').to_records(index=False)
|
|
|
+ segment_starts = [start for start, offset in SEGMENTS_OFFSETS]
|
|
|
+
|
|
|
+ run_events = defaultdict(list)
|
|
|
+ for row in anno:
|
|
|
+ # get the run number
|
|
|
+ run = get_run_number(segment_starts, row['onset'])
|
|
|
+
|
|
|
+ # convert the timings of a continuous annotation
|
|
|
+ # to timings with respect to the start of the corresponding segment
|
|
|
+ onset_in_seg = whole_anno_to_segments(
|
|
|
+ segment_starts,
|
|
|
+ run,
|
|
|
+ float(row['onset']))
|
|
|
+
|
|
|
+
|
|
|
+ # correct for the stimulus used to annotate the audiotrack
|
|
|
+ if annotated_time == 'aomovie':
|
|
|
+     # the files
|
|
|
+     # forrestgump_researchcut_ad_ger.flac and
|
|
|
+     # german_dvd_5.1_48000hz_488kb_research_cut_aligned_cutted_narrator_muted_48000Hz.flac
|
|
|
+     # (that contain the audio description) were originally lagging
|
|
|
+     # behind for XYZ msec and were shifted forward
|
|
|
+     # by one frame (40ms) with respect to the reference file
|
|
|
+     # forrestgump_researchcut_ger.mkv
|
|
|
+
|
|
|
+     # 1st, undo the 40 ms forward shift of the narrator (incl. dialogue)
|
|
|
+     # that was applied before annotating the narrator/dialogue
|
|
|
+     onset_in_seg += 0.040
|
|
|
+
|
|
|
+     # 2nd, correct for the offset between the (unshifted) audio
|
|
|
+     # description and the audiovisual movie
|
|
|
+     # -> the offset varies +/- one frame (40 ms) around 0, so nothing is subtracted
|
|
|
+     onset_in_seg -= 0.000
|
|
|
+
|
|
|
+     # 3rd, correct for the offset between whole stimulus
|
|
|
+     # (audiovisual or audio-only) and its segments
|
|
|
+     if target_time == 'avmovie':
|
|
|
+         onset_in_seg = fix_audio_movie_segments(
|
|
|
+             AUDIO_AV_OFFSETS,
|
|
|
+             run,
|
|
|
+             onset_in_seg)
|
|
|
+
|
|
|
+     elif target_time == 'aomovie':
|
|
|
+         onset_in_seg = fix_audio_descr_segments(
|
|
|
+             AUDIO_AO_OFFSETS,
|
|
|
+             run,
|
|
|
+             onset_in_seg)
|
|
|
+
|
|
|
+     else:
|
|
|
+         raise ValueError('Unknown time label %s' % target_time)
|
|
|
+
|
|
|
+ elif annotated_time == 'avmovie':
|
|
|
+     # no correction is applied to this kind of annotation for now
|
|
|
+     pass
|
|
|
+
|
|
|
+ else:
|
|
|
+     raise ValueError('%s is an unknown annotation' % basename(infile))
|
|
|
+
|
|
|
+ row['onset'] = round(onset_in_seg, 3)
|
|
|
+
|
|
|
+ # store the corrected event under its run number
|
|
|
+ run_events[run].append(row)
|
|
|
+
|
|
|
+ write_segmented_annos(infile, target_time, run_events, outdir)
|