# -*- coding: utf-8 -*-
"""
BCI2000RawIO is a class to read BCI2000 .dat files.
https://www.bci2000.org/mediawiki/index.php/Technical_Reference:BCI2000_File_Format
"""
from __future__ import print_function, division, absolute_import  # unicode_literals

from .baserawio import BaseRawIO, _signal_channel_dtype, _unit_channel_dtype, _event_channel_dtype

import numpy as np

import re

try:
    from urllib.parse import unquote
except ImportError:
    from urllib import url2pathname as unquote


class BCI2000RawIO(BaseRawIO):
    """
    Class for reading data from a BCI2000 .dat file, either version 1.0 or 1.1.
    """
    extensions = ['dat']
    rawmode = 'one-file'
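
    # Minimal usage sketch (hedged): the file name below is hypothetical, and the call
    # sequence is the generic neo.rawio pattern rather than anything specific to this class.
    #
    #     reader = BCI2000RawIO(filename='example_session.dat')
    #     reader.parse_header()
    #     raw = reader.get_analogsignal_chunk(block_index=0, seg_index=0,
    #                                         i_start=0, i_stop=1024,
    #                                         channel_indexes=None)
    #     sigs = reader.rescale_signal_raw_to_float(raw, dtype='float64')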

    def __init__(self, filename=''):
        BaseRawIO.__init__(self)
        self.filename = filename
        self._my_events = None

    def _source_name(self):
        return self.filename

    def _parse_header(self):
        file_info, state_defs, param_defs = parse_bci2000_header(self.filename)

        self.header = {}
        self.header['nb_block'] = 1
        self.header['nb_segment'] = [1]

        sig_channels = []
        for chan_ix in range(file_info['SourceCh']):
            ch_name = param_defs['ChannelNames']['value'][chan_ix] \
                if 'ChannelNames' in param_defs else 'ch' + str(chan_ix)
            chan_id = chan_ix + 1
            sr = param_defs['SamplingRate']['value']  # Hz
            dtype = file_info['DataFormat']
            units = 'uV'
            gain = param_defs['SourceChGain']['value'][chan_ix]
            offset = param_defs['SourceChOffset']['value'][chan_ix]
            group_id = 0
            sig_channels.append((ch_name, chan_id, sr, dtype, units, gain, offset, group_id))
        self.header['signal_channels'] = np.array(sig_channels, dtype=_signal_channel_dtype)

        self.header['unit_channels'] = np.array([], dtype=_unit_channel_dtype)

        # Create an event channel for each state variable.
        event_channels = []
        for st_ix, st_tup in enumerate(state_defs):
            event_channels.append((st_tup[0], 'ev_' + str(st_ix), 'event'))
        self.header['event_channels'] = np.array(event_channels, dtype=_event_channel_dtype)

        # Add annotations.
        # Generate basic annotations in the nested dict self.raw_annotations.
        self._generate_minimal_annotations()
        self.raw_annotations['blocks'][0].update({
            'file_info': file_info,
            'param_defs': param_defs
        })
        for ev_ix, ev_dict in enumerate(self.raw_annotations['event_channels']):
            ev_dict.update({
                'length': state_defs[ev_ix][1],
                'startVal': state_defs[ev_ix][2],
                'bytePos': state_defs[ev_ix][3],
                'bitPos': state_defs[ev_ix][4]
            })

        import time
        time_formats = ['%a %b %d %H:%M:%S %Y', '%Y-%m-%dT%H:%M:%S']
        try:
            self._global_time = time.mktime(time.strptime(param_defs['StorageTime']['value'],
                                                          time_formats[0]))
        except ValueError:
            # Fall back to the ISO-style timestamp format used by newer files.
            self._global_time = time.mktime(time.strptime(param_defs['StorageTime']['value'],
                                                          time_formats[1]))

        # Save variables to make it easier to load the binary data.
        self._read_info = {
            'header_len': file_info['HeaderLen'],
            'n_chans': file_info['SourceCh'],
            'sample_dtype': {
                'int16': np.int16,
                'int32': np.int32,
                'float32': np.float32}.get(file_info['DataFormat']),
            'state_vec_len': file_info['StatevectorLen'],
            'sampling_rate': param_defs['SamplingRate']['value']
        }

        # Calculate the dtype for a single sample frame: the data for all channels
        # followed by the state vector.
        self._read_info['line_dtype'] = [
            ('raw_vector', self._read_info['sample_dtype'], self._read_info['n_chans']),
            ('state_vector', np.uint8, self._read_info['state_vec_len'])]
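
        # Worked example (illustrative numbers, not taken from any particular file):
        # with 16 int16 channels and StatevectorLen = 4, one sample frame occupies
        # 16 * 2 + 4 = 36 bytes, so n_samps below works out to (file_size - HeaderLen) // 36.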
        import os
        self._read_info['n_samps'] = int((os.stat(self.filename).st_size - file_info['HeaderLen'])
                                         / np.dtype(self._read_info['line_dtype']).itemsize)

        # memmap is fast, so we can get the data ready for reading now.
        self._memmap = np.memmap(self.filename, dtype=self._read_info['line_dtype'],
                                 offset=self._read_info['header_len'], mode='r')

    def _segment_t_start(self, block_index, seg_index):
        return 0.

    def _segment_t_stop(self, block_index, seg_index):
        return self._read_info['n_samps'] / self._read_info['sampling_rate']

    def _get_signal_size(self, block_index, seg_index, channel_indexes=None):
        return self._read_info['n_samps']

    def _get_signal_t_start(self, block_index, seg_index, channel_indexes):
        return 0.

    def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, channel_indexes):
        if i_start is None:
            i_start = 0
        if i_stop is None:
            i_stop = self._read_info['n_samps']
        assert 0 <= i_start <= self._read_info['n_samps'], "i_start outside data range"
        assert 0 <= i_stop <= self._read_info['n_samps'], "i_stop outside data range"
        if channel_indexes is None:
            channel_indexes = np.arange(self.header['signal_channels'].size)
        return self._memmap['raw_vector'][i_start:i_stop, channel_indexes]

    def _spike_count(self, block_index, seg_index, unit_index):
        return 0

    def _get_spike_timestamps(self, block_index, seg_index, unit_index, t_start, t_stop):
        return None

    def _rescale_spike_timestamp(self, spike_timestamps, dtype):
        return None

    def _get_spike_raw_waveforms(self, block_index, seg_index, unit_index, t_start, t_stop):
        return None

    def _event_count(self, block_index, seg_index, event_channel_index):
        return self._event_arrays_list[event_channel_index][0].shape[0]

    def _get_event_timestamps(self, block_index, seg_index, event_channel_index, t_start, t_stop):
        # Return 3 numpy arrays: timestamps, durations, labels.
        # Durations must be None for 'event'; labels must have dtype 'U'.
        ts, dur, labels = self._event_arrays_list[event_channel_index]
        # seg_t_start = self._segment_t_start(block_index, seg_index)
        keep = np.ones(ts.shape, dtype=bool)
        if t_start is not None:
            keep = np.logical_and(keep, ts >= t_start)
        if t_stop is not None:
            keep = np.logical_and(keep, ts <= t_stop)
        return ts[keep], dur[keep], labels[keep]

    def _rescale_event_timestamp(self, event_timestamps, dtype):
        event_times = (event_timestamps / float(self._read_info['sampling_rate'])).astype(dtype)
        return event_times

    def _rescale_epoch_duration(self, raw_duration, dtype):
        durations = (raw_duration / float(self._read_info['sampling_rate'])).astype(dtype)
        return durations

    @property
    def _event_arrays_list(self):
        if self._my_events is None:
            self._my_events = []
            for s_ix, sd in enumerate(self.raw_annotations['event_channels']):
                ev_times = durs = vals = np.array([])
                # Skip these big but mostly useless (?) states.
                if sd['name'] not in ['SourceTime', 'StimulusTime']:
                    # Determine which bytes of self._memmap['state_vector'] are needed.
                    nbytes = int(np.ceil((sd['bitPos'] + sd['length']) / 8))
                    byte_slice = slice(sd['bytePos'], sd['bytePos'] + nbytes)
                    # Then determine how to mask those bytes to get only the needed bits.
                    bit_mask = np.array([255] * nbytes, dtype=np.uint8)
                    bit_mask[0] &= 255 & (255 << sd['bitPos'])  # Fix the mask for the first byte
                    # Number of unused high bits in the last byte; the final modulo keeps this
                    # 0 (not 8) when the state ends exactly on a byte boundary.
                    extra_bits = (8 - (sd['bitPos'] + sd['length']) % 8) % 8
                    bit_mask[-1] &= 255 & (255 >> extra_bits)  # Fix the mask for the last byte
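
                    # Worked example (illustrative, not from a real file): a state with
                    # bitPos = 3 and length = 7 spans ceil((3 + 7) / 8) = 2 bytes, giving a
                    # first-byte mask of 0b11111000 and a last-byte mask of 0b00000011, so
                    # only the state's own 7 bits survive before the right shift below.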

                    # When converting to an int, we need to know which integer type it will become.
                    n_max_bytes = 1 << (nbytes - 1).bit_length()
                    view_type = {1: np.int8, 2: np.int16,
                                 4: np.int32, 8: np.int64}.get(n_max_bytes)
                    # Slice and mask the data.
                    masked_byte_array = self._memmap['state_vector'][:, byte_slice] & bit_mask
                    # Convert the byte array to a vector of ints: pad the byte axis (only) up to
                    # the view type's width, then view as the larger int type.
                    state_vec = np.pad(masked_byte_array,
                                       ((0, 0), (0, n_max_bytes - nbytes)),
                                       'constant').view(dtype=view_type)
                    state_vec = np.right_shift(state_vec, sd['bitPos'])[:, 0]

                    # In the state vector, find 'events' wherever the state changes.
                    st_ch_ix = np.where(np.hstack((0, np.diff(state_vec))) != 0)[0]  # event indices
                    if len(st_ch_ix) > 0:
                        ev_times = st_ch_ix
                        durs = np.asarray([None] * len(st_ch_ix))
                        # np.hstack((np.diff(st_ch_ix), len(state_vec) - st_ch_ix[-1]))
                        vals = np.char.mod('%d', state_vec[st_ch_ix])  # event values as strings
                self._my_events.append([ev_times, durs, vals])
        return self._my_events


def parse_bci2000_header(filename):
    # Typically we want parameter values in Hz, seconds, or microvolts.
    scales_dict = {
        'hz': 1, 'khz': 1000, 'mhz': 1000000,
        'uv': 1, 'muv': 1, 'mv': 1000, 'v': 1000000,
        's': 1, 'us': 0.000001, 'mus': 0.000001, 'ms': 0.001, 'min': 60,
        'sec': 1, 'usec': 0.000001, 'musec': 0.000001, 'msec': 0.001
    }

    def rescale_value(param_val, data_type):
        unit_str = ''
        if param_val.lower().startswith('0x'):
            param_val = int(param_val, 16)
        elif data_type in ['int', 'float']:
            # Split the number from its optional unit suffix; allow decimal values.
            matches = re.match(r'(-?\d+\.?\d*)([a-zA-Z]*)', param_val)
            if matches is not None:  # Can be None for '%' in default, min, max values.
                param_val, unit_str = matches.group(1), matches.group(2)
                param_val = int(param_val) if data_type == 'int' else float(param_val)
                if len(unit_str) > 0:
                    param_val *= scales_dict.get(unit_str.lower(), 1)
        else:
            param_val = unquote(param_val)
        return param_val, unit_str
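
    # Example of the helper above (illustrative values): rescale_value('256Hz', 'float')
    # returns (256.0, 'Hz') because 'hz' maps to a scale factor of 1, and
    # rescale_value('2ms', 'float') returns (0.002, 'ms').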

    def parse_dimensions(param_list):
        num_els = param_list.pop(0)
        # Sometimes the number of elements isn't given,
        # but the list of element labels is wrapped with {}.
        if num_els == '{':
            num_els = param_list.index('}')
            el_labels = [unquote(param_list.pop(0)) for x in range(num_els)]
            param_list.pop(0)  # Remove the '}'
        else:
            num_els = int(num_els)
            el_labels = [str(ix) for ix in range(num_els)]
        return num_els, el_labels
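
    # Example of parse_dimensions (illustrative tokens): a list starting
    # ['{', 'Ch1', 'Ch2', '}', ...] yields (2, ['Ch1', 'Ch2']), while one starting
    # ['3', ...] yields (3, ['0', '1', '2']).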

    import io
    with io.open(filename, 'rb') as fid:
        # Parse the file header (plain text).
        # The first line contains basic information which we store in a dictionary.
        temp = fid.readline().decode('utf8').split()
        keys = [k.rstrip('=') for k in temp[::2]]
        vals = temp[1::2]
        # Insert default version and format.
        file_info = {'BCI2000V': 1.0, 'DataFormat': 'int16'}
        file_info.update(**dict(zip(keys, vals)))
        # Convert from string to float/int.
        file_info['BCI2000V'] = float(file_info['BCI2000V'])
        for k in ['HeaderLen', 'SourceCh', 'StatevectorLen']:
            if k in file_info:
                file_info[k] = int(file_info[k])
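
        # For reference, a typical first line looks like this (values are illustrative):
        #   BCI2000V= 1.1 HeaderLen= 5326 SourceCh= 16 StatevectorLen= 4 DataFormat= float32
        # Version 1.0 files omit the BCI2000V and DataFormat fields, hence the defaults above.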

        # The next lines contain state vector definitions.
        temp = fid.readline().decode('utf8').strip()
        assert temp == '[ State Vector Definition ]', \
            "State definitions not found in header %s" % filename
        state_defs = []
        state_def_dtype = [('name', 'a64'),
                           ('length', int),
                           ('startVal', int),
                           ('bytePos', int),
                           ('bitPos', int)]
        while True:
            temp = fid.readline().decode('utf8').strip()
            if len(temp) == 0 or temp[0] == '[':
                # Presence of '[' signifies a new section.
                break
            temp = temp.split()
            state_defs.append((temp[0], int(temp[1]), int(temp[2]), int(temp[3]), int(temp[4])))
        state_defs = np.array(state_defs, dtype=state_def_dtype)
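
        # Each state definition line has the form 'Name Length Value BytePos BitPos',
        # e.g. (illustrative) 'SourceTime 16 0 1 0' describes a 16-bit state stored
        # starting at byte 1, bit 0 of the state vector.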

        # The next lines contain parameter definitions.
        # There are many, and their formatting can be complicated.
        assert temp == '[ Parameter Definition ]', \
            "Parameter definitions not found in header %s" % filename
        param_defs = {}
        while True:
            temp = fid.readline().decode('utf8')
            if fid.tell() >= file_info['HeaderLen']:
                # End of header.
                break
            if len(temp.strip()) == 0:
                continue  # Skip empty lines.
            # Everything after the '//' is a comment.
            temp = temp.strip().split('//', 1)
            param_def = {'comment': temp[1].strip() if len(temp) > 1 else ''}
            # Parse the parameter definition. Generally it is section:category:name dtype name= value(s).
            temp = temp[0].split()
            param_def.update(
                {'section_category_name': [unquote(x) for x in temp.pop(0).split(':')]})
            dtype = temp.pop(0)
            param_name = unquote(temp.pop(0).rstrip('='))
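
            # Illustrative example of a parameter line (values invented for clarity):
            #   Source:Signal%20Properties int SourceCh= 16 16 1 128 // number of channels
            # which yields dtype 'int', param_name 'SourceCh', and a value of 16, with
            # default/min/max of 16/1/128 left at the end of the token list.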

            # Parse the rest; the parse method depends on the dtype.
            param_value, units = None, None
            if dtype in ('int', 'float'):
                param_value = temp.pop(0)
                if param_value == 'auto':
                    param_value = np.nan
                    units = ''
                else:
                    param_value, units = rescale_value(param_value, dtype)
            elif dtype in ('string', 'variant'):
                param_value = unquote(temp.pop(0))
            elif dtype.endswith('list'):  # e.g., intlist, stringlist, floatlist, list
                dtype = dtype[:-4]
                # The list parameter values will begin with either an int specifying the
                # number of elements or a list of labels surrounded by { }.
                num_elements, element_labels = parse_dimensions(temp)  # This pops off the dimension info.
                param_def.update({'element_labels': element_labels})
                pv_un = [rescale_value(pv, dtype) for pv in temp[:num_elements]]
                if len(pv_un) > 0:
                    param_value, units = zip(*pv_un)
                else:
                    param_value, units = np.nan, ''
                temp = temp[num_elements:]
                # Sometimes an element list will be a list of ints even though
                # the element_type is '' (str)...
                # This usually happens for known parameters, such as SourceChOffset,
                # that can be dealt with explicitly later.
            elif dtype.endswith('matrix'):
                dtype = dtype[:-6]
                # The parameter values will be preceded by two dimension descriptors,
                # first rows then columns. Each dimension might be described by an
                # int or by a list of labels surrounded by {}.
                n_rows, row_labels = parse_dimensions(temp)
                n_cols, col_labels = parse_dimensions(temp)
                param_def.update({'row_labels': row_labels, 'col_labels': col_labels})

                param_value = []
                units = []
                for row_ix in range(n_rows):
                    cols = []
                    for col_ix in range(n_cols):
                        col_val, _units = rescale_value(temp[row_ix * n_cols + col_ix], dtype)
                        cols.append(col_val)
                        units.append(_units)
                    param_value.append(cols)
                temp = temp[n_rows * n_cols:]

            param_def.update({
                'value': param_value,
                'units': units,
                'dtype': dtype
            })

            # At the end of the parameter definition, we might get
            # default, min, max values for the parameter.
            temp.reverse()
            if len(temp):
                param_def.update({'max_val': rescale_value(temp.pop(0), dtype)})
            if len(temp):
                param_def.update({'min_val': rescale_value(temp.pop(0), dtype)})
            if len(temp):
                param_def.update({'default_val': rescale_value(temp.pop(0), dtype)})
            param_defs.update({param_name: param_def})
        # End parameter block
    # Outdent to close file
    return file_info, state_defs, param_defs