neuralynxrawio.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904
  1. """
  2. Class for reading data from Neuralynx files.
  3. This IO supports NCS, NEV, NSE and NTT file formats.
  4. NCS contains sampled signal for one channel
  5. NEV contains events
  6. NSE contains spikes and waveforms for mono electrodes
  7. NTT contains spikes and waveforms for tetrodes
NCS files can contain gaps that can be detected as irregularities
  9. in timestamps of data blocks. Each gap leads to one new segment.
  10. Some NCS files may need to be read entirely to detect those gaps which can be slow.
  11. Author: Julia Sprenger, Carlos Canova, Samuel Garcia
  12. """
  13. # from __future__ import unicode_literals is not compatible with numpy.dtype both py2 py3
  14. from .baserawio import (BaseRawIO, _signal_channel_dtype,
  15. _unit_channel_dtype, _event_channel_dtype)
  16. import numpy as np
  17. import os
  18. import re
  19. import distutils.version
  20. import datetime
  21. from collections import OrderedDict
  22. BLOCK_SIZE = 512 # nb sample per signal block
  23. class NeuralynxRawIO(BaseRawIO):
  24. """"
  25. Class for reading datasets recorded by Neuralynx.
  26. This version only works with rawmode of one-dir for a single directory.
  27. Examples:
  28. >>> reader = NeuralynxRawIO(dirname='Cheetah_v5.5.1/original_data')
  29. >>> reader.parse_header()
  30. Inspect all files in the directory.
  31. >>> print(reader)
  32. Display all information about signal channels, units, segment size....
  33. """
  34. extensions = ['nse', 'ncs', 'nev', 'ntt']
  35. rawmode = 'one-dir'
  36. def __init__(self, dirname='', keep_original_times=False, **kargs):
  37. """
  38. Parameters
  39. ----------
  40. dirname: str
  41. name of directory containing all files for dataset
  42. keep_original_times:
  43. if True, keep original start time as in files,
  44. otherwise set 0 of time to first time in dataset
  45. """
  46. self.dirname = dirname
  47. self.keep_original_times = keep_original_times
  48. BaseRawIO.__init__(self, **kargs)
    def _source_name(self):
        # The data source is identified by the dataset's directory path.
        return self.dirname
  51. def _parse_header(self):
  52. sig_channels = []
  53. unit_channels = []
  54. event_channels = []
  55. self.ncs_filenames = OrderedDict() # (chan_name, chan_id): filename
  56. self.nse_ntt_filenames = OrderedDict() # (chan_name, chan_id): filename
  57. self.nev_filenames = OrderedDict() # chan_id: filename
  58. self._nev_memmap = {}
  59. self._spike_memmap = {}
  60. self.internal_unit_ids = [] # channel_index > ((channel_name, channel_id), unit_id)
  61. self.internal_event_ids = []
  62. self._empty_ncs = [] # this list contains filenames of empty files
  63. self._empty_nev = []
  64. self._empty_nse_ntt = []
  65. # Explore the directory looking for ncs, nev, nse and ntt
  66. # and construct channels headers.
  67. signal_annotations = []
  68. unit_annotations = []
  69. event_annotations = []
  70. for filename in sorted(os.listdir(self.dirname)):
  71. filename = os.path.join(self.dirname, filename)
  72. _, ext = os.path.splitext(filename)
  73. ext = ext[1:] # remove dot
  74. ext = ext.lower() # make lower case for comparisons
  75. if ext not in self.extensions:
  76. continue
  77. # Skip Ncs files with only header. Other empty file types
  78. # will have an empty dataset constructed later.
  79. if (os.path.getsize(filename) <= NlxHeader.HEADER_SIZE) and ext in ['ncs']:
  80. self._empty_ncs.append(filename)
  81. continue
  82. # All file have more or less the same header structure
  83. info = NlxHeader.build_for_file(filename)
  84. chan_names = info['channel_names']
  85. chan_ids = info['channel_ids']
  86. for idx, chan_id in enumerate(chan_ids):
  87. chan_name = chan_names[idx]
  88. chan_uid = (chan_name, chan_id)
  89. if ext == 'ncs':
  90. # a sampled signal channel
  91. units = 'uV'
  92. gain = info['bit_to_microVolt'][idx]
  93. if info.get('input_inverted', False):
  94. gain *= -1
  95. offset = 0.
  96. group_id = 0
  97. sig_channels.append((chan_name, chan_id, info['sampling_rate'],
  98. 'int16', units, gain, offset, group_id))
  99. self.ncs_filenames[chan_uid] = filename
  100. keys = [
  101. 'DspFilterDelay_µs',
  102. 'recording_opened',
  103. 'FileType',
  104. 'DspDelayCompensation',
  105. 'recording_closed',
  106. 'DspLowCutFilterType',
  107. 'HardwareSubSystemName',
  108. 'DspLowCutNumTaps',
  109. 'DSPLowCutFilterEnabled',
  110. 'HardwareSubSystemType',
  111. 'DspHighCutNumTaps',
  112. 'ADMaxValue',
  113. 'DspLowCutFrequency',
  114. 'DSPHighCutFilterEnabled',
  115. 'RecordSize',
  116. 'InputRange',
  117. 'DspHighCutFrequency',
  118. 'input_inverted',
  119. 'NumADChannels',
  120. 'DspHighCutFilterType',
  121. ]
  122. d = {k: info[k] for k in keys if k in info}
  123. signal_annotations.append(d)
  124. elif ext in ('nse', 'ntt'):
  125. # nse and ntt are pretty similar except for the waveform shape.
  126. # A file can contain several unit_id (so several unit channel).
  127. assert chan_id not in self.nse_ntt_filenames, \
  128. 'Several nse or ntt files have the same unit_id!!!'
  129. self.nse_ntt_filenames[chan_uid] = filename
  130. dtype = get_nse_or_ntt_dtype(info, ext)
  131. if (os.path.getsize(filename) <= NlxHeader.HEADER_SIZE):
  132. self._empty_nse_ntt.append(filename)
  133. data = np.zeros((0,), dtype=dtype)
  134. else:
  135. data = np.memmap(filename, dtype=dtype, mode='r',
  136. offset=NlxHeader.HEADER_SIZE)
  137. self._spike_memmap[chan_uid] = data
  138. unit_ids = np.unique(data['unit_id'])
  139. for unit_id in unit_ids:
  140. # a spike channel for each (chan_id, unit_id)
  141. self.internal_unit_ids.append((chan_uid, unit_id))
  142. unit_name = "ch{}#{}#{}".format(chan_name, chan_id, unit_id)
  143. unit_id = '{}'.format(unit_id)
  144. wf_units = 'uV'
  145. wf_gain = info['bit_to_microVolt'][idx]
  146. if info.get('input_inverted', False):
  147. wf_gain *= -1
  148. wf_offset = 0.
  149. wf_left_sweep = -1 # NOT KNOWN
  150. wf_sampling_rate = info['sampling_rate']
  151. unit_channels.append(
  152. (unit_name, '{}'.format(unit_id), wf_units, wf_gain,
  153. wf_offset, wf_left_sweep, wf_sampling_rate))
  154. unit_annotations.append(dict(file_origin=filename))
  155. elif ext == 'nev':
  156. # an event channel
  157. # each ('event_id', 'ttl_input') give a new event channel
  158. self.nev_filenames[chan_id] = filename
  159. if (os.path.getsize(filename) <= NlxHeader.HEADER_SIZE):
  160. self._empty_nev.append(filename)
  161. data = np.zeros((0,), dtype=nev_dtype)
  162. internal_ids = []
  163. else:
  164. data = np.memmap(filename, dtype=nev_dtype, mode='r',
  165. offset=NlxHeader.HEADER_SIZE)
  166. internal_ids = np.unique(data[['event_id', 'ttl_input']]).tolist()
  167. for internal_event_id in internal_ids:
  168. if internal_event_id not in self.internal_event_ids:
  169. event_id, ttl_input = internal_event_id
  170. name = '{} event_id={} ttl={}'.format(
  171. chan_name, event_id, ttl_input)
  172. event_channels.append((name, chan_id, 'event'))
  173. self.internal_event_ids.append(internal_event_id)
  174. self._nev_memmap[chan_id] = data
  175. sig_channels = np.array(sig_channels, dtype=_signal_channel_dtype)
  176. unit_channels = np.array(unit_channels, dtype=_unit_channel_dtype)
  177. event_channels = np.array(event_channels, dtype=_event_channel_dtype)
  178. # require all sampled signals, ncs files, to have same sampling rate
  179. if sig_channels.size > 0:
  180. sampling_rate = np.unique(sig_channels['sampling_rate'])
  181. assert sampling_rate.size == 1
  182. self._sigs_sampling_rate = sampling_rate[0]
  183. # set 2 attributes needed later for header in case there are no ncs files in dataset,
  184. # e.g. Pegasus
  185. self._timestamp_limits = None
  186. self._nb_segment = 1
  187. # Read ncs files for gap detection and nb_segment computation.
  188. # :TODO: current algorithm depends on side-effect of read_ncs_files on
  189. # self._sigs_memmap, self._sigs_t_start, self._sigs_t_stop,
  190. # self._sigs_length, self._nb_segment, self._timestamp_limits
  191. ncsBlocks = self.read_ncs_files(self.ncs_filenames)
  192. # Determine timestamp limits in nev, nse file by scanning them.
  193. ts0, ts1 = None, None
  194. for _data_memmap in (self._spike_memmap, self._nev_memmap):
  195. for _, data in _data_memmap.items():
  196. ts = data['timestamp']
  197. if ts.size == 0:
  198. continue
  199. if ts0 is None:
  200. ts0 = ts[0]
  201. ts1 = ts[-1]
  202. ts0 = min(ts0, ts[0])
  203. ts1 = max(ts1, ts[-1])
  204. # decide on segment and global start and stop times based on files available
  205. if self._timestamp_limits is None:
  206. # case NO ncs but HAVE nev or nse
  207. self._timestamp_limits = [(ts0, ts1)]
  208. self._seg_t_starts = [ts0 / 1e6]
  209. self._seg_t_stops = [ts1 / 1e6]
  210. self.global_t_start = ts0 / 1e6
  211. self.global_t_stop = ts1 / 1e6
  212. elif ts0 is not None:
  213. # case HAVE ncs AND HAVE nev or nse
  214. self.global_t_start = min(ts0 / 1e6, self._sigs_t_start[0])
  215. self.global_t_stop = max(ts1 / 1e6, self._sigs_t_stop[-1])
  216. self._seg_t_starts = list(self._sigs_t_start)
  217. self._seg_t_starts[0] = self.global_t_start
  218. self._seg_t_stops = list(self._sigs_t_stop)
  219. self._seg_t_stops[-1] = self.global_t_stop
  220. else:
  221. # case HAVE ncs but NO nev or nse
  222. self._seg_t_starts = self._sigs_t_start
  223. self._seg_t_stops = self._sigs_t_stop
  224. self.global_t_start = self._sigs_t_start[0]
  225. self.global_t_stop = self._sigs_t_stop[-1]
  226. if self.keep_original_times:
  227. self.global_t_stop = self.global_t_stop - self.global_t_start
  228. self.global_t_start = 0
  229. # fill header dictionary
  230. self.header = {}
  231. self.header['nb_block'] = 1
  232. self.header['nb_segment'] = [self._nb_segment]
  233. self.header['signal_channels'] = sig_channels
  234. self.header['unit_channels'] = unit_channels
  235. self.header['event_channels'] = event_channels
  236. # Annotations
  237. self._generate_minimal_annotations()
  238. bl_annotations = self.raw_annotations['blocks'][0]
  239. for seg_index in range(self._nb_segment):
  240. seg_annotations = bl_annotations['segments'][seg_index]
  241. for c in range(sig_channels.size):
  242. sig_ann = seg_annotations['signals'][c]
  243. sig_ann.update(signal_annotations[c])
  244. for c in range(unit_channels.size):
  245. unit_ann = seg_annotations['units'][c]
  246. unit_ann.update(unit_annotations[c])
  247. for c in range(event_channels.size):
  248. # annotations for channel events
  249. event_id, ttl_input = self.internal_event_ids[c]
  250. chan_id = event_channels[c]['id']
  251. ev_ann = seg_annotations['events'][c]
  252. ev_ann['file_origin'] = self.nev_filenames[chan_id]
  253. # ~ ev_ann['marker_id'] =
  254. # ~ ev_ann['nttl'] =
  255. # ~ ev_ann['digital_marker'] =
  256. # ~ ev_ann['analog_marker'] =
    # Accessors for segment times which are offset by appropriate global start time
    def _segment_t_start(self, block_index, seg_index):
        # Segment start in seconds relative to the dataset's global start.
        return self._seg_t_starts[seg_index] - self.global_t_start
    def _segment_t_stop(self, block_index, seg_index):
        # Segment stop in seconds relative to the dataset's global start.
        return self._seg_t_stops[seg_index] - self.global_t_start
    def _get_signal_size(self, block_index, seg_index, channel_indexes):
        # All ncs channels share the same per-segment length, so
        # channel_indexes is intentionally ignored here.
        return self._sigs_length[seg_index]
    def _get_signal_t_start(self, block_index, seg_index, channel_indexes):
        # All ncs channels share the same per-segment start time, so
        # channel_indexes is intentionally ignored here.
        return self._sigs_t_start[seg_index] - self.global_t_start
  266. def _get_analogsignal_chunk(self, block_index, seg_index, i_start, i_stop, channel_indexes):
  267. """
  268. Retrieve chunk of analog signal, a chunk being a set of contiguous samples.
  269. PARAMETERS
  270. ----------
  271. block_index:
  272. index of block in dataset, ignored as only 1 block in this implementation
  273. seg_index:
  274. index of segment to use
  275. i_start:
  276. sample index of first sample within segment to retrieve
  277. i_stop:
  278. sample index of last sample within segment to retrieve
  279. channel_indexes:
  280. list of channel indices to return data for
  281. RETURNS
  282. -------
  283. array of samples, with each requested channel in a column
  284. """
  285. if i_start is None:
  286. i_start = 0
  287. if i_stop is None:
  288. i_stop = self._sigs_length[seg_index]
  289. block_start = i_start // BLOCK_SIZE
  290. block_stop = i_stop // BLOCK_SIZE + 1
  291. sl0 = i_start % 512
  292. sl1 = sl0 + (i_stop - i_start)
  293. if channel_indexes is None:
  294. channel_indexes = slice(None)
  295. channel_ids = self.header['signal_channels'][channel_indexes]['id']
  296. channel_names = self.header['signal_channels'][channel_indexes]['name']
  297. # create buffer for samples
  298. sigs_chunk = np.zeros((i_stop - i_start, len(channel_ids)), dtype='int16')
  299. for i, chan_uid in enumerate(zip(channel_names, channel_ids)):
  300. data = self._sigs_memmap[seg_index][chan_uid]
  301. sub = data[block_start:block_stop]
  302. sigs_chunk[:, i] = sub['samples'].flatten()[sl0:sl1]
  303. return sigs_chunk
  304. def _spike_count(self, block_index, seg_index, unit_index):
  305. chan_uid, unit_id = self.internal_unit_ids[unit_index]
  306. data = self._spike_memmap[chan_uid]
  307. ts = data['timestamp']
  308. ts0, ts1 = self._timestamp_limits[seg_index]
  309. # only count spikes inside the timestamp limits, inclusive, and for the specified unit
  310. keep = (ts >= ts0) & (ts <= ts1) & (unit_id == data['unit_id'])
  311. nb_spike = int(data[keep].size)
  312. return nb_spike
  313. def _get_spike_timestamps(self, block_index, seg_index, unit_index, t_start, t_stop):
  314. chan_uid, unit_id = self.internal_unit_ids[unit_index]
  315. data = self._spike_memmap[chan_uid]
  316. ts = data['timestamp']
  317. ts0, ts1 = self._timestamp_limits[seg_index]
  318. if t_start is not None:
  319. ts0 = int((t_start + self.global_t_start) * 1e6)
  320. if t_start is not None:
  321. ts1 = int((t_stop + self.global_t_start) * 1e6)
  322. keep = (ts >= ts0) & (ts <= ts1) & (unit_id == data['unit_id'])
  323. timestamps = ts[keep]
  324. return timestamps
  325. def _rescale_spike_timestamp(self, spike_timestamps, dtype):
  326. spike_times = spike_timestamps.astype(dtype)
  327. spike_times /= 1e6
  328. spike_times -= self.global_t_start
  329. return spike_times
  330. def _get_spike_raw_waveforms(self, block_index, seg_index, unit_index,
  331. t_start, t_stop):
  332. chan_uid, unit_id = self.internal_unit_ids[unit_index]
  333. data = self._spike_memmap[chan_uid]
  334. ts = data['timestamp']
  335. ts0, ts1 = self._timestamp_limits[seg_index]
  336. if t_start is not None:
  337. ts0 = int((t_start + self.global_t_start) * 1e6)
  338. if t_start is not None:
  339. ts1 = int((t_stop + self.global_t_start) * 1e6)
  340. keep = (ts >= ts0) & (ts <= ts1) & (unit_id == data['unit_id'])
  341. wfs = data[keep]['samples']
  342. if wfs.ndim == 2:
  343. # case for nse
  344. waveforms = wfs[:, None, :]
  345. else:
  346. # case for ntt change (n, 32, 4) to (n, 4, 32)
  347. waveforms = wfs.swapaxes(1, 2)
  348. return waveforms
  349. def _event_count(self, block_index, seg_index, event_channel_index):
  350. event_id, ttl_input = self.internal_event_ids[event_channel_index]
  351. chan_id = self.header['event_channels'][event_channel_index]['id']
  352. data = self._nev_memmap[chan_id]
  353. ts0, ts1 = self._timestamp_limits[seg_index]
  354. ts = data['timestamp']
  355. keep = (ts >= ts0) & (ts <= ts1) & (data['event_id'] == event_id) & \
  356. (data['ttl_input'] == ttl_input)
  357. nb_event = int(data[keep].size)
  358. return nb_event
  359. def _get_event_timestamps(self, block_index, seg_index, event_channel_index, t_start, t_stop):
  360. event_id, ttl_input = self.internal_event_ids[event_channel_index]
  361. chan_id = self.header['event_channels'][event_channel_index]['id']
  362. data = self._nev_memmap[chan_id]
  363. ts0, ts1 = self._timestamp_limits[seg_index]
  364. if t_start is not None:
  365. ts0 = int((t_start + self.global_t_start) * 1e6)
  366. if t_start is not None:
  367. ts1 = int((t_stop + self.global_t_start) * 1e6)
  368. ts = data['timestamp']
  369. keep = (ts >= ts0) & (ts <= ts1) & (data['event_id'] == event_id) & \
  370. (data['ttl_input'] == ttl_input)
  371. subdata = data[keep]
  372. timestamps = subdata['timestamp']
  373. labels = subdata['event_string'].astype('U')
  374. durations = None
  375. return timestamps, durations, labels
  376. def _rescale_event_timestamp(self, event_timestamps, dtype):
  377. event_times = event_timestamps.astype(dtype)
  378. event_times /= 1e6
  379. event_times -= self.global_t_start
  380. return event_times
    def read_ncs_files(self, ncs_filenames):
        """
        Given a list of ncs files, return a dictionary of NcsBlocks indexed by channel uid.

        :TODO: Current algorithm has side effects on following attributes:
            * self._sigs_memmap = [ {} for seg_index in range(self._nb_segment) ]
            * self._sigs_t_start = []
            * self._sigs_t_stop = []
            * self._sigs_length = []
            * self._nb_segment
            * self._timestamp_limits

        The first file is read entirely to detect gaps in timestamp.
        each gap lead to a new segment.

        Other files are not read entirely but we check than gaps
        are at the same place.

        gap_indexes can be given (when cached) to avoid full read.

        NOTE(review): despite the docstring, the current implementation
        returns None (see body); callers only rely on the side effects above.
        """
        # :TODO: Needs to account for gaps and start and end times potentially
        # being different in different groups of channels. These groups typically
        # correspond to the channels collected by a single ADC card.
        if len(ncs_filenames) == 0:
            return None

        # expected inter-block timestamp delta, in microseconds
        good_delta = int(BLOCK_SIZE * 1e6 / self._sigs_sampling_rate)
        chan_uid0 = list(ncs_filenames.keys())[0]
        filename0 = ncs_filenames[chan_uid0]

        data0 = np.memmap(filename0, dtype=ncs_dtype, mode='r', offset=NlxHeader.HEADER_SIZE)

        gap_indexes = None
        lost_indexes = None
        if self.use_cache:
            gap_indexes = self._cache.get('gap_indexes')
            lost_indexes = self._cache.get('lost_indexes')

        # detect gaps on first file
        if (gap_indexes is None) or (lost_indexes is None):
            # this can be long!!!!
            timestamps0 = data0['timestamp']
            deltas0 = np.diff(timestamps0)

            # :TODO: This algorithm needs to account for older style files which had a rounded
            # off sampling rate in the header.
            #
            # It should be that:
            # gap_indexes, = np.nonzero(deltas0!=good_delta)
            #
            # but for a file I have found many deltas0==15999, 16000, 16001 (for sampling at 32000)
            # I guess this is a round problem
            # So this is the same with a tolerance of 1 or 2 ticks
            max_tolerance = 2
            mask = np.abs((deltas0 - good_delta).astype('int64')) > max_tolerance
            gap_indexes, = np.nonzero(mask)

            if self.use_cache:
                self.add_in_cache(gap_indexes=gap_indexes)

            # update for lost_indexes
            # Sometimes NLX writes a faulty block, but it then validates how much samples it wrote
            # the validation field is in delta0['nb_valid'], it should be equal to BLOCK_SIZE
            # :TODO: this algorithm ignores samples in partially filled blocks, which
            # is not strictly necessary as all channels might have same partially filled
            # blocks at the end.
            lost_indexes, = np.nonzero(data0['nb_valid'] < BLOCK_SIZE)

            if self.use_cache:
                self.add_in_cache(lost_indexes=lost_indexes)

        # candidate segment boundaries: file start/end, every gap, every lost block
        gap_candidates = np.unique([0]
                                   + [data0.size]
                                   + (gap_indexes + 1).tolist()
                                   + lost_indexes.tolist())  # linear

        # consecutive boundary pairs delimit candidate segments
        gap_pairs = np.vstack([gap_candidates[:-1], gap_candidates[1:]]).T  # 2D (n_segments, 2)

        # construct proper gap ranges free of lost samples artifacts
        minimal_segment_length = 1  # in blocks
        goodpairs = np.diff(gap_pairs, 1).reshape(-1) > minimal_segment_length
        gap_pairs = gap_pairs[goodpairs]  # ensures a segment is at least a block wide

        self._nb_segment = len(gap_pairs)
        self._sigs_memmap = [{} for seg_index in range(self._nb_segment)]
        self._sigs_t_start = []
        self._sigs_t_stop = []
        self._sigs_length = []
        self._timestamp_limits = []

        # create segment with subdata block/t_start/t_stop/length
        # NOTE(review): this loop iterates self.ncs_filenames (the attribute),
        # not the ncs_filenames parameter — confirm whether they can differ.
        for chan_uid, ncs_filename in self.ncs_filenames.items():
            data = np.memmap(ncs_filename, dtype=ncs_dtype, mode='r', offset=NlxHeader.HEADER_SIZE)
            assert data.size == data0.size, 'ncs files do not have the same data length'

            for seg_index, (i0, i1) in enumerate(gap_pairs):
                # every channel must share the gap structure detected on file 0
                assert data[i0]['timestamp'] == data0[i0][
                    'timestamp'], 'ncs files do not have the same gaps'
                assert data[i1 - 1]['timestamp'] == data0[i1 - 1][
                    'timestamp'], 'ncs files do not have the same gaps'

                subdata = data[i0:i1]
                self._sigs_memmap[seg_index][chan_uid] = subdata

                if chan_uid == chan_uid0:
                    # derive the segment time span once, from the first channel;
                    # ts1 extends one full block past the last block's timestamp
                    ts0 = subdata[0]['timestamp']
                    ts1 = subdata[-1]['timestamp'] +\
                        np.uint64(BLOCK_SIZE / self._sigs_sampling_rate * 1e6)
                    self._timestamp_limits.append((ts0, ts1))
                    t_start = ts0 / 1e6
                    self._sigs_t_start.append(t_start)
                    t_stop = ts1 / 1e6
                    self._sigs_t_stop.append(t_stop)
                    length = subdata.size * BLOCK_SIZE
                    self._sigs_length.append(length)
class NcsBlocks():
    """
    Contains information regarding the blocks of records in an Ncs file.
    Factory methods perform parsing of this information from an Ncs file or
    confirmation that file agrees with block structure.
    """
    # NOTE(review): these are class-level attributes shared by all instances;
    # appending to the lists on one instance is visible everywhere. Confirm
    # whether per-instance attributes were intended.
    startBlocks = []
    endBlocks = []
    sampFreqUsed = 0
    microsPerSampUsed = 0
class NlxHeader(OrderedDict):
    """
    Representation of basic information in all 16 kbytes Neuralynx file headers,
    including dates opened and closed if given.
    """

    HEADER_SIZE = 2 ** 14  # Neuralynx files have a txt header of 16kB

    # helper function to interpret boolean keys
  492. def _to_bool(txt):
  493. if txt == 'True':
  494. return True
  495. elif txt == 'False':
  496. return False
  497. else:
  498. raise Exception('Can not convert %s to bool' % (txt))
    # keys that may be present in header which we parse
    # Each entry is (header property name or regex, internal key, converter);
    # an empty internal key means "keep the original property name", and a
    # None converter leaves the value as a string.
    txt_header_keys = [
        ('AcqEntName', 'channel_names', None),  # used
        ('FileType', '', None),
        ('FileVersion', '', None),
        ('RecordSize', '', None),
        ('HardwareSubSystemName', '', None),
        ('HardwareSubSystemType', '', None),
        ('SamplingFrequency', 'sampling_rate', float),  # used
        ('ADMaxValue', '', None),
        ('ADBitVolts', 'bit_to_microVolt', None),  # used
        ('NumADChannels', '', None),
        ('ADChannel', 'channel_ids', None),  # used
        ('InputRange', '', None),
        ('InputInverted', 'input_inverted', _to_bool),  # used
        ('DSPLowCutFilterEnabled', '', None),
        ('DspLowCutFrequency', '', None),
        ('DspLowCutNumTaps', '', None),
        ('DspLowCutFilterType', '', None),
        ('DSPHighCutFilterEnabled', '', None),
        ('DspHighCutFrequency', '', None),
        ('DspHighCutNumTaps', '', None),
        ('DspHighCutFilterType', '', None),
        ('DspDelayCompensation', '', None),
        ('DspFilterDelay_µs', '', None),
        ('DisabledSubChannels', '', None),
        ('WaveformLength', '', int),
        ('AlignmentPt', '', None),
        ('ThreshVal', '', None),
        ('MinRetriggerSamples', '', None),
        ('SpikeRetriggerTime', '', None),
        ('DualThresholding', '', None),
        (r'Feature \w+ \d+', '', None),
        ('SessionUUID', '', None),
        ('FileUUID', '', None),
        ('CheetahRev', '', None),  # only for older versions of Cheetah
        ('ProbeName', '', None),
        ('OriginalFileName', '', None),
        ('TimeCreated', '', None),
        ('TimeClosed', '', None),
        ('ApplicationName', '', None),  # also include version number when present
        ('AcquisitionSystem', '', None),
        ('ReferenceChannel', '', None),
        ('NLX_Base_Class_Type', '', None)  # in version 4 and earlier versions of Cheetah
    ]
    # Filename and datetime may appear in header lines starting with # at
    # beginning of header or in later versions as a property. The exact format
    # used depends on the application name and its version as well as the
    # -FileVersion property.
    #
    # There are 3 styles understood by this code and the patterns used for parsing
    # the items within each are stored in a dictionary. Each dictionary is then
    # stored in main dictionary keyed by an abbreviation for the style.
    header_pattern_dicts = {
        # Cheetah before version 5 and BML
        'bv5': dict(
            datetime1_regex=r'## Time Opened: \(m/d/y\): (?P<date>\S+)'
                            r' At Time: (?P<time>\S+)',
            filename_regex=r'## File Name: (?P<filename>\S+)',
            datetimeformat='%m/%d/%Y %H:%M:%S.%f'),
        # Cheetah version 5 before and including v 5.6.4
        'bv5.6.4': dict(
            datetime1_regex=r'## Time Opened \(m/d/y\): (?P<date>\S+)'
                            r' \(h:m:s\.ms\) (?P<time>\S+)',
            datetime2_regex=r'## Time Closed \(m/d/y\): (?P<date>\S+)'
                            r' \(h:m:s\.ms\) (?P<time>\S+)',
            filename_regex=r'## File Name (?P<filename>\S+)',
            datetimeformat='%m/%d/%Y %H:%M:%S.%f'),
        # Cheetah after v 5.6.4 and default for others such as Pegasus
        'def': dict(
            datetime1_regex=r'-TimeCreated (?P<date>\S+) (?P<time>\S+)',
            datetime2_regex=r'-TimeClosed (?P<date>\S+) (?P<time>\S+)',
            filename_regex=r'-OriginalFileName "?(?P<filename>\S+)"?',
            datetimeformat='%Y/%m/%d %H:%M:%S')
    }
    def build_for_file(filename):
        """
        Factory function to build NlxHeader for a given file.

        Reads the 16 kB text header, extracts the properties declared in
        txt_header_keys, normalizes channel ids/names and conversion factors,
        determines the writing application and version, and parses the
        recording_opened / recording_closed datetimes.
        """
        with open(filename, 'rb') as f:
            txt_header = f.read(NlxHeader.HEADER_SIZE)
        txt_header = txt_header.strip(b'\x00').decode('latin-1')

        # must start with 8 # characters
        assert txt_header.startswith("########"),\
            'Neuralynx files must start with 8 # characters.'

        # find keys
        info = NlxHeader()
        for k1, k2, type_ in NlxHeader.txt_header_keys:
            pattern = r'-(?P<name>' + k1 + r')\s+(?P<value>[\S ]*)'
            matches = re.findall(pattern, txt_header)
            for match in matches:
                if k2 == '':
                    # keep the original header property name
                    name = match[0]
                else:
                    # rename to the internal key
                    name = k2
                value = match[1].rstrip(' ')
                if type_ is not None:
                    value = type_(value)
                info[name] = value

        # if channel_ids or names not in info then the filename is used
        name = os.path.splitext(os.path.basename(filename))[0]

        # convert channel ids
        if 'channel_ids' in info:
            chid_entries = re.findall(r'\w+', info['channel_ids'])
            info['channel_ids'] = [int(c) for c in chid_entries]
        else:
            info['channel_ids'] = [name]

        # convert channel names
        if 'channel_names' in info:
            name_entries = re.findall(r'\w+', info['channel_names'])
            if len(name_entries) == 1:
                # one name given for all channels
                info['channel_names'] = name_entries * len(info['channel_ids'])
            assert len(info['channel_names']) == len(info['channel_ids']), \
                'Number of channel ids does not match channel names.'
        else:
            info['channel_names'] = [name] * len(info['channel_ids'])

        # version and application name
        # older Cheetah versions with CheetahRev property
        if 'CheetahRev' in info:
            assert 'ApplicationName' not in info
            info['ApplicationName'] = 'Cheetah'
            app_version = info['CheetahRev']
        # new file version 3.4 does not contain CheetahRev property, but ApplicationName instead
        elif 'ApplicationName' in info:
            pattern = r'(\S*) "([\S ]*)"'
            match = re.findall(pattern, info['ApplicationName'])
            assert len(match) == 1, 'impossible to find application name and version'
            info['ApplicationName'], app_version = match[0]
        # BML Ncs file writing contained neither property, assume BML version 2
        else:
            info['ApplicationName'] = 'BML'
            app_version = "2.0"

        info['ApplicationVersion'] = distutils.version.LooseVersion(app_version)

        # convert bit_to_microvolt
        if 'bit_to_microVolt' in info:
            btm_entries = re.findall(r'\S+', info['bit_to_microVolt'])
            if len(btm_entries) == 1:
                # one conversion factor given for all channels
                btm_entries = btm_entries * len(info['channel_ids'])
            # header stores volts per bit; convert to microvolts per bit
            info['bit_to_microVolt'] = [float(e) * 1e6 for e in btm_entries]
            assert len(info['bit_to_microVolt']) == len(info['channel_ids']), \
                'Number of channel ids does not match bit_to_microVolt conversion factors.'

        if 'InputRange' in info:
            # NOTE(review): `chid_entries` is only bound in the
            # 'channel_ids' branch above — if InputRange is present while
            # ADChannel is not, this raises NameError. Confirm whether that
            # combination can occur in real headers.
            ir_entries = re.findall(r'\w+', info['InputRange'])
            if len(ir_entries) == 1:
                info['InputRange'] = [int(ir_entries[0])] * len(chid_entries)
            else:
                info['InputRange'] = [int(e) for e in ir_entries]
            assert len(info['InputRange']) == len(chid_entries), \
                'Number of channel ids does not match input range values.'

        # Filename and datetime depend on app name, app version, and -FileVersion
        an = info['ApplicationName']
        if an == 'Cheetah':
            av = info['ApplicationVersion']
            if av < '5':
                hpd = NlxHeader.header_pattern_dicts['bv5']
            elif av <= '5.6.4':
                hpd = NlxHeader.header_pattern_dicts['bv5.6.4']
            else:
                hpd = NlxHeader.header_pattern_dicts['def']
        elif an == 'BML':
            hpd = NlxHeader.header_pattern_dicts['bv5']
        else:
            hpd = NlxHeader.header_pattern_dicts['def']

        # NOTE(review): assigned but unused beyond validating that the
        # pattern matches (a missing match raises AttributeError on None).
        original_filename = re.search(hpd['filename_regex'], txt_header).groupdict()['filename']

        # opening time
        dt1 = re.search(hpd['datetime1_regex'], txt_header).groupdict()
        info['recording_opened'] = datetime.datetime.strptime(
            dt1['date'] + ' ' + dt1['time'], hpd['datetimeformat'])

        # close time, if available
        if 'datetime2_regex' in hpd:
            dt2 = re.search(hpd['datetime2_regex'], txt_header).groupdict()
            info['recording_closed'] = datetime.datetime.strptime(
                dt2['date'] + ' ' + dt2['time'], hpd['datetimeformat'])

        return info
  673. def type_of_recording(self):
  674. """
  675. Determines type of recording in Ncs file with this header.
  676. RETURN:
  677. one of 'PRE4','BML','DIGITALLYNX','DIGITALLYNXSX','UNKNOWN'
  678. """
  679. if 'NLX_Base_Class_Type' in self:
  680. # older style standard neuralynx acquisition with rounded sampling frequency
  681. if self['NLX_Base_Class_Type'] == 'CscAcqEnt':
  682. return 'PRE4'
  683. # BML style with fractional frequency and microsPerSamp
  684. elif self['NLX_Base_Class_Type'] == 'BmlAcq':
  685. return 'BML'
  686. else:
  687. return 'UNKNOWN'
  688. elif 'HardwareSubSystemType' in self:
  689. # DigitalLynx
  690. if self['HardwareSubSystemType'] == 'DigitalLynx':
  691. return 'DIGITALLYNX'
  692. # DigitalLynxSX
  693. elif self['HardwareSubSystemType'] == 'DigitalLynxSX':
  694. return 'DIGITALLYNXSX'
  695. elif 'FileType' in self:
  696. if self['FileVersion'] in ['3.3', '3.4']:
  697. return self['AcquisitionSystem'].split()[1].upper()
  698. else:
  699. return 'UNKNOWN'
  700. else:
  701. return 'UNKNOWN'
class NcsHeader():
    """
    Representation of information in Ncs file headers, including exact
    recording type.
    """
    # NOTE(review): currently an empty placeholder; no fields or methods yet.
# Layout of one 512-sample record in an .ncs file (after the 16 kB text header).
ncs_dtype = [('timestamp', 'uint64'), ('channel_id', 'uint32'), ('sample_rate', 'uint32'),
             ('nb_valid', 'uint32'), ('samples', 'int16', (BLOCK_SIZE,))]

# Layout of one event record in an .nev file (explicit little-endian fields).
nev_dtype = [
    ('reserved', '<i2'),
    ('system_id', '<i2'),
    ('data_size', '<i2'),
    ('timestamp', '<u8'),
    ('event_id', '<i2'),
    ('ttl_input', '<i2'),
    ('crc_check', '<i2'),
    ('dummy1', '<i2'),
    ('dummy2', '<i2'),
    ('extra', '<i4', (8,)),
    ('event_string', 'S128'),
]
  722. def get_nse_or_ntt_dtype(info, ext):
  723. """
  724. For NSE and NTT the dtype depend on the header.
  725. """
  726. dtype = [('timestamp', 'uint64'), ('channel_id', 'uint32'), ('unit_id', 'uint32')]
  727. # count feature
  728. nb_feature = 0
  729. for k in info.keys():
  730. if k.startswith('Feature '):
  731. nb_feature += 1
  732. dtype += [('features', 'int32', (nb_feature,))]
  733. # count sample
  734. if ext == 'nse':
  735. nb_sample = info['WaveformLength']
  736. dtype += [('samples', 'int16', (nb_sample,))]
  737. elif ext == 'ntt':
  738. nb_sample = info['WaveformLength']
  739. nb_chan = 4 # check this if not tetrode
  740. dtype += [('samples', 'int16', (nb_sample, nb_chan))]
  741. return dtype