  1. """
  2. Class for reading output files from NEST simulations
  3. ( http://www.nest-simulator.org/ ).
  4. Tested with NEST2.10.0
  5. Depends on: numpy, quantities
  6. Supported: Read
  7. Authors: Julia Sprenger, Maximilian Schmidt, Johanna Senk
  8. """
import os.path
import warnings
from datetime import datetime

import numpy as np
import quantities as pq

from neo.io.baseio import BaseIO
from neo.core import Block, Segment, SpikeTrain, AnalogSignal

value_type_dict = {'V': pq.mV,
                   'I': pq.pA,
                   'g': pq.CompoundUnit("10^-9*S"),
                   'no type': pq.dimensionless}
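# Recorded value types are mapped to units via their prefix before the first
# underscore (see _check_input_values_parameters); e.g. a 'V_m' column
# resolves to pq.mV and a 'g_e' column to 10^-9 S (nanosiemens).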


class NestIO(BaseIO):
    """
    Class for reading NEST output files. GDF files for the spike data and DAT
    files for analog signals are possible.

    Usage:
        >>> from neo.io.nestio import NestIO

        >>> files = ['membrane_voltages-1261-0.dat',
        ...          'spikes-1258-0.gdf']
        >>> r = NestIO(filenames=files)
        >>> seg = r.read_segment(gid_list=[], t_start=400 * pq.ms,
        ...                      t_stop=600 * pq.ms,
        ...                      id_column_gdf=0, time_column_gdf=1,
        ...                      id_column_dat=0, time_column_dat=1,
        ...                      value_columns_dat=2)
    """
    is_readable = True  # class supports reading, but not writing
    is_writable = False

    supported_objects = [SpikeTrain, AnalogSignal, Segment, Block]
    readable_objects = [SpikeTrain, AnalogSignal, Segment, Block]

    has_header = False
    is_streameable = False

    write_params = None  # writing is not supported

    name = 'nest'
    extensions = ['gdf', 'dat']
    mode = 'file'

    def __init__(self, filenames=None):
        """
        Parameters
        ----------
        filenames: string or list of strings, default=None
            The filename or list of filenames to load.
        """
        if isinstance(filenames, str):
            filenames = [filenames]

        self.filenames = filenames
        self.avail_formats = {}
        self.avail_IOs = {}

        for filename in filenames:
            path, ext = os.path.splitext(filename)
            ext = ext.strip('.')
            if ext in self.extensions:
                if ext in self.avail_IOs:
                    raise ValueError('Received multiple files with "%s" '
                                     'extension. Can only load a single file '
                                     'of this type.' % ext)
                self.avail_IOs[ext] = ColumnIO(filename)
                self.avail_formats[ext] = path

    def __read_analogsignals(self, gid_list, time_unit, t_start=None,
                             t_stop=None, sampling_period=None,
                             id_column=0, time_column=1,
                             value_columns=2, value_types=None,
                             value_units=None):
        """
        Internal function called by read_analogsignal() and read_segment().
        """
        if 'dat' not in self.avail_formats:
            raise ValueError('Can not load analogsignals. No DAT file '
                             'provided.')

        # checking gid input parameters
        gid_list, id_column = self._check_input_gids(gid_list, id_column)
        # checking time input parameters
        t_start, t_stop = self._check_input_times(t_start, t_stop,
                                                  mandatory=False)

        # checking value input parameters
        (value_columns, value_types, value_units) = \
            self._check_input_values_parameters(value_columns, value_types,
                                                value_units)

        # defining standard column order for internal usage
        # [id_column, time_column, value_column1, value_column2, ...]
        column_ids = [id_column, time_column] + value_columns
        for i, cid in enumerate(column_ids):
            if cid is None:
                column_ids[i] = -1

        # assert that no single column is assigned twice
        column_list = [id_column, time_column] + value_columns
        column_list_no_None = [c for c in column_list if c is not None]
        if len(np.unique(column_list_no_None)) < len(column_list_no_None):
            raise ValueError(
                'One or more columns have been specified to contain '
                'the same data. Columns were specified to %s.'
                '' % column_list_no_None)

        # extracting condition and sorting parameters for raw data loading
        (condition, condition_column,
         sorting_column) = self._get_conditions_and_sorting(id_column,
                                                            time_column,
                                                            gid_list,
                                                            t_start,
                                                            t_stop)
        # loading raw data columns
        data = self.avail_IOs['dat'].get_columns(
            column_ids=column_ids,
            condition=condition,
            condition_column=condition_column,
            sorting_columns=sorting_column)

        sampling_period = self._check_input_sampling_period(sampling_period,
                                                            time_column,
                                                            time_unit,
                                                            data)
        analogsignal_list = []

        # extracting complete gid list for anasig generation
        if (gid_list == []) and id_column is not None:
            gid_list = np.unique(data[:, id_column])

        # generate analogsignals for each neuron ID
        for i in gid_list:
            selected_ids = self._get_selected_ids(
                i, id_column, time_column, t_start, t_stop, time_unit,
                data)

            # extract starting time of analogsignal
            if (time_column is not None) and data.size:
                anasig_start_time = data[selected_ids[0], 1] * time_unit
            else:
                # set t_start equal to sampling_period because NEST starts
                # recording only after 1 sampling_period
                anasig_start_time = 1. * sampling_period

            # create one analogsignal per value column requested
            for v_id, value_column in enumerate(value_columns):
                signal = data[
                    selected_ids[0]:selected_ids[1], value_column]

                # create AnalogSignal objects and annotate them with
                # the neuron ID
                analogsignal_list.append(AnalogSignal(
                    signal * value_units[v_id],
                    sampling_period=sampling_period,
                    t_start=anasig_start_time,
                    id=i,
                    type=value_types[v_id]))
                # check for correct length of analogsignal
                assert (analogsignal_list[-1].t_stop
                        == anasig_start_time + len(signal) * sampling_period)
        return analogsignal_list

    def __read_spiketrains(self, gdf_id_list, time_unit,
                           t_start, t_stop, id_column,
                           time_column, **args):
        """
        Internal function for reading multiple spiketrains at once.
        This function is called by read_spiketrain() and read_segment().
        """
        if 'gdf' not in self.avail_IOs:
            raise ValueError('Can not load spiketrains. No GDF file '
                             'provided.')

        # assert that the file contains spike times
        if time_column is None:
            raise ValueError('Time column is None. No spike times to '
                             'be read in.')

        gdf_id_list, id_column = self._check_input_gids(gdf_id_list,
                                                        id_column)

        t_start, t_stop = self._check_input_times(t_start, t_stop,
                                                  mandatory=True)

        # assert that no single column is assigned twice
        if id_column == time_column:
            raise ValueError('One or more columns have been specified to '
                             'contain the same data.')

        # defining standard column order for internal usage
        # [id_column, time_column, value_column1, value_column2, ...]
        column_ids = [id_column, time_column]
        for i, cid in enumerate(column_ids):
            if cid is None:
                column_ids[i] = -1

        (condition, condition_column, sorting_column) = \
            self._get_conditions_and_sorting(id_column, time_column,
                                             gdf_id_list, t_start, t_stop)

        data = self.avail_IOs['gdf'].get_columns(
            column_ids=column_ids,
            condition=condition,
            condition_column=condition_column,
            sorting_columns=sorting_column)

        # create a list of SpikeTrains for all neuron IDs in gdf_id_list
        # assign spike times to neuron IDs if id_column is given
        if id_column is not None:
            if gdf_id_list == []:
                gdf_id_list = np.unique(data[:, id_column])

            spiketrain_list = []
            for nid in gdf_id_list:
                selected_ids = self._get_selected_ids(nid, id_column,
                                                      time_column, t_start,
                                                      t_stop, time_unit, data)
                times = data[selected_ids[0]:selected_ids[1], time_column]
                spiketrain_list.append(SpikeTrain(
                    times, units=time_unit,
                    t_start=t_start, t_stop=t_stop,
                    id=nid, **args))

        # if id_column is not given, all spike times are collected in one
        # spike train with id=None
        else:
            train = data[:, time_column]
            spiketrain_list = [SpikeTrain(train, units=time_unit,
                                          t_start=t_start, t_stop=t_stop,
                                          id=None, **args)]
        return spiketrain_list

    def _check_input_times(self, t_start, t_stop, mandatory=True):
        """
        Checks input times for existence and sets default values if
        necessary.

        t_start: pq.quantity.Quantity, start time of the time range to load.
        t_stop: pq.quantity.Quantity, stop time of the time range to load.
        mandatory: bool, if True times can not be None and an error will be
                   raised. if False, time values of None will be replaced by
                   -infinity or infinity, respectively. default: True.
        """
        if t_stop is None:
            if mandatory:
                raise ValueError('No t_stop specified.')
            else:
                t_stop = np.inf * pq.s
        if t_start is None:
            if mandatory:
                raise ValueError('No t_start specified.')
            else:
                t_start = -np.inf * pq.s

        for time in (t_start, t_stop):
            if not isinstance(time, pq.quantity.Quantity):
                raise TypeError('Time value (%s) is not a quantity.' % time)
        return t_start, t_stop

    def _check_input_values_parameters(self, value_columns, value_types,
                                       value_units):
        """
        Checks value parameters for consistency.

        value_columns: int or list of int, column ids containing the values
                       to load.
        value_types: list of strings, type of values.
        value_units: list of units of the value columns.

        Returns
        adjusted list of [value_columns, value_types, value_units]
        """
        if value_columns is None:
            raise ValueError('No value column provided.')
        if isinstance(value_columns, int):
            value_columns = [value_columns]
        if value_types is None:
            value_types = ['no type'] * len(value_columns)
        elif isinstance(value_types, str):
            value_types = [value_types]

        # translating value types into units as far as possible
        if value_units is None:
            short_value_types = [vtype.split('_')[0] for vtype in value_types]
            if not all([svt in value_type_dict for svt in short_value_types]):
                raise ValueError('Can not interpret value types '
                                 '"%s"' % value_types)
            value_units = [value_type_dict[svt] for svt in short_value_types]

        # checking for same number of value types, units and columns
        if not (len(value_types) == len(value_units) == len(value_columns)):
            raise ValueError('Length of value types, units and columns does '
                             'not match (%i,%i,%i)' % (len(value_types),
                                                       len(value_units),
                                                       len(value_columns)))
        if not all([isinstance(vunit, pq.UnitQuantity) for vunit in
                    value_units]):
            raise ValueError('No value unit or standard value type '
                             'specified.')
        return value_columns, value_types, value_units

    def _check_input_gids(self, gid_list, id_column):
        """
        Checks gid values and column for consistency.

        gid_list: list of int or None, gid to load.
        id_column: int, id of the column containing the gids.

        Returns
        adjusted list of [gid_list, id_column].
        """
        if gid_list is None:
            gid_list = [gid_list]

        if None in gid_list and id_column is not None:
            raise ValueError('No neuron IDs specified but file contains '
                             'neuron IDs in column %s. Specify empty list to '
                             'retrieve spiketrains of all neurons.'
                             '' % str(id_column))

        if gid_list != [None] and id_column is None:
            raise ValueError('Specified neuron IDs to be %s, but no ID '
                             'column specified.' % gid_list)
        return gid_list, id_column

    def _check_input_sampling_period(self, sampling_period, time_column,
                                     time_unit, data):
        """
        Checks sampling period, times and time unit for consistency.

        sampling_period: pq.quantity.Quantity, sampling period of data to
                         load.
        time_column: int, column id of times in data to load.
        time_unit: pq.quantity.Quantity, unit of time used in the data to
                   load.
        data: numpy array, the data to be loaded / interpreted.

        Returns
        pq.quantities.Quantity object, the updated sampling period.
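
        Example
        -------
        A sketch of the estimation below, with hypothetical values: time
        stamps recorded every 0.1 ms yield dt == 0.1, so the returned
        sampling period is pq.CompoundUnit('0.1*ms').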
  296. """
        if sampling_period is None:
            if time_column is not None:
                # estimate the sampling period from the time stamps, which
                # sit in the second column of the standardized column order
                data_sampling = np.unique(
                    np.diff(sorted(np.unique(data[:, 1]))))
                if len(data_sampling) > 1:
                    raise ValueError('Different sampling distances found in '
                                     'data set (%s)' % data_sampling)
                else:
                    dt = data_sampling[0]
            else:
                raise ValueError('Can not estimate sampling rate without '
                                 'time column id provided.')
            sampling_period = pq.CompoundUnit(str(dt) + '*'
                                              + time_unit.units.u_symbol)
        elif not isinstance(sampling_period, pq.UnitQuantity):
            raise ValueError("sampling_period is not specified as a unit.")
        return sampling_period

    def _get_conditions_and_sorting(self, id_column, time_column, gid_list,
                                    t_start, t_stop):
        """
        Calculates the condition, condition_column and sorting_column based
        on other parameters supplied for loading the data.

        id_column: int, id of the column containing gids.
        time_column: int, id of the column containing times.
        gid_list: list of int, gid to be loaded.
        t_start: pq.quantity.Quantity, start of the time range to be loaded.
        t_stop: pq.quantity.Quantity, stop of the time range to be loaded.

        Returns
        updated [condition, condition_column, sorting_column].
        """
        condition, condition_column = None, None
        sorting_column = []
        curr_id = 0
        if ((gid_list != [None]) and (gid_list is not None)):
            if gid_list != []:
                def condition(x):
                    return x in gid_list

                condition_column = id_column
            sorting_column.append(curr_id)  # Sorting according to gids first
            curr_id += 1
        if time_column is not None:
            sorting_column.append(curr_id)  # Sorting according to time
            curr_id += 1
        elif t_start != -np.inf and t_stop != np.inf:
            warnings.warn('Ignoring t_start and t_stop parameters, because '
                          'no time column id is provided.')
        if sorting_column == []:
            sorting_column = None
        else:
            sorting_column = sorting_column[::-1]
        return condition, condition_column, sorting_column

    def _get_selected_ids(self, gid, id_column, time_column, t_start, t_stop,
                          time_unit, data):
        """
        Calculates the data range to load depending on the selected gid
        and the provided time range (t_start, t_stop)

        gid: int, gid to be loaded.
        id_column: int, id of the column containing gids.
        time_column: int, id of the column containing times.
        t_start: pq.quantity.Quantity, start of the time range to load.
        t_stop: pq.quantity.Quantity, stop of the time range to load.
        time_unit: pq.quantity.Quantity, time unit of the data to load.
        data: numpy array, data to load.

        Returns
        numpy array of [start_row, stop_row] indices delimiting the rows of
        data belonging to the selected gid and time range.
        """
        gid_ids = np.array([0, data.shape[0]])
        if id_column is not None:
            # data is sorted by gid, so the rows of one gid form a
            # contiguous block that can be located via binary search
            gid_ids = np.array(
                [np.searchsorted(data[:, 0], gid, side='left'),
                 np.searchsorted(data[:, 0], gid, side='right')])
        gid_data = data[gid_ids[0]:gid_ids[1], :]

        # select only requested time range
        id_shifts = np.array([0, 0])
        if time_column is not None:
            id_shifts[0] = np.searchsorted(gid_data[:, 1],
                                           t_start.rescale(
                                               time_unit).magnitude,
                                           side='left')
            id_shifts[1] = (np.searchsorted(gid_data[:, 1],
                                            t_stop.rescale(
                                                time_unit).magnitude,
                                            side='left') - gid_data.shape[0])

        selected_ids = gid_ids + id_shifts
        return selected_ids

    def read_block(self, gid_list=None, time_unit=pq.ms, t_start=None,
                   t_stop=None, sampling_period=None, id_column_dat=0,
                   time_column_dat=1, value_columns_dat=2,
                   id_column_gdf=0, time_column_gdf=1, value_types=None,
                   value_units=None, lazy=False):
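        """
        Reads a Block containing a single Segment with the data read from
        the given files. All arguments are passed on to read_segment(); see
        its docstring for details.
        """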
        assert not lazy, 'Do not support lazy'

        seg = self.read_segment(gid_list, time_unit, t_start,
                                t_stop, sampling_period, id_column_dat,
                                time_column_dat, value_columns_dat,
                                id_column_gdf, time_column_gdf, value_types,
                                value_units)
        blk = Block(file_origin=seg.file_origin,
                    file_datetime=seg.file_datetime)
        blk.segments.append(seg)
        seg.block = blk
        return blk

    def read_segment(self, gid_list=None, time_unit=pq.ms, t_start=None,
                     t_stop=None, sampling_period=None, id_column_dat=0,
                     time_column_dat=1, value_columns_dat=2,
                     id_column_gdf=0, time_column_gdf=1, value_types=None,
                     value_units=None, lazy=False):
  401. """
  402. Reads a Segment which contains SpikeTrain(s) with specified neuron IDs
  403. from the GDF data.
  404. Arguments
  405. ----------
  406. gid_list : list, default: None
  407. A list of GDF IDs of which to return SpikeTrain(s). gid_list must
  408. be specified if the GDF file contains neuron IDs, the default None
  409. then raises an error. Specify an empty list [] to retrieve the spike
  410. trains of all neurons.
  411. time_unit : Quantity (time), optional, default: quantities.ms
  412. The time unit of recorded time stamps in DAT as well as GDF files.
  413. t_start : Quantity (time), optional, default: 0 * pq.ms
  414. Start time of SpikeTrain.
  415. t_stop : Quantity (time), default: None
  416. Stop time of SpikeTrain. t_stop must be specified, the default None
  417. raises an error.
  418. sampling_period : Quantity (frequency), optional, default: None
  419. Sampling period of the recorded data.
  420. id_column_dat : int, optional, default: 0
  421. Column index of neuron IDs in the DAT file.
  422. time_column_dat : int, optional, default: 1
  423. Column index of time stamps in the DAT file.
  424. value_columns_dat : int, optional, default: 2
  425. Column index of the analog values recorded in the DAT file.
  426. id_column_gdf : int, optional, default: 0
  427. Column index of neuron IDs in the GDF file.
  428. time_column_gdf : int, optional, default: 1
  429. Column index of time stamps in the GDF file.
  430. value_types : str, optional, default: None
  431. Nest data type of the analog values recorded, eg.'V_m', 'I', 'g_e'
  432. value_units : Quantity (amplitude), default: None
  433. The physical unit of the recorded signal values.
  434. lazy : bool, optional, default: False
  435. Returns
  436. -------
  437. seg : Segment
  438. The Segment contains one SpikeTrain and one AnalogSignal for
  439. each ID in gid_list.
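
        Examples
        --------
        A minimal sketch, reusing the hypothetical file names from the class
        docstring; the column arguments shown are the defaults and are
        listed only for clarity:

        >>> import quantities as pq
        >>> from neo.io.nestio import NestIO
        >>> r = NestIO(filenames=['membrane_voltages-1261-0.dat',
        ...                       'spikes-1258-0.gdf'])
        >>> seg = r.read_segment(gid_list=[], t_start=400 * pq.ms,
        ...                      t_stop=600 * pq.ms,
        ...                      id_column_gdf=0, time_column_gdf=1,
        ...                      id_column_dat=0, time_column_dat=1,
        ...                      value_columns_dat=2)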
  440. """
        assert not lazy, 'Do not support lazy'

        if isinstance(gid_list, tuple):
            if gid_list[0] > gid_list[1]:
                raise ValueError('The second entry in gid_list must be '
                                 'greater or equal to the first entry.')
            gid_list = range(gid_list[0], gid_list[1] + 1)

        # __read_xxx() needs a list of IDs
        if gid_list is None:
            gid_list = [None]

        # create an empty Segment
        seg = Segment(file_origin=",".join(self.filenames))
        seg.file_datetime = datetime.fromtimestamp(
            os.stat(self.filenames[0]).st_mtime)
        # todo: rather than take the first file for the timestamp, we should
        # take the oldest; in practice, there won't be much difference

        # Load analogsignals and attach to Segment
        if 'dat' in self.avail_formats:
            seg.analogsignals = self.__read_analogsignals(
                gid_list,
                time_unit,
                t_start,
                t_stop,
                sampling_period=sampling_period,
                id_column=id_column_dat,
                time_column=time_column_dat,
                value_columns=value_columns_dat,
                value_types=value_types,
                value_units=value_units)
        if 'gdf' in self.avail_formats:
            seg.spiketrains = self.__read_spiketrains(
                gid_list,
                time_unit,
                t_start,
                t_stop,
                id_column=id_column_gdf,
                time_column=time_column_gdf)

        return seg

    def read_analogsignal(self, gid=None, time_unit=pq.ms, t_start=None,
                          t_stop=None, sampling_period=None, id_column=0,
                          time_column=1, value_column=2, value_type=None,
                          value_unit=None, lazy=False):
  481. """
  482. Reads an AnalogSignal with specified neuron ID from the DAT data.
  483. Arguments
  484. ----------
  485. gid : int, default: None
  486. The GDF ID of the returned SpikeTrain. gdf_id must be specified if
  487. the GDF file contains neuron IDs, the default None then raises an
  488. error. Specify an empty list [] to retrieve the spike trains of all
  489. neurons.
  490. time_unit : Quantity (time), optional, default: quantities.ms
  491. The time unit of recorded time stamps.
  492. t_start : Quantity (time), optional, default: 0 * pq.ms
  493. Start time of SpikeTrain.
  494. t_stop : Quantity (time), default: None
  495. Stop time of SpikeTrain. t_stop must be specified, the default None
  496. raises an error.
  497. sampling_period : Quantity (frequency), optional, default: None
  498. Sampling period of the recorded data.
  499. id_column : int, optional, default: 0
  500. Column index of neuron IDs.
  501. time_column : int, optional, default: 1
  502. Column index of time stamps.
  503. value_column : int, optional, default: 2
  504. Column index of the analog values recorded.
  505. value_type : str, optional, default: None
  506. Nest data type of the analog values recorded, eg.'V_m', 'I', 'g_e'.
  507. value_unit : Quantity (amplitude), default: None
  508. The physical unit of the recorded signal values.
  509. lazy : bool, optional, default: False
  510. Returns
  511. -------
  512. spiketrain : SpikeTrain
  513. The requested SpikeTrain object with an annotation 'id'
  514. corresponding to the gdf_id parameter.
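
        Examples
        --------
        A minimal sketch, assuming a hypothetical multimeter output file
        'membrane_voltages-1261-0.dat' whose columns hold neuron ID, time
        stamp and membrane potential:

        >>> import quantities as pq
        >>> from neo.io.nestio import NestIO
        >>> r = NestIO(filenames='membrane_voltages-1261-0.dat')
        >>> sig = r.read_analogsignal(gid=1, t_stop=600 * pq.ms,
        ...                           sampling_period=pq.ms,
        ...                           value_type='V_m')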
  515. """
        assert not lazy, 'Do not support lazy'

        # __read_analogsignals() needs a list of IDs
        return self.__read_analogsignals([gid], time_unit,
                                         t_start, t_stop,
                                         sampling_period=sampling_period,
                                         id_column=id_column,
                                         time_column=time_column,
                                         value_columns=value_column,
                                         value_types=value_type,
                                         value_units=value_unit)[0]

    def read_spiketrain(
            self, gdf_id=None, time_unit=pq.ms, t_start=None, t_stop=None,
            id_column=0, time_column=1, lazy=False, **args):
  529. """
  530. Reads a SpikeTrain with specified neuron ID from the GDF data.
  531. Arguments
  532. ----------
  533. gdf_id : int, default: None
  534. The GDF ID of the returned SpikeTrain. gdf_id must be specified if
  535. the GDF file contains neuron IDs. Providing [] loads all available
  536. IDs.
  537. time_unit : Quantity (time), optional, default: quantities.ms
  538. The time unit of recorded time stamps.
  539. t_start : Quantity (time), default: None
  540. Start time of SpikeTrain. t_start must be specified.
  541. t_stop : Quantity (time), default: None
  542. Stop time of SpikeTrain. t_stop must be specified.
  543. id_column : int, optional, default: 0
  544. Column index of neuron IDs.
  545. time_column : int, optional, default: 1
  546. Column index of time stamps.
  547. lazy : bool, optional, default: False
  548. Returns
  549. -------
  550. spiketrain : SpikeTrain
  551. The requested SpikeTrain object with an annotation 'id'
  552. corresponding to the gdf_id parameter.
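
        Examples
        --------
        A minimal sketch, assuming a hypothetical spike recording file
        'spikes-1258-0.gdf' with the default column layout:

        >>> import quantities as pq
        >>> from neo.io.nestio import NestIO
        >>> r = NestIO(filenames='spikes-1258-0.gdf')
        >>> st = r.read_spiketrain(gdf_id=1, t_start=0 * pq.ms,
        ...                        t_stop=600 * pq.ms)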
  553. """
        assert not lazy, 'Do not support lazy'

        if (not isinstance(gdf_id, int)) and gdf_id is not None:
            raise ValueError('gdf_id has to be of type int or None.')

        if gdf_id is None and id_column is not None:
            raise ValueError('No neuron ID specified but file contains '
                             'neuron IDs in column ' + str(id_column) + '.')

        return self.__read_spiketrains([gdf_id], time_unit,
                                       t_start, t_stop,
                                       id_column, time_column,
                                       **args)[0]


class ColumnIO:
    '''
    Class for reading an ASCII file containing multiple columns of data.
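
    A minimal usage sketch, assuming a hypothetical whitespace-separated
    file 'spikes-1258-0.gdf' whose first column holds neuron IDs and whose
    second column holds spike times:

    >>> cio = ColumnIO('spikes-1258-0.gdf')
    >>> # rows of neuron 5, sorted by spike time, keeping the time column
    >>> times = cio.get_columns(column_ids=[1],
    ...                         condition=lambda x: x == 5,
    ...                         condition_column=0,
    ...                         sorting_columns=[1])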
    '''

    def __init__(self, filename):
        """
        filename: string, path to ASCII file to read.
        """
        self.filename = filename

        # read the first line to check the data type (int or float) of the
        # data
        with open(self.filename) as f:
            line = f.readline()

        additional_parameters = {}
        if '.' not in line:
            additional_parameters['dtype'] = np.int32

        self.data = np.loadtxt(self.filename, **additional_parameters)

        if len(self.data.shape) == 1:
            self.data = self.data[:, np.newaxis]

    def get_columns(self, column_ids='all', condition=None,
                    condition_column=None, sorting_columns=None):
        """
        column_ids : 'all' or list of int, the ids of columns to
                     extract.
        condition : None or function, which is applied to each row to
                    evaluate if it should be included in the result.
                    Needs to return a bool value.
        condition_column : int, id of the column on which the condition
                           function is applied.
        sorting_columns : int or list of int, column ids to sort by.
                          List entries have to be ordered by increasing
                          sorting priority!

        Returns
        -------
        numpy array containing the requested data.
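
        Examples
        --------
        A sketch of the sorting semantics, continuing the hypothetical data
        from the class docstring: with sorting_columns=[1, 0] the rows are
        ordered primarily by column 0 and, for equal values there, by
        column 1, because later list entries take higher priority:

        >>> both = cio.get_columns(column_ids='all', sorting_columns=[1, 0])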
  598. """
        if column_ids == [] or column_ids == 'all':
            column_ids = range(self.data.shape[-1])

        if isinstance(column_ids, (int, float)):
            column_ids = [column_ids]
        column_ids = np.array(column_ids)

        if column_ids is not None:
            if max(column_ids) >= self.data.shape[1]:
                raise ValueError('Can not load column ID %i. File contains '
                                 'only %i columns' % (max(column_ids),
                                                      self.data.shape[1]))

        if sorting_columns is not None:
            if isinstance(sorting_columns, int):
                sorting_columns = [sorting_columns]
            if max(sorting_columns) >= self.data.shape[1]:
                raise ValueError('Can not sort by column ID %i. File '
                                 'contains only %i columns'
                                 % (max(sorting_columns),
                                    self.data.shape[1]))

        # Starting with whole dataset being selected for return
        selected_data = self.data

        # Apply filter condition to rows
        if condition and (condition_column is None):
            raise ValueError('Filter condition provided, but no '
                             'condition_column ID provided')
        elif (condition_column is not None) and (condition is None):
            warnings.warn('Condition column ID provided, but no condition '
                          'given. No filtering will be performed.')
        elif (condition is not None) and (condition_column is not None):
            condition_function = np.vectorize(condition)
            mask = condition_function(
                selected_data[:, condition_column]).astype(bool)
            selected_data = selected_data[mask, :]

        # Apply sorting if requested; np.lexsort treats the last key as the
        # primary one, so the entries of sorting_columns are expected in
        # order of increasing priority
        if sorting_columns is not None:
            values_to_sort = selected_data[:, sorting_columns].T
            ordered_ids = np.lexsort(tuple(values_to_sort[i] for i in
                                           range(len(values_to_sort))))
            selected_data = selected_data[ordered_ids, :]

        # Select only requested columns
        selected_data = selected_data[:, column_ids]

        return selected_data