pandas_bridge.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. # -*- coding: utf-8 -*-
  2. """
  3. Bridge to the pandas library.
  4. :copyright: Copyright 2014-2016 by the Elephant team, see AUTHORS.txt.
  5. :license: Modified BSD, see LICENSE.txt for details.
  6. """
  7. from __future__ import division, print_function, unicode_literals
  8. import numpy as np
  9. import pandas as pd
  10. import quantities as pq
  11. from elephant.neo_tools import (extract_neo_attrs, get_all_epochs,
  12. get_all_events, get_all_spiketrains)
  13. def _multiindex_from_dict(inds):
  14. """Given a dictionary, return a `pandas.MultiIndex`.
  15. Parameters
  16. ----------
  17. inds : dict
  18. A dictionary where the keys are annotations or attribute names and
  19. the values are the corresponding annotation or attribute value.
  20. Returns
  21. -------
  22. pandas MultiIndex
  23. """
  24. names, indexes = zip(*sorted(inds.items()))
  25. return pd.MultiIndex.from_tuples([indexes], names=names)
  26. def _sort_inds(obj, axis=0):
  27. """Put the indexes and index levels of a pandas object in sorted order.
  28. Paramters
  29. ---------
  30. obj : pandas Series, DataFrame, Panel, or Panel4D
  31. The object whose indexes should be sorted.
  32. axis : int, list, optional, 'all'
  33. The axis whose indexes should be sorted. Default is 0.
  34. Can also be a list of indexes, in which case all of those axes
  35. are sorted. If 'all', sort all indexes.
  36. Returns
  37. -------
  38. pandas Series, DataFrame, Panel, or Panel4D
  39. A copy of the object with indexes sorted.
  40. Indexes are sorted in-place.
  41. """
  42. if axis == 'all':
  43. return _sort_inds(obj, axis=range(obj.ndim))
  44. if hasattr(axis, '__iter__'):
  45. for iax in axis:
  46. obj = _sort_inds(obj, iax)
  47. return obj
  48. obj = obj.reorder_levels(sorted(obj.axes[axis].names), axis=axis)
  49. return obj.sortlevel(0, axis=axis, sort_remaining=True)
  50. def _extract_neo_attrs_safe(obj, parents=True, child_first=True):
  51. """Given a neo object, return a dictionary of attributes and annotations.
  52. This is done in a manner that is safe for `pandas` indexes.
  53. Parameters
  54. ----------
  55. obj : neo object
  56. parents : bool, optional
  57. Also include attributes and annotations from parent neo
  58. objects (if any).
  59. child_first : bool, optional
  60. If True (default True), values of child attributes are used
  61. over parent attributes in the event of a name conflict.
  62. If False, parent attributes are used.
  63. This parameter does nothing if `parents` is False.
  64. Returns
  65. -------
  66. dict
  67. A dictionary where the keys are annotations or attribute names and
  68. the values are the corresponding annotation or attribute value.
  69. """
  70. res = extract_neo_attrs(obj, skip_array=True, skip_none=True,
  71. parents=parents, child_first=child_first)
  72. for key, value in res.items():
  73. res[key] = _convert_value_safe(value)
  74. key2 = _convert_value_safe(key)
  75. if key2 is not key:
  76. res[key2] = res.pop(key)
  77. return res
  78. def _convert_value_safe(value):
  79. """Convert `neo` values to a value compatible with `pandas`.
  80. Some types and dtypes used with neo are not safe to use with pandas in some
  81. or all situations.
  82. `quantities.Quantity` don't follow the normal python rule that values
  83. with that are equal should have the same hash, making it fundamentally
  84. incompatible with `pandas`.
  85. On python 3, `pandas` coerces `S` dtypes to bytes, which are not always
  86. safe to use.
  87. Parameters
  88. ----------
  89. value : any
  90. Value to convert (if it has any known issues).
  91. Returns
  92. -------
  93. any
  94. `value` or a version of value with potential problems fixed.
  95. """
  96. if hasattr(value, 'dimensionality'):
  97. return (value.magnitude.tolist(), str(value.dimensionality))
  98. if hasattr(value, 'dtype') and value.dtype.kind == 'S':
  99. return value.astype('U').tolist()
  100. if hasattr(value, 'tolist'):
  101. return value.tolist()
  102. if hasattr(value, 'decode') and not hasattr(value, 'encode'):
  103. return value.decode('UTF8')
  104. return value
  105. def spiketrain_to_dataframe(spiketrain, parents=True, child_first=True):
  106. """Convert a `neo.SpikeTrain` to a `pandas.DataFrame`.
  107. The `pandas.DataFrame` object has a single column, with each element
  108. being the spike time converted to a `float` value in seconds.
  109. The column heading is a `pandas.MultiIndex` with one index
  110. for each of the scalar attributes and annotations. The `index`
  111. is the spike number.
  112. Parameters
  113. ----------
  114. spiketrain : neo SpikeTrain
  115. The SpikeTrain to convert.
  116. parents : bool, optional
  117. Also include attributes and annotations from parent neo
  118. objects (if any).
  119. Returns
  120. -------
  121. pandas DataFrame
  122. A DataFrame containing the spike times from `spiketrain`.
  123. Notes
  124. -----
  125. The index name is `spike_number`.
  126. Attributes that contain non-scalar values are skipped. So are
  127. annotations or attributes containing a value of `None`.
  128. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  129. and annotations of that type are converted to a tuple where the first
  130. element is the scalar value and the second is the string representation of
  131. the units.
  132. """
  133. attrs = _extract_neo_attrs_safe(spiketrain,
  134. parents=parents, child_first=child_first)
  135. columns = _multiindex_from_dict(attrs)
  136. times = spiketrain.magnitude
  137. times = pq.Quantity(times, spiketrain.units).rescale('s').magnitude
  138. times = times[np.newaxis].T
  139. index = pd.Index(np.arange(len(spiketrain)), name='spike_number')
  140. pdobj = pd.DataFrame(times, index=index, columns=columns)
  141. return _sort_inds(pdobj, axis=1)
  142. def event_to_dataframe(event, parents=True, child_first=True):
  143. """Convert a `neo.core.Event` to a `pandas.DataFrame`.
  144. The `pandas.DataFrame` object has a single column, with each element
  145. being the event label from the `event.label` attribute.
  146. The column heading is a `pandas.MultiIndex` with one index
  147. for each of the scalar attributes and annotations. The `index`
  148. is the time stamp from the `event.times` attribute.
  149. Parameters
  150. ----------
  151. event : neo Event
  152. The Event to convert.
  153. parents : bool, optional
  154. Also include attributes and annotations from parent neo
  155. objects (if any).
  156. child_first : bool, optional
  157. If True (default True), values of child attributes are used
  158. over parent attributes in the event of a name conflict.
  159. If False, parent attributes are used.
  160. This parameter does nothing if `parents` is False.
  161. Returns
  162. -------
  163. pandas DataFrame
  164. A DataFrame containing the labels from `event`.
  165. Notes
  166. -----
  167. If the length of event.times and event.labels are not the same,
  168. the longer will be truncated to the length of the shorter.
  169. The index name is `times`.
  170. Attributes that contain non-scalar values are skipped. So are
  171. annotations or attributes containing a value of `None`.
  172. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  173. and annotations of that type are converted to a tuple where the first
  174. element is the scalar value and the second is the string representation of
  175. the units.
  176. """
  177. attrs = _extract_neo_attrs_safe(event,
  178. parents=parents, child_first=child_first)
  179. columns = _multiindex_from_dict(attrs)
  180. times = event.times.rescale('s').magnitude
  181. labels = event.labels.astype('U')
  182. times = times[:len(labels)]
  183. labels = labels[:len(times)]
  184. index = pd.Index(times, name='times')
  185. pdobj = pd.DataFrame(labels[np.newaxis].T, index=index, columns=columns)
  186. return _sort_inds(pdobj, axis=1)
  187. def epoch_to_dataframe(epoch, parents=True, child_first=True):
  188. """Convert a `neo.core.Epoch` to a `pandas.DataFrame`.
  189. The `pandas.DataFrame` object has a single column, with each element
  190. being the epoch label from the `epoch.label` attribute.
  191. The column heading is a `pandas.MultiIndex` with one index
  192. for each of the scalar attributes and annotations. The `index`
  193. is a `pandas.MultiIndex`, with the first index being the time stamp from
  194. the `epoch.times` attribute and the second being the duration from
  195. the `epoch.durations` attribute.
  196. Parameters
  197. ----------
  198. epoch : neo Epoch
  199. The Epoch to convert.
  200. parents : bool, optional
  201. Also include attributes and annotations from parent neo
  202. objects (if any).
  203. child_first : bool, optional
  204. If True (default True), values of child attributes are used
  205. over parent attributes in the event of a name conflict.
  206. If False, parent attributes are used.
  207. This parameter does nothing if `parents` is False.
  208. Returns
  209. -------
  210. pandas DataFrame
  211. A DataFrame containing the labels from `epoch`.
  212. Notes
  213. -----
  214. If the length of `epoch.times`, `epoch.duration`, and `epoch.labels` are
  215. not the same, the longer will be truncated to the length of the shortest.
  216. The index names for `epoch.times` and `epoch.durations` are `times` and
  217. `durations`, respectively.
  218. Attributes that contain non-scalar values are skipped. So are
  219. annotations or attributes containing a value of `None`.
  220. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  221. and annotations of that type are converted to a tuple where the first
  222. element is the scalar value and the second is the string representation of
  223. the units.
  224. """
  225. attrs = _extract_neo_attrs_safe(epoch,
  226. parents=parents, child_first=child_first)
  227. columns = _multiindex_from_dict(attrs)
  228. times = epoch.times.rescale('s').magnitude
  229. durs = epoch.durations.rescale('s').magnitude
  230. labels = epoch.labels.astype('U')
  231. minlen = min([len(durs), len(times), len(labels)])
  232. index = pd.MultiIndex.from_arrays([times[:minlen], durs[:minlen]],
  233. names=['times', 'durations'])
  234. pdobj = pd.DataFrame(labels[:minlen][np.newaxis].T,
  235. index=index, columns=columns)
  236. return _sort_inds(pdobj, axis='all')
  237. def _multi_objs_to_dataframe(container, conv_func, get_func,
  238. parents=True, child_first=True):
  239. """Convert one or more of a given `neo` object to a `pandas.DataFrame`.
  240. The objects can be any list, dict, or other iterable or mapping containing
  241. the object, as well as any neo object that can hold the object.
  242. Objects are searched recursively, so the objects can be nested (such as a
  243. list of blocks).
  244. The column heading is a `pandas.MultiIndex` with one index
  245. for each of the scalar attributes and annotations of the respective
  246. object.
  247. Parameters
  248. ----------
  249. container : list, tuple, iterable, dict, neo container object
  250. The container for the objects to convert.
  251. parents : bool, optional
  252. Also include attributes and annotations from parent neo
  253. objects (if any).
  254. child_first : bool, optional
  255. If True (default True), values of child attributes are used
  256. over parent attributes in the event of a name conflict.
  257. If False, parent attributes are used.
  258. This parameter does nothing if `parents` is False.
  259. Returns
  260. -------
  261. pandas DataFrame
  262. A DataFrame containing the converted objects.
  263. Attributes that contain non-scalar values are skipped. So are
  264. annotations or attributes containing a value of `None`.
  265. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  266. and annotations of that type are converted to a tuple where the first
  267. element is the scalar value and the second is the string representation of
  268. the units.
  269. """
  270. res = pd.concat([conv_func(obj, parents=parents, child_first=child_first)
  271. for obj in get_func(container)], axis=1)
  272. return _sort_inds(res, axis=1)
  273. def multi_spiketrains_to_dataframe(container,
  274. parents=True, child_first=True):
  275. """Convert one or more `neo.SpikeTrain` objects to a `pandas.DataFrame`.
  276. The objects can be any list, dict, or other iterable or mapping containing
  277. spiketrains, as well as any neo object that can hold spiketrains:
  278. `neo.Block`, `neo.ChannelIndex`, `neo.Unit`, and `neo.Segment`.
  279. Objects are searched recursively, so the objects can be nested (such as a
  280. list of blocks).
  281. The `pandas.DataFrame` object has one column for each spiketrain, with each
  282. element being the spike time converted to a `float` value in seconds.
  283. columns are padded to the same length with `NaN` values.
  284. The column heading is a `pandas.MultiIndex` with one index
  285. for each of the scalar attributes and annotations of the respective
  286. spiketrain. The `index` is the spike number.
  287. Parameters
  288. ----------
  289. container : list, tuple, iterable, dict,
  290. neo Block, neo Segment, neo Unit, neo ChannelIndex
  291. The container for the spiketrains to convert.
  292. parents : bool, optional
  293. Also include attributes and annotations from parent neo
  294. objects (if any).
  295. child_first : bool, optional
  296. If True (default True), values of child attributes are used
  297. over parent attributes in the event of a name conflict.
  298. If False, parent attributes are used.
  299. This parameter does nothing if `parents` is False.
  300. Returns
  301. -------
  302. pandas DataFrame
  303. A DataFrame containing the spike times from `container`.
  304. Notes
  305. -----
  306. The index name is `spike_number`.
  307. Attributes that contain non-scalar values are skipped. So are
  308. annotations or attributes containing a value of `None`.
  309. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  310. and annotations of that type are converted to a tuple where the first
  311. element is the scalar value and the second is the string representation of
  312. the units.
  313. """
  314. return _multi_objs_to_dataframe(container,
  315. spiketrain_to_dataframe,
  316. get_all_spiketrains,
  317. parents=parents, child_first=child_first)
  318. def multi_events_to_dataframe(container, parents=True, child_first=True):
  319. """Convert one or more `neo.Event` objects to a `pandas.DataFrame`.
  320. The objects can be any list, dict, or other iterable or mapping containing
  321. events, as well as any neo object that can hold events:
  322. `neo.Block` and `neo.Segment`. Objects are searched recursively, so the
  323. objects can be nested (such as a list of blocks).
  324. The `pandas.DataFrame` object has one column for each event, with each
  325. element being the event label. columns are padded to the same length with
  326. `NaN` values.
  327. The column heading is a `pandas.MultiIndex` with one index
  328. for each of the scalar attributes and annotations of the respective
  329. event. The `index` is the time stamp from the `event.times` attribute.
  330. Parameters
  331. ----------
  332. container : list, tuple, iterable, dict, neo Block, neo Segment
  333. The container for the events to convert.
  334. parents : bool, optional
  335. Also include attributes and annotations from parent neo
  336. objects (if any).
  337. child_first : bool, optional
  338. If True (default True), values of child attributes are used
  339. over parent attributes in the event of a name conflict.
  340. If False, parent attributes are used.
  341. This parameter does nothing if `parents` is False.
  342. Returns
  343. -------
  344. pandas DataFrame
  345. A DataFrame containing the labels from `container`.
  346. Notes
  347. -----
  348. If the length of event.times and event.labels are not the same for any
  349. individual event, the longer will be truncated to the length of the
  350. shorter for that event. Between events, lengths can differ.
  351. The index name is `times`.
  352. Attributes that contain non-scalar values are skipped. So are
  353. annotations or attributes containing a value of `None`.
  354. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  355. and annotations of that type are converted to a tuple where the first
  356. element is the scalar value and the second is the string representation of
  357. the units.
  358. """
  359. return _multi_objs_to_dataframe(container,
  360. event_to_dataframe, get_all_events,
  361. parents=parents, child_first=child_first)
  362. def multi_epochs_to_dataframe(container, parents=True, child_first=True):
  363. """Convert one or more `neo.Epoch` objects to a `pandas.DataFrame`.
  364. The objects can be any list, dict, or other iterable or mapping containing
  365. epochs, as well as any neo object that can hold epochs:
  366. `neo.Block` and `neo.Segment`. Objects are searched recursively, so the
  367. objects can be nested (such as a list of blocks).
  368. The `pandas.DataFrame` object has one column for each epoch, with each
  369. element being the epoch label. columns are padded to the same length with
  370. `NaN` values.
  371. The column heading is a `pandas.MultiIndex` with one index
  372. for each of the scalar attributes and annotations of the respective
  373. epoch. The `index` is a `pandas.MultiIndex`, with the first index being
  374. the time stamp from the `epoch.times` attribute and the second being the
  375. duration from the `epoch.durations` attribute.
  376. Parameters
  377. ----------
  378. container : list, tuple, iterable, dict, neo Block, neo Segment
  379. The container for the epochs to convert.
  380. parents : bool, optional
  381. Also include attributes and annotations from parent neo
  382. objects (if any).
  383. child_first : bool, optional
  384. If True (default True), values of child attributes are used
  385. over parent attributes in the event of a name conflict.
  386. If False, parent attributes are used.
  387. This parameter does nothing if `parents` is False.
  388. Returns
  389. -------
  390. pandas DataFrame
  391. A DataFrame containing the labels from `container`.
  392. Notes
  393. -----
  394. If the length of `epoch.times`, `epoch.duration`, and `epoch.labels` are
  395. not the same for any individual epoch, the longer will be truncated to the
  396. length of the shorter for that epoch. Between epochs, lengths can differ.
  397. The index level names for `epoch.times` and `epoch.durations` are
  398. `times` and `durations`, respectively.
  399. Attributes that contain non-scalar values are skipped. So are
  400. annotations or attributes containing a value of `None`.
  401. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  402. and annotations of that type are converted to a tuple where the first
  403. element is the scalar value and the second is the string representation of
  404. the units.
  405. """
  406. return _multi_objs_to_dataframe(container,
  407. epoch_to_dataframe, get_all_epochs,
  408. parents=parents, child_first=child_first)
  409. def slice_spiketrain(pdobj, t_start=None, t_stop=None):
  410. """Slice a `pandas.DataFrame`, changing indices appropriately.
  411. Values outside the sliced range are converted to `NaN` values.
  412. Slicing happens over columns.
  413. This sets the `t_start` and `t_stop` column indexes to be the new values.
  414. Otherwise it is the same as setting values outside the range to `NaN`.
  415. Parameters
  416. ----------
  417. pdobj : pandas DataFrame
  418. The DataFrame to slice.
  419. t_start : float, optional.
  420. If specified, the returned DataFrame values less than this set
  421. to `NaN`.
  422. Default is `None` (do not use this argument).
  423. t_stop : float, optional.
  424. If specified, the returned DataFrame values greater than this set
  425. to `NaN`.
  426. Default is `None` (do not use this argument).
  427. Returns
  428. -------
  429. pdobj : scalar, pandas Series, DataFrame, or Panel
  430. The returned data type is the same as the type of `pdobj`
  431. Note
  432. ----
  433. The order of the index and/or column levels of the returned object may
  434. differ from the order of the original.
  435. If `t_start` or `t_stop` is specified, all columns indexes will be changed
  436. to the respective values, including those already within the new range.
  437. If `t_start` or `t_stop` is not specified, those column indexes will not
  438. be changed.
  439. Returns a copy, even if `t_start` and `t_stop` are both `None`.
  440. """
  441. if t_start is None and t_stop is None:
  442. return pdobj.copy()
  443. if t_stop is not None:
  444. pdobj[pdobj > t_stop] = np.nan
  445. pdobj = pdobj.T.reset_index(level='t_stop')
  446. pdobj['t_stop'] = t_stop
  447. pdobj = pdobj.set_index('t_stop', append=True).T
  448. pdobj = _sort_inds(pdobj, axis=1)
  449. if t_start is not None:
  450. pdobj[pdobj < t_start] = np.nan
  451. pdobj = pdobj.T.reset_index(level='t_start')
  452. pdobj['t_start'] = t_start
  453. pdobj = pdobj.set_index('t_start', append=True).T
  454. pdobj = _sort_inds(pdobj, axis=1)
  455. return pdobj