# -*- coding: utf-8 -*-
"""
This algorithm determines if a spike train `spk` can be considered as a
stationary process (constant firing rate) or a non-stationary process (i.e.
presence of one or more points at which the rate increases or decreases). In
case of non-stationarity, the output is a list of detected Change Points (CPs).
Essentially, a set of two-sided windows of width `h` (`_filter(t, h, spk)`)
slides over the spike train within the time `[h, t_final-h]`. This generates a
`_filter_process(time_step, h, spk)` that assigns at each time `t` the
difference between the spike counts in the right and left window. If at any
time `t` this difference is large enough, the presence of a rate Change Point
in a neighborhood of `t` is assumed. A threshold `test_quantile` for the
maximum of the filter_process (max difference of spike count between the left
and right window) is derived based on asymptotic considerations. The procedure
is repeated for an arbitrary set of windows, with different size `h`.

Examples
--------
The following applies multiple_filter_test to a spike train.

>>> import quantities as pq
>>> import neo
>>> from elephant.change_point_detection import multiple_filter_test
...
>>> test_array = [1.1, 1.2, 1.4, 1.6, 1.7, 1.75, 1.8, 1.85, 1.9, 1.95]
>>> st = neo.SpikeTrain(test_array, units='s', t_stop=2.1)
>>> window_size = [0.5] * pq.s
>>> t_fin = 2.1 * pq.s
>>> alpha = 5.0
>>> num_surrogates = 10000
>>> change_points = multiple_filter_test(
...     window_size, st, t_fin, alpha, num_surrogates, time_step=0.5 * pq.s)

References
----------
Messer, M., Kirchner, M., Schiemann, J., Roeper, J., Neininger, R., &
Schneider, G. (2014). A multiple filter test for the detection of rate changes
in renewal processes with varying variance. The Annals of Applied Statistics,
8(4), 2027-2067.

Original code
-------------
Adapted from the published R implementation, DOI: 10.1214/14-AOAS782SUPP (.r).
"""
from __future__ import division, print_function, unicode_literals

import numpy as np
import quantities as pq

from elephant.utils import deprecated_alias
  46. __all__ = [
  47. "multiple_filter_test",
  48. "empirical_parameters"
  49. ]
  50. @deprecated_alias(dt='time_step')
  51. def multiple_filter_test(window_sizes, spiketrain, t_final, alpha,
  52. n_surrogates, test_quantile=None, test_param=None,
  53. time_step=None):
  54. """
  55. Detects change points.
  56. This function returns the detected change points, that correspond to the
  57. maxima of the `_filter_processes`. These are the processes generated by
  58. sliding the windows of step `time_step`; at each step the difference
  59. between spike on the right and left window is calculated.
  60. Parameters
  61. ----------
  62. window_sizes : list of quantity objects
  63. list that contains windows sizes
  64. spiketrain : neo.SpikeTrain, numpy array or list
  65. spiketrain objects to analyze
  66. t_final : quantity
  67. final time of the spike train which is to be analysed
  68. alpha : float
  69. alpha-quantile in range [0, 100] for the set of maxima of the limit
  70. processes
  71. n_surrogates : integer
  72. numbers of simulated limit processes
  73. test_quantile : float
  74. threshold for the maxima of the filter derivative processes, if any
  75. of these maxima is larger than this value, it is assumed the
  76. presence of a cp at the time corresponding to that maximum
  77. time_step : quantity
  78. resolution, time step at which the windows are slided
  79. test_param : np.array of shape (3, num of window),
  80. first row: list of `h`, second and third rows: empirical means and
  81. variances of the limit process correspodning to `h`. This will be
  82. used to normalize the `filter_process` in order to give to the
  83. every maximum the same impact on the global statistic.
  84. Returns
  85. -------
  86. cps : list of list
  87. one list for each window size `h`, containing the points detected with
  88. the corresponding `filter_process`. N.B.: only cps whose h-neighborhood
  89. does not include previously detected cps (with smaller window h) are
  90. added to the list.
  91. """
  92. if (test_quantile is None) and (test_param is None):
  93. test_quantile, test_param = empirical_parameters(window_sizes, t_final,
  94. alpha, n_surrogates,
  95. time_step)
  96. elif test_quantile is None:
  97. test_quantile = empirical_parameters(window_sizes, t_final, alpha,
  98. n_surrogates, time_step)[0]
  99. elif test_param is None:
  100. test_param = empirical_parameters(window_sizes, t_final, alpha,
  101. n_surrogates, time_step)[1]
  102. spk = spiketrain
  103. # List of lists of detected change points (CPs), to be returned
  104. cps = []
  105. for i, h in enumerate(window_sizes):
  106. # automatic setting of time_step
  107. dt_temp = h / 20 if time_step is None else time_step
  108. # filter_process for window of size h
  109. t, differences = _filter_process(dt_temp, h, spk, t_final, test_param)
  110. time_index = np.arange(len(differences))
  111. # Point detected with window h
  112. cps_window = []
  113. while np.max(differences) > test_quantile:
  114. cp_index = np.argmax(differences)
  115. # from index to time
  116. cp = cp_index * dt_temp + h
  117. # before repeating the procedure, the h-neighbourgs of detected CP
  118. # are discarded, because rate changes into it are alrady explained
  119. mask_fore = time_index > cp_index - int((h / dt_temp).simplified)
  120. mask_back = time_index < cp_index + int((h / dt_temp).simplified)
  121. differences[mask_fore & mask_back] = 0
  122. # check if the neighbourhood of detected cp does not contain cps
  123. # detected with other windows
  124. neighbourhood_free = True
  125. # iterate on lists of cps detected with smaller window
  126. for j in range(i):
  127. # iterate on CPs detected with the j-th smallest window
  128. for c_pre in cps[j]:
  129. if c_pre - h < cp < c_pre + h:
  130. neighbourhood_free = False
  131. break
  132. # if none of the previously detected CPs falls in the h-
  133. # neighbourhood
  134. if neighbourhood_free:
  135. # add the current CP to the list
  136. cps_window.append(cp)
  137. # add the present list to the grand list
  138. cps.append(cps_window)
  139. return cps
  140. def _brownian_motion(t_in, t_fin, x_in, time_step):
  141. """
  142. Generate a Brownian Motion.
  143. Parameters
  144. ----------
  145. t_in : quantities,
  146. initial time
  147. t_fin : quantities,
  148. final time
  149. x_in : float,
  150. initial point of the process: _brownian_motio(0) = x_in
  151. time_step : quantities,
  152. resolution, time step at which brownian increments are summed
  153. Returns
  154. -------
  155. Brownian motion on [t_in, t_fin], with resolution time_step and initial
  156. state x_in
  157. """
  158. u = 1 * pq.s
  159. try:
  160. t_in_sec = t_in.rescale(u).magnitude
  161. except ValueError:
  162. raise ValueError("t_in must be a time quantity")
  163. try:
  164. t_fin_sec = t_fin.rescale(u).magnitude
  165. except ValueError:
  166. raise ValueError("t_fin must be a time quantity")
  167. try:
  168. dt_sec = time_step.rescale(u).magnitude
  169. except ValueError:
  170. raise ValueError("dt must be a time quantity")
  171. x = np.random.normal(0, np.sqrt(dt_sec),
  172. size=int((t_fin_sec - t_in_sec) / dt_sec))
  173. s = np.cumsum(x)
  174. return s + x_in
  175. def _limit_processes(window_sizes, t_final, time_step):
  176. """
  177. Generate the limit processes (depending only on t_final and h), one for
  178. each window size `h` in H. The distribution of maxima of these processes
  179. is used to derive threshold `test_quantile` and parameters `test_param`.
  180. Parameters
  181. ----------
  182. window_sizes : list of quantities
  183. set of windows' size
  184. t_final : quantity object
  185. end of limit process
  186. time_step : quantity object
  187. resolution, time step at which the windows are slided
  188. Returns
  189. -------
  190. limit_processes : list of numpy array
  191. each entries contains the limit processes for each h,
  192. evaluated in [h,T-h] with steps time_step
  193. """
  194. limit_processes = []
  195. u = 1 * pq.s
  196. try:
  197. window_sizes_sec = window_sizes.rescale(u).magnitude
  198. except ValueError:
  199. raise ValueError("window_sizes must be a list of times")
  200. try:
  201. dt_sec = time_step.rescale(u).magnitude
  202. except ValueError:
  203. raise ValueError("time_step must be a time quantity")
  204. w = _brownian_motion(0 * u, t_final, 0, time_step)
  205. for h in window_sizes_sec:
  206. # BM on [h,T-h], shifted in time t-->t+h
  207. brownian_right = w[int(2 * h / dt_sec):]
  208. # BM on [h,T-h], shifted in time t-->t-h
  209. brownian_left = w[:int(-2 * h / dt_sec)]
  210. # BM on [h,T-h]
  211. brownian_center = w[int(h / dt_sec):int(-h / dt_sec)]
  212. modul = np.abs(brownian_right + brownian_left - 2 * brownian_center)
  213. limit_process_h = modul / (np.sqrt(2 * h))
  214. limit_processes.append(limit_process_h)
  215. return limit_processes
  216. @deprecated_alias(dt='time_step')
  217. def empirical_parameters(window_sizes, t_final, alpha, n_surrogates,
  218. time_step=None):
  219. """
  220. This function generates the threshold and the null parameters.
  221. The`_filter_process_h` has been proved to converge (for t_fin,
  222. h-->infinity) to a continuous functional of a Brownaian motion
  223. ('limit_process'). Using a MonteCarlo technique, maxima of
  224. these limit_processes are collected.
  225. The threshold is defined as the alpha quantile of this set of maxima.
  226. Namely:
  227. test_quantile := alpha quantile of {max_(h in window_size)[
  228. max_(t in [h, t_final-h])_limit_process_h(t)]}
  229. Parameters
  230. ----------
  231. window_sizes : list of quantity objects
  232. set of windows' size
  233. t_final : quantity object
  234. final time of the spike
  235. alpha : float
  236. alpha-quantile in range [0, 100]
  237. n_surrogates : integer
  238. numbers of simulated limit processes
  239. time_step : quantity object
  240. resolution, time step at which the windows are slided
  241. Returns
  242. -------
  243. test_quantile : float
  244. threshold for the maxima of the filter derivative processes, if any
  245. of these maxima is larger than this value, it is assumed the
  246. presence of a cp at the time corresponding to that maximum
  247. test_param : np.array 3 * num of window,
  248. first row: list of `h`, second and third rows: empirical means and
  249. variances of the limit process correspodning to `h`. This will be
  250. used to normalize the `filter_process` in order to give to the every
  251. maximum the same impact on the global statistic.
  252. """
  253. # try:
  254. # window_sizes_sec = window_sizes.rescale(u)
  255. # except ValueError:
  256. # raise ValueError("H must be a list of times")
  257. # window_sizes_mag = window_sizes_sec.magnitude
  258. # try:
  259. # t_final_sec = t_final.rescale(u)
  260. # except ValueError:
  261. # raise ValueError("T must be a time quantity")
  262. # t_final_mag = t_final_sec.magnitude
  263. if not isinstance(window_sizes, pq.Quantity):
  264. raise ValueError("window_sizes must be a list of time quantities")
  265. if not isinstance(t_final, pq.Quantity):
  266. raise ValueError("t_final must be a time quantity")
  267. if not isinstance(n_surrogates, int):
  268. raise TypeError("n_surrogates must be an integer")
  269. if not (isinstance(time_step, pq.Quantity) or (time_step is None)):
  270. raise ValueError("time_step must be a time quantity")
  271. if t_final <= 0:
  272. raise ValueError("t_final needs to be strictly positive")
  273. if alpha * (100.0 - alpha) < 0:
  274. raise ValueError("alpha needs to be in (0,100)")
  275. if np.min(window_sizes) <= 0:
  276. raise ValueError("window size needs to be strictly positive")
  277. if np.max(window_sizes) >= t_final / 2:
  278. raise ValueError("window size too large")
  279. if time_step is not None:
  280. for h in window_sizes:
  281. if int(h.rescale('us')) % int(time_step.rescale('us')) != 0:
  282. raise ValueError(
  283. "Every window size h must be a multiple of time_step")
  284. # Generate a matrix M*: n X m where n = n_surrogates is the number of
  285. # simulated limit processes and m is the number of chosen window sizes.
  286. # Elements are: M*(i,h) = max(t in T)[`limit_process_h`(t)],
  287. # for each h in H and surrogate i
  288. maxima_matrix = []
  289. for i in range(n_surrogates):
  290. # mh_star = []
  291. simu = _limit_processes(window_sizes, t_final, time_step)
  292. # for i, h in enumerate(window_sizes_mag):
  293. # # max over time of the limit process generated with window h
  294. # m_h = np.max(simu[i])
  295. # mh_star.append(m_h)
  296. # max over time of the limit process generated with window h
  297. mh_star = [np.max(x) for x in simu]
  298. maxima_matrix.append(mh_star)
  299. maxima_matrix = np.asanyarray(maxima_matrix)
  300. # these parameters will be used to normalize both the limit_processes (H0)
  301. # and the filter_processes
  302. null_mean = maxima_matrix.mean(axis=0)
  303. null_var = maxima_matrix.var(axis=0)
  304. # matrix normalization by mean and variance of the limit process, in order
  305. # to give, for every h, the same impact on the global maximum
  306. matrix_normalized = (maxima_matrix - null_mean) / np.sqrt(null_var)
  307. great_maxs = np.max(matrix_normalized, axis=1)
  308. test_quantile = np.percentile(great_maxs, 100.0 - alpha)
  309. null_parameters = [window_sizes, null_mean, null_var]
  310. test_param = np.asanyarray(null_parameters)
  311. return test_quantile, test_param
  312. def _filter(t_center, window, spiketrain):
  313. """
  314. This function calculates the difference of spike counts in the left and
  315. right side of a window of size h centered in t and normalized by its
  316. variance. The variance of this count can be expressed as a combination of
  317. mean and var of the I.S.I. lying inside the window.
  318. Parameters
  319. ----------
  320. t_center : quantity
  321. time on which the window is centered
  322. window : quantity
  323. window's size
  324. spiketrain : list, numpy array or SpikeTrain
  325. spike train to analyze
  326. Returns
  327. -------
  328. difference : float,
  329. difference of spike count normalized by its variance
  330. """
  331. u = 1 * pq.s
  332. try:
  333. t_sec = t_center.rescale(u).magnitude
  334. except AttributeError:
  335. raise ValueError("t must be a quantities object")
  336. # tm = t_sec.magnitude
  337. try:
  338. h_sec = window.rescale(u).magnitude
  339. except AttributeError:
  340. raise ValueError("h must be a time quantity")
  341. # hm = h_sec.magnitude
  342. try:
  343. spk_sec = spiketrain.rescale(u).magnitude
  344. except AttributeError:
  345. raise ValueError(
  346. "spiketrain must be a list (array) of times or a neo spiketrain")
  347. # cut spike-train on the right
  348. train_right = spk_sec[(t_sec < spk_sec) & (spk_sec < t_sec + h_sec)]
  349. # cut spike-train on the left
  350. train_left = spk_sec[(t_sec - h_sec < spk_sec) & (spk_sec < t_sec)]
  351. # spike count in the right side
  352. count_right = train_right.size
  353. # spike count in the left side
  354. count_left = train_left.size
  355. # form spikes to I.S.I
  356. isi_right = np.diff(train_right)
  357. isi_left = np.diff(train_left)
  358. if isi_right.size == 0:
  359. mu_ri = 0
  360. sigma_ri = 0
  361. else:
  362. # mean of I.S.I inside the window
  363. mu_ri = np.mean(isi_right)
  364. # var of I.S.I inside the window
  365. sigma_ri = np.var(isi_right)
  366. if isi_left.size == 0:
  367. mu_le = 0
  368. sigma_le = 0
  369. else:
  370. mu_le = np.mean(isi_left)
  371. sigma_le = np.var(isi_left)
  372. if (sigma_le > 0) & (sigma_ri > 0):
  373. s_quad = (sigma_ri / mu_ri**3) * h_sec + (sigma_le / mu_le**3) * h_sec
  374. else:
  375. s_quad = 0
  376. if s_quad == 0:
  377. difference = 0
  378. else:
  379. difference = (count_right - count_left) / np.sqrt(s_quad)
  380. return difference
  381. def _filter_process(time_step, h, spk, t_final, test_param):
  382. """
  383. Given a spike train `spk` and a window size `h`, this function generates
  384. the `filter derivative process` by evaluating the function `_filter`
  385. in steps of `time_step`.
  386. Parameters
  387. ----------
  388. h : quantity object
  389. window's size
  390. t_final : quantity,
  391. time on which the window is centered
  392. spk : list, array or SpikeTrain
  393. spike train to analyze
  394. time_step : quantity object, time step at which the windows are slided
  395. resolution
  396. test_param : matrix, the means of the first row list of `h`,
  397. the second row Empirical and the third row variances of
  398. the limit processes `Lh` are used to normalize the number
  399. of elements inside the windows
  400. Returns
  401. -------
  402. time_domain : numpy array
  403. time domain of the `filter derivative process`
  404. filter_process : array,
  405. values of the `filter derivative process`
  406. """
  407. u = 1 * pq.s
  408. try:
  409. h_sec = h.rescale(u).magnitude
  410. except AttributeError:
  411. raise ValueError("h must be a time quantity")
  412. try:
  413. t_final_sec = t_final.rescale(u).magnitude
  414. except AttributeError:
  415. raise ValueError("t_final must be a time quanity")
  416. try:
  417. dt_sec = time_step.rescale(u).magnitude
  418. except AttributeError:
  419. raise ValueError("time_step must be a time quantity")
  420. # domain of the process
  421. time_domain = np.arange(h_sec, t_final_sec - h_sec, dt_sec)
  422. filter_trajectrory = []
  423. # taken from the function used to generate the threshold
  424. emp_mean_h = test_param[1][test_param[0] == h]
  425. emp_var_h = test_param[2][test_param[0] == h]
  426. for t in time_domain:
  427. filter_trajectrory.append(_filter(t * u, h, spk))
  428. filter_trajectrory = np.asanyarray(filter_trajectrory)
  429. # ordered normalization to give each process the same impact on the max
  430. filter_process = (
  431. np.abs(filter_trajectrory) - emp_mean_h) / np.sqrt(emp_var_h)
  432. return time_domain, filter_process