spade.py 92 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370
  1. """
  2. SPADE [1]_, [2]_, [3]_ is the combination of a mining technique and multiple
  3. statistical tests to detect and assess the statistical significance of repeated
  4. occurrences of spike sequences (spatio-temporal patterns, STP).
  5. .. autosummary::
  6. :toctree: toctree/spade
  7. spade
  8. concepts_mining
  9. pvalue_spectrum
  10. test_signature_significance
  11. approximate_stability
  12. pattern_set_reduction
  13. concept_output_to_patterns
  14. Visualization
  15. -------------
  16. Visualization of SPADE analysis is covered in Viziphant:
  17. https://viziphant.readthedocs.io/en/latest/modules.html
  18. Notes
  19. -----
  20. This module relies on the implementation of the fp-growth algorithm contained
  21. in the file fim.so which can be found here (http://www.borgelt.net/pyfim.html)
  22. and should be available in the spade_src folder (elephant/spade_src/).
  23. If the fim.so module is not present in the correct location or cannot be
  24. imported (only available for linux OS) SPADE will make use of a python
  25. implementation of the fast fca algorithm contained in
  26. `elephant/spade_src/fast_fca.py`, which is about 10 times slower.
  27. Examples
  28. --------
  29. Given a list of Neo Spiketrain objects, assumed to be recorded in parallel, the
  30. SPADE analysis can be applied as demonstrated in this short toy example of 10
  31. artificial spike trains exhibiting fully synchronous events of order 10.
  32. >>> from elephant.spade import spade
  33. >>> import elephant.spike_train_generation
  34. >>> import quantities as pq
  35. Generate correlated spiketrains.
  36. >>> spiketrains = elephant.spike_train_generation.cpp(
  37. ... rate=5*pq.Hz, A=[0]+[0.99]+[0]*9+[0.01], t_stop=10*pq.s)
  38. Mining patterns with SPADE using a `bin_size` of 1 ms and a window length of 1
  39. bin (i.e., detecting only synchronous patterns).
  40. >>> patterns = spade(
  41. ... spiketrains=spiketrains, bin_size=1*pq.ms, winlen=1, dither=5*pq.ms,
  42. ... min_spikes=10, n_surr=10, psr_param=[0,0,3],
  43. ... output_format='patterns')['patterns'][0]
  44. >>> import matplotlib.pyplot as plt
  45. >>> for neu in patterns['neurons']:
  46. ... label = 'pattern' if neu == 0 else None
  47. ... plt.plot(patterns['times'], [neu]*len(patterns['times']), 'ro',
  48. ... label=label)
  49. Raster plot of the spiketrains.
  50. >>> for st_idx, spiketrain in enumerate(spiketrains):
  51. ... label = 'pattern' if st_idx == 0 else None
  52. ... plt.plot(spiketrain.rescale(pq.ms), [st_idx] * len(spiketrain),
  53. ... 'k.', label=label)
  54. >>> plt.ylim([-1, len(spiketrains)])
  55. >>> plt.xlabel('time (ms)')
  56. >>> plt.ylabel('neurons ids')
  57. >>> plt.legend()
  58. >>> plt.show()
  59. References
  60. ----------
  61. .. [1] Torre, E., Picado-Muino, D., Denker, M., Borgelt, C., & Gruen, S.
  62. (2013). Statistical evaluation of synchronous spike patterns extracted
  63. by frequent item set mining. Frontiers in Computational Neuroscience, 7.
  64. .. [2] Quaglio, P., Yegenoglu, A., Torre, E., Endres, D. M., & Gruen, S.
  65. (2017). Detection and Evaluation of Spatio-Temporal Spike Patterns in
  66. Massively Parallel Spike Train Data with SPADE. Frontiers in
  67. Computational Neuroscience, 11.
  68. .. [3] Stella, A., Quaglio, P., Torre, E., & Gruen, S. (2019). 3d-SPADE:
  69. Significance evaluation of spatio-temporal patterns of various temporal
  70. extents. Biosystems, 185, 104022.
  71. :copyright: Copyright 2017 by the Elephant team, see `doc/authors.rst`.
  72. :license: BSD, see LICENSE.txt for details.
  73. """
  74. from __future__ import division, print_function, unicode_literals
  75. import operator
  76. import time
  77. import warnings
  78. from collections import defaultdict
  79. from functools import reduce
  80. from itertools import chain, combinations
  81. import neo
  82. import numpy as np
  83. import quantities as pq
  84. from scipy import sparse
  85. import elephant.conversion as conv
  86. import elephant.spike_train_surrogates as surr
  87. from elephant.spade_src import fast_fca
  88. from elephant.utils import deprecated_alias
# Public API of this module.
__all__ = [
    "spade",
    "concepts_mining",
    "pvalue_spectrum",
    "test_signature_significance",
    "approximate_stability",
    "pattern_set_reduction",
    "concept_output_to_patterns"
]

# Show each distinct UserWarning only once per session instead of on every
# occurrence (SPADE can emit the same warning many times in loops).
warnings.simplefilter('once', UserWarning)

# Optional dependency: MPI is used, when available, to parallelize the
# surrogate evaluation; HAVE_MPI gates all MPI-specific code paths.
try:
    from mpi4py import MPI  # for parallelized routines
    HAVE_MPI = True
except ImportError:  # pragma: no cover
    HAVE_MPI = False

# Optional dependency: compiled FIM module (fp-growth); when missing, the
# mining falls back to the pure-Python fast_fca implementation.
try:
    from elephant.spade_src import fim
    HAVE_FIM = True
except ImportError:  # pragma: no cover
    HAVE_FIM = False
@deprecated_alias(binsize='bin_size')
def spade(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
          max_spikes=None, max_occ=None, min_neu=1, approx_stab_pars=None,
          n_surr=0, dither=15 * pq.ms, spectrum='#',
          alpha=None, stat_corr='fdr_bh', surr_method='dither_spikes',
          psr_param=None, output_format='patterns', **surr_kwargs):
    r"""
    Perform the SPADE [1]_, [2]_, [3]_ analysis for the parallel input
    `spiketrains`. They are discretized with a temporal resolution equal to
    `bin_size` in a sliding window of `winlen*bin_size`.

    First, spike patterns are mined from the `spiketrains` using a technique
    called frequent itemset mining (FIM) or formal concept analysis (FCA). In
    this framework, a particular spatio-temporal spike pattern is called a
    "concept". It is then possible to compute the stability and the p-value
    of all pattern candidates. In a final step, concepts are filtered
    according to a stability threshold and a significance level `alpha`.

    Parameters
    ----------
    spiketrains : list of neo.SpikeTrain
        List containing the parallel spike trains to analyze
    bin_size : pq.Quantity
        The time precision used to discretize the spiketrains (binning).
    winlen : int
        The size (number of bins) of the sliding window used for the
        analysis. The maximal length of a pattern (delay between first and
        last spike) is then given by `winlen*bin_size`.
    min_spikes : int, optional
        Minimum number of spikes of a sequence to be considered a pattern.
        Default: 2
    min_occ : int, optional
        Minimum number of occurrences of a sequence to be considered as a
        pattern.
        Default: 2
    max_spikes : int, optional
        Maximum number of spikes of a sequence to be considered a pattern. If
        None no maximal number of spikes is considered.
        Default: None
    max_occ : int, optional
        Maximum number of occurrences of a sequence to be considered as a
        pattern. If None, no maximal number of occurrences is considered.
        Default: None
    min_neu : int, optional
        Minimum number of neurons in a sequence to be considered a pattern.
        Default: 1
    approx_stab_pars : dict or None, optional
        Parameter values for approximate stability computation.

        'n_subsets': int
            Number of subsets of a concept used to approximate its stability.
            If `n_subsets` is 0, it is calculated according to the formula
            given in Babin, Kuznetsov (2012), proposition 6:

            .. math::
                n_{\text{subset}} = \frac{1}{2 \cdot \epsilon^2}
                \ln{\left( \frac{2}{\delta} \right)} +1

        'delta' : float, optional
            delta: probability with at least :math:`1-\delta`
        'epsilon' : float, optional
            epsilon: absolute error
        'stability_thresh' : None or list of float
            List containing the stability thresholds used to filter the
            concepts.
            If `stability_thresh` is None, then the concepts are not
            filtered. Otherwise, only concepts with

            * intensional stability greater than `stability_thresh[0]` or
            * extensional stability greater than `stability_thresh[1]`

            are further analyzed.
    n_surr : int, optional
        Number of surrogates to generate to compute the p-value spectrum.
        This number should be large (`n_surr >= 1000` is recommended for 100
        spike trains in `spiketrains`). If `n_surr == 0`, then the p-value
        spectrum is not computed.
        Default: 0
    dither : pq.Quantity, optional
        Amount of spike time dithering for creating the surrogates for
        filtering the pattern spectrum. A spike at time `t` is placed
        randomly within `[t-dither, t+dither]` (see also
        :func:`elephant.spike_train_surrogates.dither_spikes`).
        Default: 15*pq.ms
    spectrum : {'#', '3d#'}, optional
        Define the signature of the patterns.

        '#': pattern spectrum using as signature the pair:
            (number of spikes, number of occurrences)
        '3d#': pattern spectrum using as signature the triplets:
            (number of spikes, number of occurrences, difference between
            last and first spike of the pattern)
        Default: '#'
    alpha : float, optional
        The significance level of the hypothesis tests performed.
        If `alpha is None`, all the concepts are returned.
        If `0.<alpha<1.`, the concepts are filtered according to their
        signature in the p-value spectrum.
        Default: None
    stat_corr : str
        Method used for multiple testing.
        See: :func:`test_signature_significance`
        Default: 'fdr_bh'
    surr_method : str
        Method to generate surrogates. You can use every method defined in
        :func:`elephant.spike_train_surrogates.surrogates`.
        Default: 'dither_spikes'
    psr_param : None or list of int or tuple of int
        This list contains parameters used in the pattern spectrum filtering:

        `psr_param[0]`: correction parameter for subset filtering
            (see `h_subset_filtering` in :func:`pattern_set_reduction`).
        `psr_param[1]`: correction parameter for superset filtering
            (see `k_superset_filtering` in :func:`pattern_set_reduction`).
        `psr_param[2]`: correction parameter for covered-spikes criterion
            (see `l_covered_spikes` in :func:`pattern_set_reduction`).
    output_format : {'concepts', 'patterns'}
        Distinguish the format of the output (see Returns).
        Default: 'patterns'
    surr_kwargs
        Keyword arguments that are passed to the surrogate methods.

    Returns
    -------
    output : dict
        'patterns':
            If `output_format` is 'patterns', see the return of
            :func:`concept_output_to_patterns`

            If `output_format` is 'concepts', then `output['patterns']` is a
            tuple of patterns which in turn are tuples of

            1. spikes in the pattern
            2. occurrences of the pattern

            For details see :func:`concepts_mining`.

            if stability is calculated, there are also:

            3. intensional stability
            4. extensional stability

            For details see :func:`approximate_stability`.
        'pvalue_spectrum' (only if `n_surr > 0`):
            A list of signatures in tuples format:

            * size
            * number of occurrences
            * duration (only if `spectrum=='3d#'`)
            * p-value

        'non_sgnf_sgnt': list
            Non significant signatures of 'pvalue_spectrum'.

    Notes
    -----
    If detected, this function will use MPI to parallelize the analysis.

    References
    ----------
    .. [1] Torre, E., Picado-Muino, D., Denker, M., Borgelt, C., & Gruen, S.
       (2013). Statistical evaluation of synchronous spike patterns extracted
       by frequent item set mining. Frontiers in Computational Neuroscience,
       7.
    .. [2] Quaglio, P., Yegenoglu, A., Torre, E., Endres, D. M., & Gruen, S.
       (2017). Detection and Evaluation of Spatio-Temporal Spike Patterns in
       Massively Parallel Spike Train Data with SPADE. Frontiers in
       Computational Neuroscience, 11.
    .. [3] Stella, A., Quaglio, P., Torre, E., & Gruen, S. (2019). 3d-SPADE:
       Significance evaluation of spatio-temporal patterns of various
       temporal extents. Biosystems, 185, 104022.

    Examples
    --------
    The following example applies SPADE to `spiketrains` (list of
    `neo.SpikeTrain`).

    >>> from elephant.spade import spade
    >>> import quantities as pq
    >>> bin_size = 3 * pq.ms # time resolution to discretize the spiketrains
    >>> winlen = 10 # maximal pattern length in bins (i.e., sliding window)
    >>> result_spade = spade(spiketrains, bin_size, winlen)
    """
    if HAVE_MPI:  # pragma: no cover
        comm = MPI.COMM_WORLD  # create MPI communicator
        rank = comm.Get_rank()  # get rank of current MPI task
    else:
        rank = 0
    # Validate all parameters up front; also tells us whether approximate
    # stability was requested via approx_stab_pars.
    compute_stability = _check_input(
        spiketrains=spiketrains, bin_size=bin_size, winlen=winlen,
        min_spikes=min_spikes, min_occ=min_occ,
        max_spikes=max_spikes, max_occ=max_occ, min_neu=min_neu,
        approx_stab_pars=approx_stab_pars,
        n_surr=n_surr, dither=dither, spectrum=spectrum,
        alpha=alpha, stat_corr=stat_corr, surr_method=surr_method,
        psr_param=psr_param, output_format=output_format)
    time_mining = time.time()
    # Mining runs on rank 0 only, unless stability is computed (in which
    # case every rank needs the concepts and the relation matrix).
    if rank == 0 or compute_stability:
        # Mine the spiketrains for extraction of concepts
        concepts, rel_matrix = concepts_mining(
            spiketrains, bin_size, winlen, min_spikes=min_spikes,
            min_occ=min_occ, max_spikes=max_spikes, max_occ=max_occ,
            min_neu=min_neu, report='a')
        time_mining = time.time() - time_mining
        print("Time for data mining: {}".format(time_mining))
    # Decide if compute the approximated stability
    if compute_stability:
        # 'stability_thresh' is a filtering parameter, not an argument of
        # approximate_stability(): pop it before forwarding the dict.
        if 'stability_thresh' in approx_stab_pars.keys():
            stability_thresh = approx_stab_pars.pop('stability_thresh')
        else:
            stability_thresh = None
        # Computing the approximated stability of all the concepts
        time_stability = time.time()
        concepts = approximate_stability(
            concepts, rel_matrix, **approx_stab_pars)
        time_stability = time.time() - time_stability
        print("Time for stability computation: {}".format(time_stability))
        # Filtering the concepts using stability thresholds
        if stability_thresh is not None:
            concepts = [concept for concept in concepts
                        if _stability_filter(concept, stability_thresh)]
    output = {}
    pv_spec = None  # initialize pv_spec to None
    # Decide whether compute pvalue spectrum
    if n_surr > 0:
        # Compute pvalue spectrum
        time_pvalue_spectrum = time.time()
        pv_spec = pvalue_spectrum(
            spiketrains, bin_size, winlen, dither=dither, n_surr=n_surr,
            min_spikes=min_spikes, min_occ=min_occ, max_spikes=max_spikes,
            max_occ=max_occ, min_neu=min_neu, spectrum=spectrum,
            surr_method=surr_method, **surr_kwargs)
        time_pvalue_spectrum = time.time() - time_pvalue_spectrum
        print("Time for pvalue spectrum computation: {}".format(
            time_pvalue_spectrum))
        # Storing pvalue spectrum
        output['pvalue_spectrum'] = pv_spec
    # rank!=0 returning None: only the root rank assembles and returns the
    # final result; the other MPI ranks only contributed computation.
    if rank != 0:
        warnings.warn('Returning None because executed on a process != 0')
        return None
    # Initialize non-significant signatures as empty list:
    ns_signatures = []
    # Decide whether filter concepts with psf
    if n_surr > 0:
        if len(pv_spec) > 0 and alpha is not None:
            # Computing non-significant entries of the spectrum applying
            # the statistical correction
            ns_signatures = test_signature_significance(
                pv_spec, concepts, alpha, winlen, corr=stat_corr,
                report='non_significant', spectrum=spectrum)
        # Storing non-significant entries of the pvalue spectrum
        output['non_sgnf_sgnt'] = ns_signatures
    # Filter concepts with pvalue spectrum (psf)
    if len(ns_signatures) > 0:
        concepts = [concept for concept in concepts
                    if _pattern_spectrum_filter(concept, ns_signatures,
                                                spectrum, winlen)]
    # Decide whether to filter concepts using psr
    if psr_param is not None:
        # Filter using conditional tests (psr)
        concepts = pattern_set_reduction(
            concepts, ns_signatures, winlen=winlen, spectrum=spectrum,
            h_subset_filtering=psr_param[0], k_superset_filtering=psr_param[1],
            l_covered_spikes=psr_param[2], min_spikes=min_spikes,
            min_occ=min_occ)
    # Storing patterns for output format concepts
    if output_format == 'concepts':
        output['patterns'] = concepts
    else:  # output_format == 'patterns':
        # Transforming concepts to dictionary containing pattern's infos
        output['patterns'] = concept_output_to_patterns(
            concepts, winlen, bin_size, pv_spec, spectrum,
            spiketrains[0].t_start)
    return output
  361. def _check_input(
  362. spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
  363. max_spikes=None, max_occ=None, min_neu=1, approx_stab_pars=None,
  364. n_surr=0, dither=15 * pq.ms, spectrum='#',
  365. alpha=None, stat_corr='fdr_bh', surr_method='dither_spikes',
  366. psr_param=None, output_format='patterns'):
  367. """
  368. Checks all input given to SPADE
  369. Parameters
  370. ----------
  371. see :`func`:`spade`
  372. Returns
  373. -------
  374. compute_stability: bool
  375. if the stability calculation is used
  376. """
  377. # Check spiketrains
  378. if not all([isinstance(elem, neo.SpikeTrain) for elem in spiketrains]):
  379. raise TypeError(
  380. 'spiketrains must be a list of SpikeTrains')
  381. # Check that all spiketrains have same t_start and same t_stop
  382. if not all([spiketrain.t_start == spiketrains[0].t_start
  383. for spiketrain in spiketrains]) or \
  384. not all([spiketrain.t_stop == spiketrains[0].t_stop
  385. for spiketrain in spiketrains]):
  386. raise ValueError(
  387. 'All spiketrains must have the same t_start and t_stop')
  388. # Check bin_size
  389. if not isinstance(bin_size, pq.Quantity):
  390. raise TypeError('bin_size must be a pq.Quantity')
  391. # Check winlen
  392. if not isinstance(winlen, int):
  393. raise TypeError('winlen must be an integer')
  394. # Check min_spikes
  395. if not isinstance(min_spikes, int):
  396. raise TypeError('min_spikes must be an integer')
  397. # Check min_occ
  398. if not isinstance(min_occ, int):
  399. raise TypeError('min_occ must be an integer')
  400. # Check max_spikes
  401. if not (isinstance(max_spikes, int) or max_spikes is None):
  402. raise TypeError('max_spikes must be an integer or None')
  403. # Check max_occ
  404. if not (isinstance(max_occ, int) or max_occ is None):
  405. raise TypeError('max_occ must be an integer or None')
  406. # Check min_neu
  407. if not isinstance(min_neu, int):
  408. raise TypeError('min_neu must be an integer')
  409. # Check approx_stab_pars
  410. compute_stability = False
  411. if isinstance(approx_stab_pars, dict):
  412. if 'n_subsets' in approx_stab_pars.keys() or\
  413. ('epsilon' in approx_stab_pars.keys() and
  414. 'delta' in approx_stab_pars.keys()):
  415. compute_stability = True
  416. else:
  417. raise ValueError(
  418. 'for approximate stability computation you need to '
  419. 'pass n_subsets or epsilon and delta.')
  420. # Check n_surr
  421. if not isinstance(n_surr, int):
  422. raise TypeError('n_surr must be an integer')
  423. # Check dither
  424. if not isinstance(dither, pq.Quantity):
  425. raise TypeError('dither must be a pq.Quantity')
  426. # Check spectrum
  427. if spectrum not in ('#', '3d#'):
  428. raise ValueError("spectrum must be '#' or '3d#'")
  429. # Check alpha
  430. if isinstance(alpha, (int, float)):
  431. # Check redundant use of alpha
  432. if 0. < alpha < 1. and n_surr == 0:
  433. warnings.warn('0.<alpha<1. but p-value spectrum has not been '
  434. 'computed (n_surr==0)')
  435. elif alpha is not None:
  436. raise TypeError('alpha must be an integer, a float or None')
  437. # Check stat_corr:
  438. if stat_corr not in \
  439. ('bonferroni', 'sidak', 'holm-sidak', 'holm',
  440. 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by',
  441. 'fdr_tsbh', 'fdr_tsbky', '', 'no'):
  442. raise ValueError("Parameter stat_corr not recognized")
  443. # Check surr_method
  444. if surr_method not in surr.SURR_METHODS:
  445. raise ValueError(
  446. 'specified surr_method (=%s) not valid' % surr_method)
  447. # Check psr_param
  448. if psr_param is not None:
  449. if not isinstance(psr_param, (list, tuple)):
  450. raise TypeError('psr_param must be None or a list or tuple of '
  451. 'integer')
  452. if not all(isinstance(param, int) for param in psr_param):
  453. raise TypeError('elements of psr_param must be integers')
  454. # Check output_format
  455. if output_format not in ('concepts', 'patterns'):
  456. raise ValueError("The output_format value has to be"
  457. "'patterns' or 'concepts'")
  458. return compute_stability
  459. @deprecated_alias(binsize='bin_size')
  460. def concepts_mining(spiketrains, bin_size, winlen, min_spikes=2, min_occ=2,
  461. max_spikes=None, max_occ=None, min_neu=1, report='a'):
  462. """
  463. Find pattern candidates extracting all the concepts of the context, formed
  464. by the objects defined as all windows of length `winlen*bin_size` slided
  465. along the discretized `spiketrains` and the attributes as the spikes
  466. occurring in each of the windows. Hence, the output are all the repeated
  467. sequences of spikes with maximal length `winlen`, which are not trivially
  468. explained by the same number of occurrences of a superset of spikes.
  469. Parameters
  470. ----------
  471. spiketrains : list of neo.SpikeTrain or conv.BinnedSpikeTrain
  472. Either list of the spiketrains to analyze or
  473. BinningSpikeTrain object containing the binned spiketrains to analyze
  474. bin_size : pq.Quantity
  475. The time precision used to discretize the `spiketrains` (clipping).
  476. winlen : int
  477. The size (number of bins) of the sliding window used for the analysis.
  478. The maximal length of a pattern (delay between first and last spike) is
  479. then given by `winlen*bin_size`
  480. min_spikes : int, optional
  481. Minimum number of spikes of a sequence to be considered a pattern.
  482. Default: 2
  483. min_occ : int, optional
  484. Minimum number of occurrences of a sequence to be considered as a
  485. pattern.
  486. Default: 2
  487. max_spikes : int, optional
  488. Maximum number of spikes of a sequence to be considered a pattern. If
  489. None no maximal number of spikes is considered.
  490. Default: None
  491. max_occ : int, optional
  492. Maximum number of occurrences of a sequence to be considered as a
  493. pattern. If None, no maximal number of occurrences is considered.
  494. Default: None
  495. min_neu : int, optional
  496. Minimum number of neurons in a sequence to considered a pattern.
  497. Default: 1
  498. report : {'a', '#', '3d#'}, optional
  499. Indicates the output of the function.
  500. 'a':
  501. All the mined patterns
  502. '#':
  503. Pattern spectrum using as signature the pair:
  504. (number of spikes, number of occurrence)
  505. '3d#':
  506. Pattern spectrum using as signature the triplets:
  507. (number of spikes, number of occurrence, difference between the
  508. times of the last and the first spike of the pattern)
  509. Default: 'a'
  510. Returns
  511. -------
  512. mining_results : np.ndarray
  513. If report == 'a':
  514. Numpy array of all the pattern candidates (concepts) found in the
  515. `spiketrains`. Each pattern is represented as a tuple containing
  516. (spike IDs, discrete times (window position) of the occurrences
  517. of the pattern). The spike IDs are defined as:
  518. `spike_id=neuron_id*bin_id` with `neuron_id` in
  519. `[0, len(spiketrains)]` and `bin_id` in `[0, winlen]`.
  520. If report == '#':
  521. The pattern spectrum is represented as a numpy array of triplets
  522. (pattern size, number of occurrences, number of patterns).
  523. If report == '3d#':
  524. The pattern spectrum is represented as a numpy array of
  525. quadruplets (pattern size, number of occurrences, difference
  526. between last and first spike of the pattern, number of patterns)
  527. rel_matrix : sparse.coo_matrix
  528. A binary matrix of shape (number of windows, winlen*len(spiketrains)).
  529. Each row corresponds to a window (order
  530. according to their position in time). Each column corresponds to one
  531. bin and one neuron and it is 0 if no spikes or 1 if one or more spikes
  532. occurred in that bin for that particular neuron. For example, the entry
  533. [0,0] of this matrix corresponds to the first bin of the first window
  534. position for the first neuron, the entry `[0,winlen]` to the first
  535. bin of the first window position for the second neuron.
  536. """
  537. if report not in ('a', '#', '3d#'):
  538. raise ValueError(
  539. "report has to assume of the following values:" +
  540. " 'a', '#' and '3d#,' got {} instead".format(report))
  541. # if spiketrains is list of neo.SpikeTrain convert to conv.BinnedSpikeTrain
  542. if isinstance(spiketrains, list) and \
  543. isinstance(spiketrains[0], neo.SpikeTrain):
  544. spiketrains = conv.BinnedSpikeTrain(
  545. spiketrains, bin_size=bin_size, tolerance=None)
  546. if not isinstance(spiketrains, conv.BinnedSpikeTrain):
  547. raise TypeError(
  548. 'spiketrains must be either a list of neo.SpikeTrain or '
  549. 'a conv.BinnedSpikeTrain object')
  550. # Clipping the spiketrains and (binary matrix)
  551. binary_matrix = spiketrains.to_sparse_bool_array().tocoo(copy=False)
  552. # Computing the context and the binary matrix encoding the relation between
  553. # objects (window positions) and attributes (spikes,
  554. # indexed with a number equal to neuron idx*winlen+bin idx)
  555. context, transactions, rel_matrix = _build_context(binary_matrix, winlen)
  556. # By default, set the maximum pattern size to the maximum number of
  557. # spikes in a window
  558. if max_spikes is None:
  559. max_spikes = binary_matrix.shape[0] * winlen
  560. # By default, set maximum number of occurrences to number of non-empty
  561. # windows
  562. if max_occ is None:
  563. max_occ = int(np.sum(np.sum(rel_matrix, axis=1) > 0))
  564. # Check if fim.so available and use it
  565. if HAVE_FIM:
  566. # Return the output
  567. mining_results = _fpgrowth(
  568. transactions,
  569. rel_matrix=rel_matrix,
  570. min_c=min_occ,
  571. min_z=min_spikes,
  572. max_z=max_spikes,
  573. max_c=max_occ,
  574. winlen=winlen,
  575. min_neu=min_neu,
  576. report=report)
  577. return mining_results, rel_matrix
  578. # Otherwise use fast_fca python implementation
  579. warnings.warn(
  580. 'Optimized C implementation of FCA (fim.so/fim.pyd) not found ' +
  581. 'in elephant/spade_src folder, or not compatible with this ' +
  582. 'Python version. You are using the pure Python implementation ' +
  583. 'of fast fca.')
  584. # Return output
  585. mining_results = _fast_fca(
  586. context,
  587. min_c=min_occ,
  588. min_z=min_spikes,
  589. max_z=max_spikes,
  590. max_c=max_occ,
  591. winlen=winlen,
  592. min_neu=min_neu,
  593. report=report)
  594. return mining_results, rel_matrix
def _build_context(binary_matrix, winlen):
    """
    Building the context given a matrix (number of trains x number of bins) of
    binned spike trains

    Parameters
    ----------
    binary_matrix : sparse.coo_matrix
        Binary matrix containing the binned spike trains.
        NOTE: this matrix is modified in place -- its ``row`` and ``col``
        index arrays are re-ordered so that entries are sorted by column.
    winlen : int
        The size (number of bins) of the sliding window used for the
        analysis

    Returns
    -------
    context : list of tuple
        List of tuples containing one object (window position idx) and one of
        the correspondent spikes idx (bin idx * neuron idx)
    transactions : list
        List of all transactions, each element of the list contains the
        attributes of the corresponding object.
    rel_matrix : np.ndarray
        A dense binary matrix with shape (number of windows,
        winlen*len(spiketrains)). Each row corresponds to a window (order
        according to their position in time).
        Each column corresponds to one bin and one neuron and it is 0 if no
        spikes or 1 if one or more spikes occurred in that bin for that
        particular neuron.
        E.g. the entry [0,0] of this matrix corresponds to the first bin of the
        first window position for the first neuron, the entry [0,winlen] to the
        first bin of the first window position for the second neuron.
    """
    # Initialization of the outputs
    context = []
    transactions = []
    num_neurons, num_bins = binary_matrix.shape
    # sort the COO entries by column (time bin) so that all spikes falling
    # into the same bin are contiguous in binary_matrix.row / .col
    indices = np.argsort(binary_matrix.col)
    binary_matrix.row = binary_matrix.row[indices]
    binary_matrix.col = binary_matrix.col[indices]
    # out of all window positions
    # get all non-empty first bins
    unique_cols, unique_col_idx = np.unique(
        binary_matrix.col, return_index=True)
    # append a sentinel so the half-open slice [i, i+1) is valid for the
    # last non-empty bin as well
    unique_col_idx = np.concatenate((unique_col_idx, [len(binary_matrix.col)]))
    windows_row = []
    windows_col = []
    # all non-empty bins are starting positions for windows
    for idx, window_idx in enumerate(unique_cols):
        # find the end of the current window in unique_cols
        end_of_window = np.searchsorted(unique_cols, window_idx + winlen)
        # loop over all non-empty bins in the current window
        for rel_idx, col in enumerate(unique_cols[idx:end_of_window]):
            # get all occurrences of the current col in binary_matrix.col
            spike_indices_in_window = np.arange(
                unique_col_idx[idx + rel_idx],
                unique_col_idx[idx + rel_idx + 1])
            # get the binary_matrix.row entries matching the current col
            # prepare the row of rel_matrix matching the current window
            # spikes are indexed as (neuron_id * winlen + bin_id)
            windows_col.extend(
                binary_matrix.row[spike_indices_in_window] * winlen
                + (col - window_idx))
            windows_row.extend([window_idx] * len(spike_indices_in_window))
    # Shape of the rel_matrix:
    # (total number of bins,
    #  number of bins in one window * number of neurons)
    # .A converts the sparse result into a dense boolean np.ndarray
    rel_matrix = sparse.coo_matrix(
        (np.ones((len(windows_col)), dtype=bool),
         (windows_row, windows_col)),
        shape=(num_bins, winlen * num_neurons),
        dtype=bool).A
    # Array containing all the possible attributes (each spike is indexed by
    # a number equal to neu idx*winlen + bin_idx)
    attributes = np.array(
        [s * winlen + t for s in range(binary_matrix.shape[0])
         for t in range(winlen)])
    # Building context and rel_matrix
    # Looping all the window positions w
    for window in unique_cols:
        # spikes in the current window (boolean mask over all attributes)
        times = rel_matrix[window]
        current_transactions = attributes[times]
        # adding to the context the window positions and the correspondent
        # attributes (spike idx) (fast_fca input)
        context.extend(
            (window, transaction) for transaction in current_transactions)
        # appending to the transactions spike idx (fast_fca input) of the
        # current window (fpgrowth input)
        transactions.append(list(current_transactions))
    # Return context and rel_matrix
    return context, transactions, rel_matrix
def _fpgrowth(transactions, min_c=2, min_z=2, max_z=None,
              max_c=None, rel_matrix=None, winlen=1, min_neu=1,
              target='c', report='a'):
    """
    Find frequent item sets with the fpgrowth algorithm.

    Parameters
    ----------
    transactions: tuple
        Transactions database to mine.
        The database must be an iterable of transactions;
        each transaction must be an iterable of items;
        each item must be a hashable object.
        If the database is a dictionary, the transactions are
        the keys, the values their (integer) multiplicities.
    target: str
        type of frequent item sets to find
        s/a   sets/all   all frequent item sets
        c     closed     closed frequent item sets
        m     maximal    maximal frequent item sets
        g     gens       generators
        Default:'c'
    min_c: int
        minimum support of an item set
        Default: 2
    min_z: int
        minimum number of items per item set
        Default: 2
    max_z: None/int
        maximum number of items per item set. If max_z==None no maximal
        size required
        Default: None
    max_c: None/int
        maximum support per item set. If max_c==None no maximal
        support required
        Default: None
    report: str
        'a': all the mined patterns
        '#': pattern spectrum using as signature the pair:
            (number of spikes, number of occurrence)
        '3d#': pattern spectrum using as signature the triplets:
            (number of spikes, number of occurrence, difference between the
            times of the last and the first spike of the pattern)
        Default: 'a'
    rel_matrix : None or sparse.coo_matrix
        A binary matrix with shape (number of windows,
        winlen*len(spiketrains)). Each row corresponds to a window (order
        according to their position in time).
        Each column corresponds to one bin and one neuron and it is 0 if no
        spikes or 1 if one or more spikes occurred in that bin for that
        particular neuron.
        E.g. the entry [0,0] of this matrix corresponds to the first bin of the
        first window position for the first neuron, the entry [0,winlen] to the
        first bin of the first window position for the second neuron.
        If == None only the closed frequent itemsets (intent) are returned and
        not which the index of their occurrences (extent)
        Default: None
    The following parameters are specific to Massive parallel SpikeTrains
    winlen: int
        The size (number of bins) of the sliding window used for the
        analysis. The maximal length of a pattern (delay between first and
        last spike) is then given by winlen*bin_size
        Default: 1
    min_neu: int
        Minimum number of neurons in a sequence to considered a
        potential pattern.
        Default: 1

    Returns
    -------
    If report == 'a':
        numpy array of all the pattern candidates (concepts) found in the
        spiketrains.
        Each pattern is represented as a tuple containing
        (spike IDs, discrete times (window position)
        of the occurrences of the pattern). The spike IDs are defined as:
        spike_id=neuron_id*bin_id; with neuron_id in [0, len(spiketrains)] and
        bin_id in [0, winlen].
    If report == '#':
        The pattern spectrum is represented as a numpy array of triplets each
        formed by:
        (pattern size, number of occurrences, number of patterns)
    If report == '3d#':
        The pattern spectrum is represented as a numpy array of quadruplets
        each formed by:
        (pattern size, number of occurrences, difference between last
        and first spike of the pattern, number of patterns)

    Raises
    ------
    ValueError
        If `min_neu` is smaller than 1.
    """
    if min_neu < 1:
        raise ValueError('min_neu must be an integer >=1')
    # By default, set the maximum pattern size to the number of spiketrains
    if max_z is None:
        max_z = max(max(map(len, transactions)), min_z + 1)
    # By default set maximum number of data to number of bins
    if max_c is None:
        max_c = len(transactions)
    # Initializing outputs
    concepts = []
    if report == '#':
        spec_matrix = np.zeros((max_z + 1, max_c + 1))
    if report == '3d#':
        spec_matrix = np.zeros((max_z + 1, max_c + 1, winlen))
    spectrum = []
    # check whether all transactions are identical
    # in that case FIM would not find anything,
    # so we need to create the output manually
    # for optimal performance,
    # we do the check sequentially and immediately break
    # once we find a second unique transaction
    first_transaction = transactions[0]
    for transaction in transactions[1:]:
        if transaction != first_transaction:
            # Mining the spiketrains with fpgrowth algorithm
            # NOTE: a negative `supp` is fim's convention for an absolute
            # occurrence count rather than a percentage of transactions
            fpgrowth_output = fim.fpgrowth(
                tracts=transactions,
                target=target,
                supp=-min_c,
                zmin=min_z,
                zmax=max_z,
                report='a',
                algo='s')
            break
    else:
        # for-else: no break happened, i.e. all transactions are identical;
        # the single pattern occurs in every transaction
        fpgrowth_output = [(tuple(transactions[0]), len(transactions))]
    # Applying min/max conditions and computing extent (window positions)
    fpgrowth_output = [concept for concept in fpgrowth_output
                       if _fpgrowth_filter(concept, winlen, max_c, min_neu)]
    # filter out subsets of patterns that are found as a side-effect
    # of using the moving window strategy
    fpgrowth_output = _filter_for_moving_window_subsets(
        fpgrowth_output, winlen)
    for (intent, supp) in fpgrowth_output:
        if report == 'a':
            if rel_matrix is not None:
                # Computing the extent of the concept (patterns
                # occurrences), checking in rel_matrix in which windows
                # the intent occurred
                extent = tuple(
                    np.nonzero(
                        np.all(rel_matrix[:, intent], axis=1)
                    )[0])
                concepts.append((intent, extent))
        # Computing 2d spectrum
        elif report == '#':
            spec_matrix[len(intent) - 1, supp - 1] += 1
        # Computing 3d spectrum
        elif report == '3d#':
            # third axis: pattern duration = largest bin offset in the window
            spec_matrix[len(intent) - 1, supp - 1, max(
                np.array(intent) % winlen)] += 1
    del fpgrowth_output
    if report == 'a':
        return concepts
    # Convert the (sparse) counting matrix into the list-of-signatures form
    if report == '#':
        for (size, occurrences) in np.transpose(np.where(spec_matrix != 0)):
            spectrum.append(
                (size + 1, occurrences + 1,
                 int(spec_matrix[size, occurrences])))
    elif report == '3d#':
        for (size, occurrences, duration) in\
                np.transpose(np.where(spec_matrix != 0)):
            spectrum.append(
                (size + 1, occurrences + 1, duration,
                 int(spec_matrix[size, occurrences, duration])))
    del spec_matrix
    if len(spectrum) > 0:
        spectrum = np.array(spectrum)
    elif report == '#':
        # empty spectrum: keep a consistent 2d shape for downstream code
        spectrum = np.zeros(shape=(0, 3))
    elif report == '3d#':
        spectrum = np.zeros(shape=(0, 4))
    return spectrum
  852. def _fpgrowth_filter(concept, winlen, max_c, min_neu):
  853. """
  854. Filter for selecting closed frequent items set with a minimum number of
  855. neurons and a maximum number of occurrences and first spike in the first
  856. bin position
  857. """
  858. intent = np.array(concept[0])
  859. keep_concept = (min(intent % winlen) == 0
  860. and concept[1] <= max_c
  861. and np.unique(intent // winlen).shape[0] >= min_neu
  862. )
  863. return keep_concept
  864. def _rereference_to_last_spike(transactions, winlen):
  865. """
  866. Converts transactions from the default format
  867. neu_idx * winlen + bin_idx (relative to window start)
  868. into the format
  869. neu_idx * winlen + bin_idx (relative to last spike)
  870. """
  871. len_transactions = len(transactions)
  872. neurons = np.zeros(len_transactions, dtype=int)
  873. bins = np.zeros(len_transactions, dtype=int)
  874. # extract neuron and bin indices
  875. for idx, attribute in enumerate(transactions):
  876. neurons[idx] = attribute // winlen
  877. bins[idx] = attribute % winlen
  878. # rereference bins to last spike
  879. bins = bins.max() - bins
  880. # calculate converted transactions
  881. converted_transactions = neurons * winlen + bins
  882. return converted_transactions
def _filter_for_moving_window_subsets(concepts, winlen):
    """
    Since we're using a moving window subpatterns starting from
    subsequent spikes after the first pattern spike will also be found.
    This filter removes them if they do not occur on their own in
    addition to the occurrences explained by their superset.
    Uses a reverse map with a set representation.

    Parameters
    ----------
    concepts : list
        Either fast_fca concepts (objects exposing ``intent`` and
        ``extent`` attributes) or fim.fpgrowth concepts
        ((intent, support) tuples).
        NOTE: the list is sorted in place by decreasing support.
    winlen : int
        The size (number of bins) of the sliding window.

    Returns
    -------
    list
        The concepts that are not trivial subsets of another concept with
        the same support (input order is not preserved).
    """
    # don't do anything if the input list is empty
    if len(concepts) == 0:
        return concepts
    # don't do anything if winlen is one
    if winlen == 1:
        return concepts
    if hasattr(concepts[0], 'intent'):
        # fca format
        # sort the concepts by (decreasing) support
        concepts.sort(key=lambda c: -len(c.extent))
        support = np.array([len(c.extent) for c in concepts])
        # convert transactions relative to last pattern spike
        converted_transactions = [_rereference_to_last_spike(concept.intent,
                                                             winlen=winlen)
                                  for concept in concepts]
    else:
        # fim.fpgrowth format
        # sort the concepts by (decreasing) support
        concepts.sort(key=lambda concept: -concept[1])
        support = np.array([concept[1] for concept in concepts])
        # convert transactions relative to last pattern spike
        converted_transactions = [_rereference_to_last_spike(concept[0],
                                                             winlen=winlen)
                                  for concept in concepts]
    output = []
    # only concepts with identical support can be subset/superset of each
    # other here, so process one support level at a time
    for current_support in np.unique(support):
        support_indices = np.nonzero(support == current_support)[0]
        # construct reverse map: attribute -> set of concepts containing it
        reverse_map = defaultdict(set)
        for map_idx, i in enumerate(support_indices):
            for window_bin in converted_transactions[i]:
                reverse_map[window_bin].add(map_idx)
        for i in support_indices:
            # concepts containing ALL attributes of concept i; if the only
            # such concept is i itself, i is not a subset of anything
            intersection = reduce(
                operator.and_,
                (reverse_map[window_bin]
                 for window_bin in converted_transactions[i]))
            if len(intersection) == 1:
                output.append(concepts[i])
    return output
def _fast_fca(context, min_c=2, min_z=2, max_z=None,
              max_c=None, report='a', winlen=1, min_neu=1):
    """
    Find concepts of the context with the fast-fca algorithm.

    Parameters
    ----------
    context : list
        List of tuples containing one object and one the correspondent
        attribute
    min_c: int
        minimum support of an item set
        Default: 2
    min_z: int
        minimum number of items per item set
        Default: 2
    max_z: None/int
        maximum number of items per item set. If max_z==None no maximal
        size required
        Default: None
    max_c: None/int
        maximum support per item set. If max_c==None no maximal
        support required
        Default: None
    report: str
        'a': all the mined patterns
        '#': pattern spectrum using as signature the pair:
            (number of spikes, number of occurrence)
        '3d#': pattern spectrum using as signature the triplets:
            (number of spikes, number of occurrence, difference between the
            times of the last and the first spike of the pattern)
        Default: 'a'
    The following parameters are specific to Massive parallel SpikeTrains
    winlen: int
        The size (number of bins) of the sliding window used for the
        analysis. The maximal length of a pattern (delay between first and
        last spike) is then given by winlen*bin_size
        Default: 1
    min_neu: int
        Minimum number of neurons in a sequence to considered a
        potential pattern.
        Default: 1

    Returns
    -------
    If report == 'a':
        All the pattern candidates (concepts) found in the spiketrains. Each
        pattern is represented as a tuple containing
        (spike IDs, discrete times (window position)
        of the occurrences of the pattern). The spike IDs are defined as:
        spike_id=neuron_id*bin_id; with neuron_id in [0, len(spiketrains)] and
        bin_id in [0, winlen].
    If report == '#':
        The pattern spectrum is represented as a list of triplets each
        formed by:
        (pattern size, number of occurrences, number of patterns)
    If report == '3d#':
        The pattern spectrum is represented as a list of quadruplets each
        formed by:
        (pattern size, number of occurrences, difference between last
        and first spike of the pattern, number of patterns)

    Raises
    ------
    ValueError
        If `min_neu` is smaller than 1.
    """
    # Initializing outputs
    concepts = []
    # Check parameters
    if min_neu < 1:
        raise ValueError('min_neu must be an integer >=1')
    # By default set maximum number of attributes
    if max_z is None:
        max_z = len(context)
    # By default set maximum number of data to number of bins
    if max_c is None:
        max_c = len(context)
    if report == '#':
        spec_matrix = np.zeros((max_z, max_c))
    elif report == '3d#':
        spec_matrix = np.zeros((max_z, max_c, winlen))
    spectrum = []
    # Mining the spiketrains with fast fca algorithm
    fca_out = fast_fca.FormalConcepts(context)
    fca_out.computeLattice()
    fca_concepts = fca_out.concepts
    # Applying min/max conditions
    fca_concepts = [concept for concept in fca_concepts
                    if _fca_filter(concept, winlen, min_c, min_z, max_c, max_z,
                                   min_neu)]
    # remove subpatterns found only as a side effect of the moving window
    fca_concepts = _filter_for_moving_window_subsets(fca_concepts, winlen)
    for fca_concept in fca_concepts:
        intent = tuple(fca_concept.intent)
        extent = tuple(fca_concept.extent)
        concepts.append((intent, extent))
        # computing spectrum
        if report == '#':
            spec_matrix[len(intent) - 1, len(extent) - 1] += 1
        elif report == '3d#':
            # third axis: pattern duration = largest bin offset in the window
            spec_matrix[len(intent) - 1, len(extent) - 1, max(
                np.array(intent) % winlen)] += 1
    if report == 'a':
        return concepts
    del concepts
    # returning spectrum: convert the counting matrix into signature tuples
    if report == '#':
        for (size, occurrence) in np.transpose(np.where(spec_matrix != 0)):
            spectrum.append(
                (size + 1, occurrence + 1, int(spec_matrix[size, occurrence])))
    if report == '3d#':
        for (size, occurrence, duration) in\
                np.transpose(np.where(spec_matrix != 0)):
            spectrum.append(
                (size + 1, occurrence + 1, duration,
                 int(spec_matrix[size, occurrence, duration])))
    del spec_matrix
    if len(spectrum) > 0:
        spectrum = np.array(spectrum)
    elif report == '#':
        # empty spectrum: keep a consistent 2d shape for downstream code
        spectrum = np.zeros(shape=(0, 3))
    elif report == '3d#':
        spectrum = np.zeros(shape=(0, 4))
    return spectrum
  1048. def _fca_filter(concept, winlen, min_c, min_z, max_c, max_z, min_neu):
  1049. """
  1050. Filter to select concepts with minimum/maximum number of spikes and
  1051. occurrences and first spike in the first bin position
  1052. """
  1053. intent = tuple(concept.intent)
  1054. extent = tuple(concept.extent)
  1055. keep_concepts = \
  1056. min_z <= len(intent) <= max_z and \
  1057. min_c <= len(extent) <= max_c and \
  1058. len(np.unique(np.array(intent) // winlen)) >= min_neu and \
  1059. min(np.array(intent) % winlen) == 0
  1060. return keep_concepts
@deprecated_alias(binsize='bin_size')
def pvalue_spectrum(
        spiketrains, bin_size, winlen, dither, n_surr, min_spikes=2, min_occ=2,
        max_spikes=None, max_occ=None, min_neu=1, spectrum='#',
        surr_method='dither_spikes', **surr_kwargs):
    """
    Compute the p-value spectrum of pattern signatures extracted from
    surrogates of parallel spike trains, under the null hypothesis of
    independent spiking.

    * n_surr surrogates are obtained from each spike train by spike dithering
    * pattern candidates (concepts) are collected from each surrogate data
    * the signatures (number of spikes, number of occurrences) of all patterns
      are computed, and their occurrence probability estimated by their
      occurrence frequency (p-value spectrum)

    Parameters
    ----------
    spiketrains : list of neo.SpikeTrain
        List containing the parallel spike trains to analyze
    bin_size : pq.Quantity
        The time precision used to discretize the `spiketrains` (binning).
    winlen : int
        The size (number of bins) of the sliding window used for the analysis.
        The maximal length of a pattern (delay between first and last spike) is
        then given by `winlen*bin_size`
    dither : pq.Quantity
        Amount of spike time dithering for creating the surrogates for
        filtering the pattern spectrum. A spike at time t is placed randomly
        within `[t-dither, t+dither]` (see also
        :func:`elephant.spike_train_surrogates.dither_spikes`).
    n_surr : int
        Number of surrogates to generate to compute the p-value spectrum.
        This number should be large (`n_surr>=1000` is recommended for 100
        spike trains in spiketrains). Must be strictly positive.
    min_spikes : int, optional
        Minimum number of spikes of a sequence to be considered a pattern.
        Default: 2
    min_occ : int, optional
        Minimum number of occurrences of a sequence to be considered as a
        pattern.
        Default: 2
    max_spikes : int, optional
        Maximum number of spikes of a sequence to be considered a pattern. If
        None no maximal number of spikes is considered.
        Default: None
    max_occ : int, optional
        Maximum number of occurrences of a sequence to be considered as a
        pattern. If None, no maximal number of occurrences is considered.
        Default: None
    min_neu : int, optional
        Minimum number of neurons in a sequence to considered a pattern.
        Default: 1
    spectrum : {'#', '3d#'}, optional
        Defines the signature of the patterns.
        '#': pattern spectrum using the as signature the pair:
            (number of spikes, number of occurrence)
        '3d#': pattern spectrum using the as signature the triplets:
            (number of spikes, number of occurrence, difference between last
            and first spike of the pattern)
        Default: '#'
    surr_method : str
        Method that is used to generate the surrogates. You can use every
        method defined in
        :func:`elephant.spike_train_surrogates.dither_spikes`.
        Default: 'dither_spikes'
    surr_kwargs
        Keyword arguments that are passed to the surrogate methods.

    Returns
    -------
    pv_spec : list
        if spectrum == '#':
            A list of triplets (z,c,p), where (z,c) is a pattern signature
            and p is the corresponding p-value (fraction of surrogates
            containing signatures (z*,c*)>=(z,c)).
        if spectrum == '3d#':
            A list of triplets (z,c,l,p), where (z,c,l) is a pattern signature
            and p is the corresponding p-value (fraction of surrogates
            containing signatures (z*,c*,l*)>=(z,c,l)).
        Signatures whose empirical p-value is 0 are not listed.

    Raises
    ------
    ValueError
        If `n_surr` is not positive, `surr_method` is unknown, or `spectrum`
        is not one of '#' and '3d#'.
    """
    # Initializing variables for parallel computing
    if HAVE_MPI:  # pragma: no cover
        comm = MPI.COMM_WORLD  # create MPI communicator
        rank = comm.Get_rank()  # get rank of current MPI task
        size = comm.Get_size()  # get tot number of MPI tasks
    else:
        rank = 0
        size = 1
    # Check on number of surrogates
    if n_surr <= 0:
        raise ValueError('n_surr has to be >0')
    if surr_method not in surr.SURR_METHODS:
        raise ValueError(
            'specified surr_method (=%s) not valid' % surr_method)
    if spectrum not in ('#', '3d#'):
        raise ValueError("Invalid spectrum: '{}'".format(spectrum))
    len_partition = n_surr // size  # length of each MPI task
    len_remainder = n_surr % size
    # the first `len_remainder` ranks each take one extra surrogate
    add_remainder = rank < len_remainder
    if max_spikes is None:
        # if max_spikes not defined, set it to the number of spiketrains times
        # number of bins per window.
        max_spikes = len(spiketrains) * winlen
    # per-surrogate maximal occurrence counts, one row per surrogate
    if spectrum == '#':
        max_occs = np.empty(shape=(len_partition + add_remainder,
                                   max_spikes - min_spikes + 1),
                            dtype=np.uint16)
    else:  # spectrum == '3d#':
        max_occs = np.empty(shape=(len_partition + add_remainder,
                                   max_spikes - min_spikes + 1, winlen),
                            dtype=np.uint16)
    for surr_id, binned_surrogates in _generate_binned_surrogates(
            spiketrains, bin_size=bin_size, dither=dither,
            surr_method=surr_method, n_surrogates=len_partition+add_remainder,
            **surr_kwargs):
        # Find all pattern signatures in the current surrogate data set
        surr_concepts = concepts_mining(
            binned_surrogates, bin_size, winlen, min_spikes=min_spikes,
            max_spikes=max_spikes, min_occ=min_occ, max_occ=max_occ,
            min_neu=min_neu, report=spectrum)[0]
        # The last entry of the signature is the number of times the
        # signature appeared. This entry is not needed here.
        surr_concepts = surr_concepts[:, :-1]
        max_occs[surr_id] = _get_max_occ(
            surr_concepts, min_spikes, max_spikes, winlen, spectrum)
    # Collecting results on the first PCU
    if size != 1:
        max_occs = comm.gather(max_occs, root=0)
        if rank != 0:  # pragma: no cover
            return []
        # The gather operator gives a list out. This is rearranged as a 2 resp.
        # 3 dimensional numpy-array.
        max_occs = np.vstack(max_occs)
    # Compute the p-value spectrum, and return it
    return _get_pvalue_spec(max_occs, min_spikes, max_spikes, min_occ,
                            n_surr, winlen, spectrum)
def _generate_binned_surrogates(
        spiketrains, bin_size, dither, surr_method, n_surrogates,
        **surr_kwargs):
    """
    Lazily generate binned surrogate data sets for the p-value spectrum.

    Parameters
    ----------
    spiketrains : list of neo.SpikeTrain
        The spike trains to create surrogates from.
    bin_size : pq.Quantity
        Bin size used to bin the surrogate spike trains.
    dither : pq.Quantity
        Amount of dithering passed on to the surrogate method.
    surr_method : str
        Name of the surrogate generation method (see
        `elephant.spike_train_surrogates`).
    n_surrogates : int
        Number of surrogate data sets to generate.
    surr_kwargs
        Additional keyword arguments forwarded to the surrogate method.

    Yields
    ------
    surr_id : int
        Index of the surrogate data set, in range(n_surrogates).
    binned_surrogates : conv.BinnedSpikeTrain
        The binned surrogate spike trains for this index.
    """
    # Per-method state that does not change between surrogates is prepared
    # once, before the generation loop.
    if surr_method == 'bin_shuffling':
        binned_spiketrains = [
            conv.BinnedSpikeTrain(
                spiketrain, bin_size=bin_size, tolerance=None)
            for spiketrain in spiketrains]
        # maximal shift expressed in number of bins
        max_displacement = int(dither.rescale(pq.ms).magnitude /
                               bin_size.rescale(pq.ms).magnitude)
    elif surr_method in ('joint_isi_dithering', 'isi_dithering'):
        isi_dithering = surr_method == 'isi_dithering'
        joint_isi_instances = \
            [surr.JointISI(spiketrain, dither=dither,
                           isi_dithering=isi_dithering, **surr_kwargs)
             for spiketrain in spiketrains]
    for surr_id in range(n_surrogates):
        if surr_method == 'bin_shuffling':
            binned_surrogates = \
                [surr.bin_shuffling(binned_spiketrain,
                                    max_displacement=max_displacement,
                                    **surr_kwargs)[0]
                 for binned_spiketrain in binned_spiketrains]
            # re-wrap the shuffled boolean arrays as one BinnedSpikeTrain
            binned_surrogates = np.array(
                [binned_surrogate.to_bool_array()[0]
                 for binned_surrogate in binned_surrogates])
            binned_surrogates = conv.BinnedSpikeTrain(
                binned_surrogates,
                bin_size=bin_size,
                t_start=spiketrains[0].t_start,
                t_stop=spiketrains[0].t_stop)
        elif surr_method in ('joint_isi_dithering', 'isi_dithering'):
            surrs = [instance.dithering()[0]
                     for instance in joint_isi_instances]
        elif surr_method == 'dither_spikes_with_refractory_period':
            # The initial refractory period is set to the bin size in order to
            # prevent that spikes fall into the same bin, if the spike trains
            # are sparse (min(ISI)>bin size).
            surrs = \
                [surr.dither_spikes(
                    spiketrain, dither=dither, n_surrogates=1,
                    refractory_period=bin_size, **surr_kwargs)[0]
                 for spiketrain in spiketrains]
        else:
            surrs = \
                [surr.surrogates(
                    spiketrain, n_surrogates=1, method=surr_method,
                    dt=dither, **surr_kwargs)[0]
                 for spiketrain in spiketrains]
        # all methods except bin_shuffling yield neo.SpikeTrain surrogates
        # that still need to be binned
        if surr_method != 'bin_shuffling':
            binned_surrogates = conv.BinnedSpikeTrain(
                surrs, bin_size=bin_size, tolerance=None)
        yield surr_id, binned_surrogates
  1252. def _get_pvalue_spec(max_occs, min_spikes, max_spikes, min_occ, n_surr, winlen,
  1253. spectrum):
  1254. """
  1255. This function converts the list of maximal occurrences into the
  1256. corresponding p-value spectrum.
  1257. Parameters
  1258. ----------
  1259. max_occs: np.ndarray
  1260. min_spikes: int
  1261. max_spikes: int
  1262. min_occ: int
  1263. n_surr: int
  1264. winlen: int
  1265. spectrum: {'#', '3d#'}
  1266. Returns
  1267. -------
  1268. if spectrum == '#':
  1269. List[List]:
  1270. each entry has the form: [pattern_size, pattern_occ, p_value]
  1271. if spectrum == '3d#':
  1272. List[List]:
  1273. each entry has the form:
  1274. [pattern_size, pattern_occ, pattern_dur, p_value]
  1275. """
  1276. if spectrum not in ('#', '3d#'):
  1277. raise ValueError("Invalid spectrum: '{}'".format(spectrum))
  1278. pv_spec = []
  1279. if spectrum == '#':
  1280. max_occs = np.expand_dims(max_occs, axis=2)
  1281. winlen = 1
  1282. for size_id, pt_size in enumerate(range(min_spikes, max_spikes + 1)):
  1283. for dur in range(winlen):
  1284. max_occs_size_dur = max_occs[:, size_id, dur]
  1285. counts, occs = np.histogram(
  1286. max_occs_size_dur,
  1287. bins=np.arange(min_occ,
  1288. np.max(max_occs_size_dur) + 2))
  1289. occs = occs[:-1].astype(np.uint16)
  1290. pvalues = np.cumsum(counts[::-1])[::-1] / n_surr
  1291. for occ_id, occ in enumerate(occs):
  1292. if spectrum == '#':
  1293. pv_spec.append([pt_size, occ, pvalues[occ_id]])
  1294. else: # spectrum == '3d#':
  1295. pv_spec.append([pt_size, occ, dur, pvalues[occ_id]])
  1296. return pv_spec
  1297. def _get_max_occ(surr_concepts, min_spikes, max_spikes, winlen, spectrum):
  1298. """
  1299. This function takes from a list of surrogate_concepts those concepts which
  1300. have the highest occurrence for a given pattern size and duration.
  1301. Parameters
  1302. ----------
  1303. surr_concepts: List[List]
  1304. min_spikes: int
  1305. max_spikes: int
  1306. winlen: int
  1307. spectrum: {'#', '3d#'}
  1308. Returns
  1309. -------
  1310. np.ndarray
  1311. Two-dimensional array. Each element corresponds to a highest occurrence
  1312. for a specific pattern size (which range from min_spikes to max_spikes)
  1313. and pattern duration (which range from 0 to winlen-1).
  1314. The first axis corresponds to the pattern size the second to the
  1315. duration.
  1316. """
  1317. if spectrum == '#':
  1318. winlen = 1
  1319. max_occ = np.zeros(shape=(max_spikes - min_spikes + 1, winlen))
  1320. for size_id, pt_size in enumerate(range(min_spikes, max_spikes + 1)):
  1321. concepts_for_size = surr_concepts[
  1322. surr_concepts[:, 0] == pt_size][:, 1:]
  1323. for dur in range(winlen):
  1324. if spectrum == '#':
  1325. occs = concepts_for_size[:, 0]
  1326. else: # spectrum == '3d#':
  1327. occs = concepts_for_size[concepts_for_size[:, 1] == dur][:, 0]
  1328. max_occ[size_id, dur] = np.max(occs, initial=0)
  1329. for pt_size in range(max_spikes - 1, min_spikes - 1, -1):
  1330. size_id = pt_size - min_spikes
  1331. max_occ[size_id] = np.max(max_occ[size_id:size_id + 2], axis=0)
  1332. if spectrum == '#':
  1333. max_occ = np.squeeze(max_occ, axis=1)
  1334. return max_occ
  1335. def _stability_filter(concept, stability_thresh):
  1336. """Criteria by which to filter concepts from the lattice"""
  1337. # stabilities larger then stability_thresh
  1338. keep_concept = \
  1339. concept[2] > stability_thresh[0]\
  1340. or concept[3] > stability_thresh[1]
  1341. return keep_concept
  1342. def _mask_pvalue_spectrum(pv_spec, concepts, spectrum, winlen):
  1343. """
  1344. The function filters the pvalue spectrum based on the number of
  1345. the statistical tests to be done. Only the entries of the pvalue spectrum
  1346. that coincide with concepts found in the original data are kept.
  1347. Moreover, entries of the pvalue spectrum with a value of 1 (all surrogates
  1348. datasets containing at least one mined pattern with that signature)
  1349. are discarded as well and considered trivial.
  1350. Parameters
  1351. ----------
  1352. pv_spec: List[List]
  1353. concepts: List[Tuple]
  1354. spectrum: {'#', '3d#'}
  1355. winlen: int
  1356. Returns
  1357. -------
  1358. mask : np.array
  1359. An array of boolean values, indicating if a signature of p-value
  1360. spectrum is also in the mined concepts of the original data.
  1361. """
  1362. if spectrum == '#':
  1363. signatures = {(len(concept[0]), len(concept[1]))
  1364. for concept in concepts}
  1365. else: # spectrum == '3d#':
  1366. # third entry of signatures is the duration, fixed as the maximum lag
  1367. signatures = {(len(concept[0]), len(concept[1]),
  1368. max(np.array(concept[0]) % winlen))
  1369. for concept in concepts}
  1370. mask = np.zeros(len(pv_spec), dtype=bool)
  1371. for index, pv_entry in enumerate(pv_spec):
  1372. if tuple(pv_entry[:-1]) in signatures \
  1373. and not np.isclose(pv_entry[-1], [1]):
  1374. # select the highest number of occurrences for size and duration
  1375. mask[index] = True
  1376. if mask[index-1]:
  1377. if spectrum == '#':
  1378. size = pv_spec[index][0]
  1379. prev_size = pv_spec[index-1][0]
  1380. if prev_size == size:
  1381. mask[index-1] = False
  1382. else:
  1383. size, duration = pv_spec[index][[0, 2]]
  1384. prev_size, prev_duration = pv_spec[index-1][[0, 2]]
  1385. if prev_size == size and duration == prev_duration:
  1386. mask[index-1] = False
  1387. return mask
  1388. def test_signature_significance(pv_spec, concepts, alpha, winlen,
  1389. corr='fdr_bh', report='spectrum',
  1390. spectrum='#'):
  1391. """
  1392. Compute the significance spectrum of a pattern spectrum.
  1393. Given pvalue_spectrum `pv_spec` as a list of triplets (z,c,p), where z is
  1394. pattern size, c is pattern support and p is the p-value of the signature
  1395. (z,c), this routine assesses the significance of (z,c) using the
  1396. confidence level alpha.
  1397. Bonferroni or FDR statistical corrections can be applied.
  1398. Parameters
  1399. ----------
  1400. pv_spec : list
  1401. A list of triplets (z,c,p), where z is pattern size, c is pattern
  1402. support and p is the p-value of signature (z,c)
  1403. concepts : list of tuple
  1404. Output of the concepts mining for the original data.
  1405. alpha : float
  1406. Significance level of the statistical test
  1407. winlen : int
  1408. Size (number of bins) of the sliding window used for the analysis
  1409. corr : str, optional
  1410. Method used for testing and adjustment of pvalues.
  1411. Can be either the full name or initial letters.
  1412. Available methods are:
  1413. 'bonferroni' : one-step correction
  1414. 'sidak' : one-step correction
  1415. 'holm-sidak' : step down method using Sidak adjustments
  1416. 'holm' : step-down method using Bonferroni adjustments
  1417. 'simes-hochberg' : step-up method (independent)
  1418. 'hommel' : closed method based on Simes tests (non-negative)
  1419. 'fdr_bh' : Benjamini/Hochberg (non-negative)
  1420. 'fdr_by' : Benjamini/Yekutieli (negative)
  1421. 'fdr_tsbh' : two stage fdr correction (non-negative)
  1422. 'fdr_tsbky' : two stage fdr correction (non-negative)
  1423. '' or 'no': no statistical correction
  1424. For further description see:
  1425. https://www.statsmodels.org/stable/generated/statsmodels.stats.multitest.multipletests.html
  1426. Default: 'fdr_bh'
  1427. report : {'spectrum', 'significant', 'non_significant'}, optional
  1428. Format to be returned for the significance spectrum:
  1429. 'spectrum': list of triplets (z,c,b), where b is a boolean specifying
  1430. whether signature (z,c) is significant (True) or not
  1431. (False)
  1432. 'significant': list containing only the significant signatures (z,c) of
  1433. pvalue_spectrum
  1434. 'non_significant': list containing only the non-significant signatures
  1435. spectrum : {'#', '3d#'}, optional
  1436. Defines the signature of the patterns.
  1437. '#': pattern spectrum using the as signature the pair:
  1438. (number of spikes, number of occurrence)
  1439. '3d#': pattern spectrum using the as signature the triplets:
  1440. (number of spikes, number of occurrence, difference between last
  1441. and first spike of the pattern)
  1442. Default: '#'
  1443. Returns
  1444. -------
  1445. sig_spectrum : list
  1446. Significant signatures of pvalue_spectrum, in the format specified
  1447. by `report`
  1448. """
  1449. # If alpha == 1 all signatures are significant
  1450. if alpha == 1:
  1451. return []
  1452. if spectrum not in ('#', '3d#'):
  1453. raise ValueError("spectrum must be either '#' or '3d#', "
  1454. "got {} instead".format(spectrum))
  1455. if report not in ('spectrum', 'significant', 'non_significant'):
  1456. raise ValueError("report must be either 'spectrum'," +
  1457. " 'significant' or 'non_significant'," +
  1458. "got {} instead".format(report))
  1459. if corr not in ('bonferroni', 'sidak', 'holm-sidak', 'holm',
  1460. 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by',
  1461. 'fdr_tsbh', 'fdr_tsbky', '', 'no'):
  1462. raise ValueError("Parameter corr not recognized")
  1463. pv_spec = np.array(pv_spec)
  1464. mask = _mask_pvalue_spectrum(pv_spec, concepts, spectrum, winlen)
  1465. pvalues = pv_spec[:, -1]
  1466. pvalues_totest = pvalues[mask]
  1467. # Initialize test array to False
  1468. tests = [False] * len(pvalues)
  1469. if len(pvalues_totest) > 0:
  1470. # Compute significance for only the non trivial tests
  1471. if corr in ['', 'no']: # ...without statistical correction
  1472. tests_selected = pvalues_totest <= alpha
  1473. else:
  1474. try:
  1475. import statsmodels.stats.multitest as sm
  1476. except ModuleNotFoundError:
  1477. raise ModuleNotFoundError(
  1478. "Please run 'pip install statsmodels' if you "
  1479. "want to use multiple testing correction")
  1480. tests_selected = sm.multipletests(pvalues_totest, alpha=alpha,
  1481. method=corr)[0]
  1482. # assign each corrected pvalue to its corresponding entry
  1483. # this breaks
  1484. for index, value in zip(mask.nonzero()[0], tests_selected):
  1485. tests[index] = value
  1486. # Return the specified results:
  1487. if spectrum == '#':
  1488. if report == 'spectrum':
  1489. sig_spectrum = [(size, occ, test)
  1490. for (size, occ, pv), test in zip(pv_spec, tests)]
  1491. elif report == 'significant':
  1492. sig_spectrum = [(size, occ) for ((size, occ, pv), test)
  1493. in zip(pv_spec, tests) if test]
  1494. else: # report == 'non_significant'
  1495. sig_spectrum = [(size, occ)
  1496. for ((size, occ, pv), test) in zip(pv_spec, tests)
  1497. if not test]
  1498. else: # spectrum == '3d#'
  1499. if report == 'spectrum':
  1500. sig_spectrum =\
  1501. [(size, occ, l, test)
  1502. for (size, occ, l, pv), test in zip(pv_spec, tests)]
  1503. elif report == 'significant':
  1504. sig_spectrum = [(size, occ, l) for ((size, occ, l, pv), test)
  1505. in zip(pv_spec, tests) if test]
  1506. else: # report == 'non_significant'
  1507. sig_spectrum =\
  1508. [(size, occ, l)
  1509. for ((size, occ, l, pv), test) in zip(pv_spec, tests)
  1510. if not test]
  1511. return sig_spectrum
  1512. def _pattern_spectrum_filter(concept, ns_signatures, spectrum, winlen):
  1513. """
  1514. Filter for significant concepts
  1515. """
  1516. if spectrum == '#':
  1517. keep_concept = (len(concept[0]), len(concept[1])) not in ns_signatures
  1518. else: # spectrum == '3d#':
  1519. # duration is fixed as the maximum lag
  1520. duration = max(np.array(concept[0]) % winlen)
  1521. keep_concept = (len(concept[0]), len(concept[1]),
  1522. duration) not in ns_signatures
  1523. return keep_concept
def approximate_stability(concepts, rel_matrix, n_subsets=0,
                          delta=0., epsilon=0.):
    r"""
    Approximate the stability of concepts. Uses the algorithm described
    in Babin, Kuznetsov (2012): Approximating Concept Stability

    Parameters
    ----------
    concepts : list
        All the pattern candidates (concepts) found in the spiketrains. Each
        pattern is represented as a tuple containing (spike IDs,
        discrete times (window position) of the occurrences of the pattern).
        The spike IDs are defined as:
        `spike_id=neuron_id*bin_id` with `neuron_id` in `[0, len(spiketrains)]`
        and `bin_id` in `[0, winlen]`.
    rel_matrix : sparse.coo_matrix
        A binary matrix with shape (number of windows,
        winlen*len(spiketrains)). Each row corresponds to a window (order
        according to their position in time).
        Each column corresponds to one bin and one neuron and it is 0 if
        no spikes or 1 if one or more spikes occurred in that bin for that
        particular neuron. For example, the entry [0,0] of this matrix
        corresponds to the first bin of the first window position for the
        first neuron, the entry `[0, winlen]` to the first bin of the first
        window position for the second neuron.
    n_subsets : int
        Number of subsets of a concept used to approximate its stability.
        If `n_subsets` is 0, it is calculated according to the formula
        given in Babin, Kuznetsov (2012), proposition 6:

        .. math::
            n_{\text{subset}} = \frac{1}{2 \cdot \epsilon^2}
            \ln{\left( \frac{2}{\delta} \right)} +1

        Default: 0
    delta : float, optional
        delta: probability with at least :math:`1-\delta`
        Default: 0.
    epsilon : float, optional
        epsilon: absolute error
        Default: 0.

    Returns
    -------
    output : list
        List of all the pattern candidates (concepts) given in input, each
        with the correspondent intensional and extensional stability. Each
        pattern is represented as a tuple (spike IDs,
        discrete times of the occurrences of the pattern, intensional
        stability of the pattern, extensional stability of the pattern).

    Notes
    -----
    If n_subset is larger than the extent all subsets are directly
    calculated, otherwise for small extent size an infinite
    loop can be created while doing the recursion,
    since the random generation will always contain the same
    numbers and the algorithm will be stuck searching for
    other (random) numbers.
    """
    if HAVE_MPI:  # pragma: no cover
        comm = MPI.COMM_WORLD  # create MPI communicator
        rank = comm.Get_rank()  # get rank of current MPI task
        size = comm.Get_size()  # get tot number of MPI tasks
    else:
        rank = 0
        size = 1
    if not (isinstance(n_subsets, int) and n_subsets >= 0):
        raise ValueError('n_subsets must be an integer >=0')
    if n_subsets == 0 and not (isinstance(delta, float) and delta > 0. and
                               isinstance(epsilon, float) and epsilon > 0.):
        raise ValueError('delta and epsilon must be floats > 0., '
                         'given that n_subsets = 0')
    if len(concepts) == 0:
        return []
    # Partition the concepts among the MPI tasks. rank_idx holds the slice
    # boundaries; with fewer concepts than tasks, all concepts go to rank 0.
    if len(concepts) <= size:
        rank_idx = [0] * (size + 1) + [len(concepts)]
    else:
        rank_idx = list(
            range(0, len(concepts) - len(concepts) % size + 1,
                  len(concepts) // size)) + [len(concepts)]
    # Calculate optimal n
    if n_subsets == 0:
        n_subsets = int(round(np.log(2. / delta) / (2 * epsilon ** 2) + 1))
    # rank 0 additionally processes the remainder slice that does not divide
    # evenly among the tasks
    if rank == 0:
        concepts_on_partition = concepts[rank_idx[rank]:rank_idx[rank + 1]] + \
            concepts[rank_idx[-2]:rank_idx[-1]]
    else:
        concepts_on_partition = concepts[rank_idx[rank]:rank_idx[rank + 1]]
    output = []
    for concept in concepts_on_partition:
        intent, extent = np.array(concept[0]), np.array(concept[1])
        # approximate both stabilities by Monte-Carlo subset sampling
        stab_int = _calculate_single_stability_parameter(
            intent, extent, n_subsets, rel_matrix, look_at='intent')
        stab_ext = _calculate_single_stability_parameter(
            intent, extent, n_subsets, rel_matrix, look_at='extent')
        output.append((intent, extent, stab_int, stab_ext))
    # collect the partial results from all MPI tasks on rank 0
    if size != 1:
        recv_list = comm.gather(output, root=0)
        if rank == 0:
            for i in range(1, len(recv_list)):
                output.extend(recv_list[i])
    return output
  1625. def _calculate_single_stability_parameter(intent, extent,
  1626. n_subsets, rel_matrix,
  1627. look_at='intent'):
  1628. """
  1629. Calculates the stability parameter for extent or intent.
  1630. For detailed describtion see approximate_stabilty
  1631. Parameters
  1632. ----------
  1633. extent: np.array
  1634. 2nd element of concept
  1635. intent: np.array
  1636. 1st element of concept
  1637. n_subsets: int
  1638. See approximate_stabilty
  1639. rel_matrix: sparse.coo_matrix
  1640. See approximate_stabilty
  1641. look_at: {'extent', 'intent'}
  1642. whether to determine stability for extent or intent.
  1643. Default: 'intent'
  1644. Returns
  1645. -------
  1646. stability: float
  1647. Stability parameter for given extent, intent depending on which to look
  1648. """
  1649. if look_at == 'intent':
  1650. element_1, element_2 = intent, extent
  1651. else: # look_at == 'extent':
  1652. element_1, element_2 = extent, intent
  1653. if n_subsets > 2 ** len(element_1):
  1654. subsets = chain.from_iterable(
  1655. combinations(element_1, subset_index)
  1656. for subset_index in range(len(element_1) + 1))
  1657. else:
  1658. subsets = _select_random_subsets(element_1, n_subsets)
  1659. stability = 0
  1660. excluded_subsets = []
  1661. for subset in subsets:
  1662. if any([set(subset).issubset(excluded_subset)
  1663. for excluded_subset in excluded_subsets]):
  1664. continue
  1665. # computation of the ' operator for the subset
  1666. if look_at == 'intent':
  1667. subset_prime = \
  1668. np.where(np.all(rel_matrix[:, subset], axis=1) == 1)[0]
  1669. else: # look_at == 'extent':
  1670. subset_prime = \
  1671. np.where(np.all(rel_matrix[subset, :], axis=0) == 1)[0]
  1672. # Condition holds if the closure of the subset of element_1 given in
  1673. # element_2 is equal to element_2 given in input
  1674. if set(subset_prime) == set(element_2):
  1675. stability += 1
  1676. else:
  1677. excluded_subsets.append(subset)
  1678. stability /= min(n_subsets, 2 ** len(element_1))
  1679. return stability
  1680. def _select_random_subsets(element_1, n_subsets):
  1681. """
  1682. Creates a list of random_subsets of element_1.
  1683. Parameters
  1684. ----------
  1685. element_1: np.array
  1686. intent or extent
  1687. n_subsets: int
  1688. see approximate_stability
  1689. Returns
  1690. -------
  1691. subsets : list
  1692. each element a subset of element_1
  1693. """
  1694. subsets_indices = [set()] * (len(element_1) + 1)
  1695. subsets = []
  1696. while len(subsets) < n_subsets:
  1697. num_indices = np.random.binomial(n=len(element_1), p=1 / 2)
  1698. random_indices = sorted(np.random.choice(
  1699. len(element_1), size=num_indices, replace=False))
  1700. random_tuple = tuple(random_indices)
  1701. if random_tuple not in subsets_indices[num_indices]:
  1702. subsets_indices[num_indices].add(random_tuple)
  1703. subsets.append(element_1[random_indices])
  1704. return subsets
def pattern_set_reduction(concepts, ns_signatures, winlen, spectrum,
                          h_subset_filtering=0, k_superset_filtering=0,
                          l_covered_spikes=0, min_spikes=2, min_occ=2):
    r"""
    Takes a list concepts and performs pattern set reduction (PSR).

    PSR determines which patterns in concepts_psf are statistically
    significant given any other pattern, on the basis of the pattern size
    and occurrence count ("support"). Only significant patterns are
    retained. The significance of a pattern A is evaluated through its
    signature :math:`(z_A, c_A)`, where :math:`z_A = |A|` is the size and
    :math:`c_A` the support of A, by either of:

    * subset filtering: any pattern B is discarded if *concepts* contains a
      superset A of B such that
      :math:`(z_B, c_B - c_A + h) \in \text{ns}_{\text{signatures}}`
    * superset filtering: any pattern A is discarded if *concepts* contains
      a subset B of A such that
      :math:`(z_A - z_B + k, c_A) \in \text{ns}_{\text{signatures}}`
    * covered-spikes criterion: for any two patterns A, B with
      :math:`A \subset B`, B is discarded if
      :math:`(z_B-l) \cdot c_B \le c_A \cdot (z_A - l)`, A is discarded
      otherwise;
    * combined filtering: combines the three procedures above.

    For any two patterns A and B in concepts_psf such that
    :math:`B \subset A`, check:

    1) :math:`(z_B, c_B - c_A + h) \in \text{ns}_{\text{signatures}}`, and
    2) :math:`(z_A - z_B + k, c_A) \in \text{ns}_{\text{signatures}}`.

    Then:

    * if 1) and not 2): discard B
    * if 2) and not 1): discard A
    * if 1) and 2): discard B if
      :math:`c_B \cdot (z_B - l) \le c_A \cdot (z_A - l)`,
      otherwise discard A
    * if neither 1) nor 2): keep both patterns

    Assumptions/Approximations:

    * a pair of concepts cannot cause one another to be rejected
    * if two concepts overlap more than min_occ times, one of them can
      account for all occurrences of the other one if it passes the
      filtering

    Parameters
    ----------
    concepts : list
        List of concepts, each consisting in its intent and extent
    ns_signatures : list
        A list of non-significant pattern signatures (z, c)
    winlen : int
        The size (number of bins) of the sliding window used for the
        analysis. The maximal length of a pattern (delay between first and
        last spike) is then given by `winlen*bin_size`.
    spectrum : {'#', '3d#'}
        Define the signature of the patterns.
        '#': pattern spectrum using as signature the pair:
        (number of spikes, number of occurrences)
        '3d#': pattern spectrum using as signature the triplet:
        (number of spikes, number of occurrences, difference between last
        and first spike of the pattern)
    h_subset_filtering : int, optional
        Correction parameter for subset filtering
        Default: 0
    k_superset_filtering : int, optional
        Correction parameter for superset filtering
        Default: 0
    l_covered_spikes : int, optional
        Correction parameter for covered-spikes criterion
        Default: 0
    min_spikes : int, optional
        Minimum pattern size
        Default: 2
    min_occ : int, optional
        Minimum number of pattern occurrences
        Default: 2

    Returns
    -------
    tuple
        A tuple containing the elements of the input argument
        that are significant according to combined filtering.
    """
    additional_measures = []
    # Extracting from the extent and intent the spike and window times
    # (occurrence count = len(extent), size = len(intent))
    for concept in concepts:
        intent = concept[0]
        extent = concept[1]
        additional_measures.append((len(extent), len(intent)))
    # by default, select all elements in conc to be returned in the output
    selected = [True] * len(concepts)
    # scan all conc and their subsets
    for id1, id2 in combinations(range(len(concepts)), r=2):
        # immediately continue if both concepts have already been rejected
        if not selected[id1] and not selected[id2]:
            continue
        intent1, extent1 = concepts[id1][:2]
        intent2, extent2 = concepts[id2][:2]
        occ1, size1 = additional_measures[id1]
        occ2, size2 = additional_measures[id2]
        # pattern durations: maximum lag of each intent within the window
        dur1 = max(np.array(intent1) % winlen)
        dur2 = max(np.array(intent2) % winlen)
        intent2 = set(intent2)
        # Collecting all the possible distances between the windows
        # of the two concepts
        time_diff_all = np.array(
            [w2 - w1 for w2 in extent2 for w1 in extent1])
        # sort time differences by ascending absolute value
        time_diff_sorting = np.argsort(np.abs(time_diff_all))
        sorted_time_diff, sorted_time_diff_occ = np.unique(
            time_diff_all[time_diff_sorting],
            return_counts=True)
        # only consider time differences that are smaller than winlen
        # and that correspond to intersections that occur at least min_occ
        # times
        time_diff_mask = np.logical_and(
            np.abs(sorted_time_diff) < winlen,
            sorted_time_diff_occ >= min_occ)
        # Rescaling the spike times to realign to real time
        for time_diff in sorted_time_diff[time_diff_mask]:
            intent1_new = [t_old - time_diff for t_old in intent1]
            # from here on we will only need the intents as sets
            intent1_new = set(intent1_new)
            # if intent1 and intent2 are disjoint, skip this step
            if len(intent1_new & intent2) == 0:
                continue
            # Test the case intent1 is a superset of intent2
            if intent1_new.issuperset(intent2):
                reject1, reject2 = _perform_combined_filtering(
                    occ_superset=occ1,
                    size_superset=size1,
                    dur_superset=dur1,
                    occ_subset=occ2,
                    size_subset=size2,
                    dur_subset=dur2,
                    spectrum=spectrum,
                    ns_signatures=ns_signatures,
                    h_subset_filtering=h_subset_filtering,
                    k_superset_filtering=k_superset_filtering,
                    l_covered_spikes=l_covered_spikes,
                    min_spikes=min_spikes,
                    min_occ=min_occ)
            elif intent2.issuperset(intent1_new):
                # symmetric case: intent2 is the superset
                reject2, reject1 = _perform_combined_filtering(
                    occ_superset=occ2,
                    size_superset=size2,
                    dur_superset=dur2,
                    occ_subset=occ1,
                    size_subset=size1,
                    dur_subset=dur1,
                    spectrum=spectrum,
                    ns_signatures=ns_signatures,
                    h_subset_filtering=h_subset_filtering,
                    k_superset_filtering=k_superset_filtering,
                    l_covered_spikes=l_covered_spikes,
                    min_spikes=min_spikes,
                    min_occ=min_occ)
            else:
                # none of the intents is a superset of the other one
                # we compare both concepts to the intersection
                # if one of them is not significant given the
                # intersection, it is rejected
                inter_size = len(intent1_new & intent2)
                reject1 = _superset_filter(
                    occ_superset=occ1,
                    size_superset=size1,
                    dur_superset=dur1,
                    size_subset=inter_size,
                    spectrum=spectrum,
                    ns_signatures=ns_signatures,
                    k_superset_filtering=k_superset_filtering,
                    min_spikes=min_spikes)
                reject2 = _superset_filter(
                    occ_superset=occ2,
                    size_superset=size2,
                    dur_superset=dur2,
                    size_subset=inter_size,
                    spectrum=spectrum,
                    ns_signatures=ns_signatures,
                    k_superset_filtering=k_superset_filtering,
                    min_spikes=min_spikes)
                # Reject accordingly:
                if reject1 and reject2:
                    # both fail: keep the one covering more spikes
                    reject1, reject2 = _covered_spikes_criterion(
                        occ_superset=occ1,
                        size_superset=size1,
                        occ_subset=occ2,
                        size_subset=size2,
                        l_covered_spikes=l_covered_spikes)
            selected[id1] &= not reject1
            selected[id2] &= not reject2
            # skip remaining time-shifts if both concepts have been rejected
            if (not selected[id1]) and (not selected[id2]):
                break
    # Return the selected concepts
    return [p for i, p in enumerate(concepts) if selected[i]]
  1897. def _perform_combined_filtering(occ_superset,
  1898. size_superset,
  1899. dur_superset,
  1900. occ_subset,
  1901. size_subset,
  1902. dur_subset,
  1903. spectrum,
  1904. ns_signatures,
  1905. h_subset_filtering,
  1906. k_superset_filtering,
  1907. l_covered_spikes,
  1908. min_spikes,
  1909. min_occ):
  1910. """
  1911. perform combined filtering
  1912. (see pattern_set_reduction)
  1913. """
  1914. reject_subset = _subset_filter(
  1915. occ_superset=occ_superset,
  1916. occ_subset=occ_subset,
  1917. size_subset=size_subset,
  1918. dur_subset=dur_subset,
  1919. spectrum=spectrum,
  1920. ns_signatures=ns_signatures,
  1921. h_subset_filtering=h_subset_filtering,
  1922. min_occ=min_occ)
  1923. reject_superset = _superset_filter(
  1924. occ_superset=occ_superset,
  1925. size_superset=size_superset,
  1926. dur_superset=dur_superset,
  1927. size_subset=size_subset,
  1928. spectrum=spectrum,
  1929. ns_signatures=ns_signatures,
  1930. k_superset_filtering=k_superset_filtering,
  1931. min_spikes=min_spikes)
  1932. # Reject the superset and/or the subset accordingly:
  1933. if reject_superset and reject_subset:
  1934. reject_superset, reject_subset = _covered_spikes_criterion(
  1935. occ_superset=occ_superset,
  1936. size_superset=size_superset,
  1937. occ_subset=occ_subset,
  1938. size_subset=size_subset,
  1939. l_covered_spikes=l_covered_spikes)
  1940. return reject_superset, reject_subset
  1941. def _subset_filter(occ_superset, occ_subset, size_subset, dur_subset, spectrum,
  1942. ns_signatures=None, h_subset_filtering=0, min_occ=2):
  1943. """
  1944. perform subset filtering
  1945. (see pattern_set_reduction)
  1946. """
  1947. if ns_signatures is None:
  1948. ns_signatures = []
  1949. occ_diff = occ_subset - occ_superset + h_subset_filtering
  1950. if spectrum == '#':
  1951. signature_to_test = (size_subset, occ_diff)
  1952. else: # spectrum == '3d#':
  1953. signature_to_test = (size_subset, occ_diff, dur_subset)
  1954. reject_subset = occ_diff < min_occ or signature_to_test in ns_signatures
  1955. return reject_subset
  1956. def _superset_filter(occ_superset, size_superset, dur_superset, size_subset,
  1957. spectrum, ns_signatures=None, k_superset_filtering=0,
  1958. min_spikes=2):
  1959. """
  1960. perform superset filtering
  1961. (see pattern_set_reduction)
  1962. """
  1963. if ns_signatures is None:
  1964. ns_signatures = []
  1965. size_diff = size_superset - size_subset + k_superset_filtering
  1966. if spectrum == '#':
  1967. signature_to_test = (size_diff, occ_superset)
  1968. else: # spectrum == '3d#':
  1969. signature_to_test = (size_diff, occ_superset, dur_superset)
  1970. reject_superset = \
  1971. size_diff < min_spikes or signature_to_test in ns_signatures
  1972. return reject_superset
  1973. def _covered_spikes_criterion(occ_superset,
  1974. size_superset,
  1975. occ_subset,
  1976. size_subset,
  1977. l_covered_spikes):
  1978. """
  1979. evaluate covered spikes criterion
  1980. (see pattern_set_reduction)
  1981. """
  1982. reject_superset = True
  1983. reject_subset = True
  1984. score_superset = (size_superset - l_covered_spikes) * occ_superset
  1985. score_subset = (size_subset - l_covered_spikes) * occ_subset
  1986. if score_superset >= score_subset:
  1987. reject_superset = False
  1988. else:
  1989. reject_subset = False
  1990. return reject_superset, reject_subset
  1991. @deprecated_alias(binsize='bin_size')
  1992. def concept_output_to_patterns(concepts, winlen, bin_size, pv_spec=None,
  1993. spectrum='#', t_start=0 * pq.ms):
  1994. """
  1995. Construction of dictionaries containing all the information about a pattern
  1996. starting from a list of concepts and its associated pvalue_spectrum.
  1997. Parameters
  1998. ----------
  1999. concepts: tuple
  2000. Each element of the tuple corresponds to a pattern which it turn is a
  2001. tuple of (spikes in the pattern, occurrences of the patterns)
  2002. winlen: int
  2003. Length (in bins) of the sliding window used for the analysis.
  2004. bin_size: pq.Quantity
  2005. The time precision used to discretize the `spiketrains` (binning).
  2006. pv_spec: None or tuple
  2007. Contains a tuple of signatures and the corresponding p-value. If equal
  2008. to None all p-values are set to -1.
  2009. spectrum: {'#', '3d#'}
  2010. '#': pattern spectrum using the as signature the pair:
  2011. (number of spikes, number of occurrences)
  2012. '3d#': pattern spectrum using the as signature the triplets:
  2013. (number of spikes, number of occurrence, difference between last
  2014. and first spike of the pattern)
  2015. Default: '#'
  2016. t_start: pq.Quantity
  2017. t_start of the analyzed spike trains
  2018. Returns
  2019. -------
  2020. output : list
  2021. List of dictionaries. Each dictionary corresponds to a pattern and
  2022. has the following entries:
  2023. 'itemset':
  2024. A list of the spikes in the pattern, expressed in theform of
  2025. itemset, each spike is encoded by
  2026. `spiketrain_id * winlen + bin_id`.
  2027. 'windows_ids':
  2028. The ids of the windows in which the pattern occurred
  2029. in discretized time (given byt the binning).
  2030. 'neurons':
  2031. An array containing the idx of the neurons of the pattern.
  2032. 'lags':
  2033. An array containing the lags (integers corresponding to the
  2034. number of bins) between the spikes of the patterns. The first
  2035. lag is always assumed to be 0 and corresponds to the first
  2036. spike.
  2037. 'times':
  2038. An array containing the times (integers corresponding to the
  2039. bin idx) of the occurrences of the patterns.
  2040. 'signature':
  2041. A tuple containing two integers (number of spikes of the
  2042. patterns, number of occurrences of the pattern).
  2043. 'pvalue':
  2044. The p-value corresponding to the pattern. If `n_surr==0`,
  2045. all p-values are set to -1.
  2046. """
  2047. if pv_spec is not None:
  2048. pvalue_dict = defaultdict(float)
  2049. # Creating a dictionary for the pvalue spectrum
  2050. for entry in pv_spec:
  2051. if spectrum == '3d#':
  2052. pvalue_dict[(entry[0], entry[1], entry[2])] = entry[-1]
  2053. if spectrum == '#':
  2054. pvalue_dict[(entry[0], entry[1])] = entry[-1]
  2055. # Initializing list containing all the patterns
  2056. t_start = t_start.rescale(bin_size.units)
  2057. output = []
  2058. for concept in concepts:
  2059. itemset, window_ids = concept[:2]
  2060. # Vocabulary for each of the patterns, containing:
  2061. # - The pattern expressed in form of Itemset, each spike in the pattern
  2062. # is represented as spiketrain_id * winlen + bin_id
  2063. # - The ids of the windows in which the pattern occurred in discretized
  2064. # time (clipping)
  2065. output_dict = {'itemset': itemset, 'windows_ids': window_ids}
  2066. # Bins relative to the sliding window in which the spikes of patt fall
  2067. itemset = np.array(itemset)
  2068. bin_ids_unsort = itemset % winlen
  2069. order_bin_ids = np.argsort(bin_ids_unsort)
  2070. bin_ids = bin_ids_unsort[order_bin_ids]
  2071. # id of the neurons forming the pattern
  2072. output_dict['neurons'] = list(itemset[order_bin_ids] // winlen)
  2073. # Lags (in bin_sizes units) of the pattern
  2074. output_dict['lags'] = bin_ids[1:] * bin_size
  2075. # Times (in bin_size units) in which the pattern occurs
  2076. output_dict['times'] = sorted(window_ids) * bin_size + t_start
  2077. # pattern dictionary appended to the output
  2078. if spectrum == '#':
  2079. # Signature (size, n occ) of the pattern
  2080. signature = (len(itemset), len(window_ids))
  2081. else: # spectrum == '3d#':
  2082. # Signature (size, n occ, duration) of the pattern
  2083. # duration is position of the last bin
  2084. signature = (len(itemset), len(window_ids), bin_ids[-1])
  2085. output_dict['signature'] = signature
  2086. # If None is given in input to the pval spectrum the pvalue
  2087. # is set to -1 (pvalue spectrum not available)
  2088. if pv_spec is None:
  2089. output_dict['pvalue'] = -1
  2090. else:
  2091. # p-value assigned to the pattern from the pvalue spectrum
  2092. output_dict['pvalue'] = pvalue_dict[signature]
  2093. output.append(output_dict)
  2094. return output