# assembly.py
  1. '''
  2. Codes for PCA/ICA methods described in Detecting cell assemblies in large neuronal populations, Lopes-dos-Santos et al (2013).
  3. https://doi.org/10.1016/j.jneumeth.2013.04.010
  4. This implementation was written in Feb 2019.
  5. Please e-mail me if you have comments, doubts, bug reports or criticism (Vítor, vtlsantos@gmail.com / vitor.lopesdossantos@pharm.ox.ac.uk).
  6. '''
  7. from sklearn.decomposition import PCA
  8. from scipy import stats
  9. import numpy as np
  10. from numpy import matlib as mb
  11. np.matlib = mb
  12. __author__ = "Vítor Lopes dos Santos"
  13. __version__ = "2019.1"
  14. def toyExample(assemblies, nneurons = 10, nbins = 1000, rate = 1.):
  15. np.random.seed()
  16. actmat = np.random.poisson(rate,nneurons*nbins).reshape(nneurons,nbins)
  17. assemblies.actbins = [None]*len(assemblies.membership)
  18. for (ai,members) in enumerate(assemblies.membership):
  19. members = np.array(members)
  20. nact = int(nbins*assemblies.actrate[ai])
  21. actstrength_ = rate*assemblies.actstrength[ai]
  22. actbins = np.argsort(np.random.rand(nbins))[0:nact]
  23. actmat[members.reshape(-1,1),actbins] = np.ones((len(members),nact))+actstrength_
  24. assemblies.actbins[ai] = np.sort(actbins)
  25. return actmat
  26. class toyassemblies:
  27. def __init__(self, membership, actrate, actstrength):
  28. self.membership = membership
  29. self.actrate = actrate
  30. self.actstrength = actstrength
  31. def marcenkopastur(significance):
  32. nbins = significance.nbins
  33. nneurons = significance.nneurons
  34. tracywidom = significance.tracywidom
  35. # calculates statistical threshold from Marcenko-Pastur distribution
  36. q = float(nbins)/float(nneurons) # note that silent neurons are counted too
  37. lambdaMax = pow((1+np.sqrt(1/q)),2)
  38. lambdaMax += tracywidom*pow(nneurons,-2./3) # Tracy-Widom correction
  39. return lambdaMax
  40. def getlambdacontrol(zactmat_):
  41. significance_ = PCA()
  42. significance_.fit(zactmat_.T)
  43. lambdamax_ = np.max(significance_.explained_variance_)
  44. return lambdamax_
  45. def binshuffling(zactmat,significance):
  46. np.random.seed()
  47. lambdamax_ = np.zeros(significance.nshu)
  48. for shui in range(significance.nshu):
  49. zactmat_ = np.copy(zactmat)
  50. for (neuroni,activity) in enumerate(zactmat_):
  51. randomorder = np.argsort(np.random.rand(significance.nbins))
  52. zactmat_[neuroni,:] = activity[randomorder]
  53. lambdamax_[shui] = getlambdacontrol(zactmat_)
  54. lambdaMax = np.percentile(lambdamax_,significance.percentile)
  55. return lambdaMax
  56. def circshuffling(zactmat,significance):
  57. np.random.seed()
  58. lambdamax_ = np.zeros(significance.nshu)
  59. for shui in range(significance.nshu):
  60. zactmat_ = np.copy(zactmat)
  61. for (neuroni,activity) in enumerate(zactmat_):
  62. cut = int(np.random.randint(significance.nbins*2))
  63. zactmat_[neuroni,:] = np.roll(activity,cut)
  64. lambdamax_[shui] = getlambdacontrol(zactmat_)
  65. lambdaMax = np.percentile(lambdamax_,significance.percentile)
  66. return lambdaMax
  67. def runSignificance(zactmat,significance):
  68. if significance.nullhyp == 'mp':
  69. lambdaMax = marcenkopastur(significance)
  70. elif significance.nullhyp == 'bin':
  71. lambdaMax = binshuffling(zactmat,significance)
  72. elif significance.nullhyp == 'circ':
  73. lambdaMax = circshuffling(zactmat,significance)
  74. else:
  75. print('ERROR !')
  76. print(' nyll hypothesis method '+str(nullhyp)+' not understood')
  77. significance.nassemblies = np.nan
  78. nassemblies = np.sum(significance.explained_variance_>lambdaMax)
  79. significance.nassemblies = nassemblies
  80. significance.lambdaMax = lambdaMax
  81. return significance
  82. def extractPatterns(actmat,significance,method):
  83. nassemblies = significance.nassemblies
  84. if method == 'pca':
  85. idxs = np.argsort(-significance.explained_variance_)[0:nassemblies]
  86. patterns = significance.components_[idxs,:]
  87. elif method == 'ica':
  88. from sklearn.decomposition import FastICA
  89. ica = FastICA(n_components=nassemblies)
  90. ica.fit(actmat.T)
  91. patterns = ica.components_
  92. else:
  93. print('ERROR !')
  94. print(' assembly extraction method '+str(method)+' not understood')
  95. patterns = np.nan
  96. if patterns is not np.nan:
  97. patterns = patterns.reshape(nassemblies,-1)
  98. # sets norm of assembly vectors to 1
  99. norms = np.linalg.norm(patterns,axis=1)
  100. patterns /= np.matlib.repmat(norms,np.size(patterns,1),1).T
  101. return patterns
  102. def runPatterns(actmat, method='ica', nullhyp = 'mp', nshu = 1000, percentile = 99, tracywidom = False):
  103. '''
  104. INPUTS
  105. actmat: activity matrix - numpy array (neurons, time bins)
  106. nullhyp: defines how to generate statistical threshold for assembly detection.
  107. 'bin' - bin shuffling, will shuffle time bins of each neuron independently
  108. 'circ' - circular shuffling, will shift time bins of each neuron independently
  109. obs: mantains (virtually) autocorrelations
  110. 'mp' - Marcenko-Pastur distribution - analytical threshold
  111. nshu: defines how many shuffling controls will be done (n/a if nullhyp is 'mp')
  112. percentile: defines which percentile to be used use when shuffling methods are employed.
  113. (n/a if nullhyp is 'mp')
  114. tracywidow: determines if Tracy-Widom is used. See Peyrache et al 2010.
  115. (n/a if nullhyp is NOT 'mp')
  116. OUTPUTS
  117. patterns: co-activation patterns (assemblies) - numpy array (assemblies, neurons)
  118. significance: object containing general information about significance tests
  119. zactmat: returns z-scored actmat
  120. '''
  121. nneurons = np.size(actmat,0)
  122. nbins = np.size(actmat,1)
  123. silentneurons = np.var(actmat,axis=1)==0
  124. actmat_ = actmat[~silentneurons,:]
  125. # z-scoring activity matrix
  126. zactmat_ = stats.zscore(actmat_,axis=1)
  127. # running significance (estimating number of assemblies)
  128. significance = PCA()
  129. significance.fit(zactmat_.T)
  130. significance.nneurons = nneurons
  131. significance.nbins = nbins
  132. significance.nshu = nshu
  133. significance.percentile = percentile
  134. significance.tracywidom = tracywidom
  135. significance.nullhyp = nullhyp
  136. significance = runSignificance(zactmat_,significance)
  137. if np.isnan(significance.nassemblies):
  138. return
  139. if significance.nassemblies<1:
  140. print('WARNING !')
  141. print(' no assembly detecded!')
  142. patterns = []
  143. else:
  144. # extracting co-activation patterns
  145. patterns_ = extractPatterns(zactmat_,significance,method)
  146. if patterns_ is np.nan:
  147. return
  148. # putting eventual silent neurons back (their assembly weights are defined as zero)
  149. patterns = np.zeros((np.size(patterns_,0),nneurons))
  150. patterns[:,~silentneurons] = patterns_
  151. zactmat = np.copy(actmat)
  152. zactmat[~silentneurons,:] = zactmat_
  153. return patterns,significance,zactmat
  154. def computeAssemblyActivity(patterns,zactmat,zerodiag = True):
  155. nassemblies = len(patterns)
  156. nbins = np.size(zactmat,1)
  157. assemblyAct = np.zeros((nassemblies,nbins))
  158. for (assemblyi,pattern) in enumerate(patterns):
  159. projMat = np.outer(pattern,pattern)
  160. projMat -= zerodiag*np.diag(np.diag(projMat))
  161. for bini in range(nbins):
  162. assemblyAct[assemblyi,bini] = \
  163. np.dot(np.dot(zactmat[:,bini],projMat),zactmat[:,bini])
  164. return assemblyAct