capipeline.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. import numpy as np
  2. import numpy.ma as ma
  3. import matplotlib.pyplot as plt
  4. import pandas as pd
  5. import seaborn as sns
  6. import pickle
  7. import os
  8. sns.set()
  9. sns.set_style("whitegrid")
  10. from scipy.signal import medfilt
  11. from scipy.stats import skew, kurtosis, zscore
  12. from scipy import signal
  13. from sklearn.linear_model import LinearRegression, TheilSenRegressor
  14. from sys import path
  15. ### set path to where OASIS is located
  16. ### make sure to run ' python setup.py build_ext --inplace ' in OASIS-master folder
  17. path.append(r'/media/andrey/My Passport/OASIS-master')
  18. from oasis.functions import deconvolve
  19. from oasis.functions import estimate_time_constant
  20. def get_recordings_for_animals(animals, path):
  21. recordings = []
  22. for animal in animals:
  23. meta_data = pd.read_excel(path)
  24. meta_animal = meta_data[meta_data['Mouse'] == animal]
  25. recs = meta_animal['Number'].to_list()
  26. for r in recs:
  27. recordings.append(r)
  28. return recordings
  29. def get_animal_from_recording(recording, path):
  30. meta_data = pd.read_excel(path)
  31. meta_recording = meta_data[meta_data['Number'] == recording]
  32. animal = (meta_recording['Mouse'])
  33. animal = animal.to_numpy()[0]
  34. return animal
  35. def get_condition(recording, path):
  36. meta_data = pd.read_excel(path)
  37. condition = meta_data['Condition'][meta_data['Number'] == recording].values[0]
  38. return condition
  39. def traces_and_npils(recording, path, concatenation=True):
  40. meta_data = pd.read_excel(path)
  41. if (concatenation==True):
  42. path_excel_rec = str(meta_data['Folder'][recording]) + str(meta_data['Subfolder'][recording]) + 'suite2p/plane0'
  43. stats = np.load(path_excel_rec + '/stat.npy',allow_pickle=True)
  44. Traces = np.load(path_excel_rec + '/F.npy')
  45. Npil = np.load(path_excel_rec + '/Fneu.npy')
  46. iscell = np.load(path_excel_rec + '/iscell.npy')
  47. print("Total trace length: " + str(Traces.shape[1]))
  48. starting_frame = int(meta_data['Starting frame'][recording])
  49. recording_length = int(meta_data['Recording length'][recording])
  50. #analysis_period = meta_data['Analysis period'][recording]
  51. #analysis_period = [int(s) for s in analysis_period.split(',')]
  52. n_accepted_rejected = Traces.shape[0]
  53. #print("Period for the analysis (absolute): " + str(analysis_period[0]+starting_frame)+" "+str(analysis_period[1]+starting_frame))
  54. print("Recording length: " + str(recording_length))
  55. #print("Period for the analysis (relative): " + str(analysis_period[0])+" "+str(analysis_period[1]))
  56. good_recording = np.zeros(shape=(recording_length))
  57. if isinstance(meta_data['Analysis period'][recording], str): # as previous script
  58. #good_recording = np.zeros((18000))
  59. recording2keep = [int(s) for s in meta_data['Analysis period'][recording].split(',')]
  60. print("Analysis periods: " + str(recording2keep))
  61. begin = recording2keep[0::2]
  62. ending = recording2keep[1::2]
  63. for idx_begin in range(int(len(begin))):
  64. good_recording[begin[idx_begin] : ending[idx_begin]] = 1
  65. #else:
  66. # good_recording = np.ones_like(Spikes[0, :])
  67. good_recording = good_recording > 0
  68. Traces = Traces[:,starting_frame:starting_frame+recording_length]
  69. Npil = Npil[:,starting_frame:starting_frame+recording_length]
  70. Traces = Traces[:, good_recording]
  71. Npil = Npil[:, good_recording]
  72. print("Analysis period total frames: ", Traces.shape[1])
  73. #Traces = Traces[:,analysis_period[0]+starting_frame:analysis_period[1]+starting_frame]
  74. #Npil = Npil[:,analysis_period[0]+starting_frame:analysis_period[1]+starting_frame]
  75. Traces = Traces[iscell[:, 0].astype(bool), :]
  76. Npil = Npil[iscell[:, 0].astype(bool), :]
  77. else:
  78. print("record",recording)
  79. path_excel_rec = str(meta_data['Folder'][recording]) + str(meta_data['Subfolder'][recording]) + '/suite2p/plane0'
  80. stats = np.load(path_excel_rec + '/stat.npy',allow_pickle=True)
  81. Traces = np.load(path_excel_rec + '/F.npy')
  82. Npil = np.load(path_excel_rec + '/Fneu.npy')
  83. iscell = np.load(path_excel_rec + '/iscell.npy')
  84. n_accepted_rejected = Traces.shape[0]
  85. Traces = Traces[iscell[:, 0].astype(bool), :]
  86. Npil = Npil[iscell[:, 0].astype(bool), :]
  87. return Traces, Npil, n_accepted_rejected # n_accepted_rejected = Accepted + Rejected
  88. def median_stabilities(Npils):
  89. number_of_neurons = Npils.shape[0]
  90. length = Npils.shape[1]
  91. #print(length)
  92. l = int(length / 1000)
  93. Npils = Npils[:,:l*1000]
  94. npils_medians = ma.median(Npils, axis=1)
  95. #Npils = Npils - np.tile(ma.expand_dims(ma.median(Npils, axis=1), axis=1),
  96. # (1, ma.shape(Npils)[1]))
  97. Npils = Npils.reshape(Npils.shape[0],l,1000)
  98. #npils_medians = npils_medians.reshape(Npils.shape[0],l)
  99. #print(Npils.shape)
  100. ###TODO npils_median
  101. median_stabilities = ma.median(Npils,axis=2)
  102. median_for_all_trace = ma.median(Npils,axis=[1,2])
  103. median_stabilities = ma.abs(median_stabilities-median_for_all_trace[:,np.newaxis])
  104. median_stabilities = ma.sum(median_stabilities,axis=1)/l
  105. return median_stabilities
  106. def get_data_frame(recording, path, threshold=200, baseline_correction=True, concatenation=True, correlations = True):
  107. df_estimators = pd.DataFrame()
  108. df_corr = pd.DataFrame()
  109. r = recording
  110. animal = get_animal_from_recording(r, path)
  111. #condition = get_condition(r, path)
  112. #print(str(r)+" "+str(animal)+" "+str(condition))
  113. plt.cla()
  114. Traces, Npils, n_accepted_and_rejected = traces_and_npils(r, path, concatenation)
  115. Tm0p7N = Traces - 0.7*Npils
  116. if (baseline_correction==True):
  117. #median of all traces
  118. med_of_all_traces = np.median(Tm0p7N,axis=0)
  119. plt.plot(med_of_all_traces,'k')
  120. #filtering
  121. Tm0p7N_midfilt = medfilt(med_of_all_traces,31)
  122. plt.plot( Tm0p7N_midfilt, 'b')
  123. #regression
  124. TSreg = TheilSenRegressor(random_state=42)
  125. x = np.arange(Tm0p7N.shape[1])
  126. X = x[:, np.newaxis]
  127. fit = TSreg.fit(X, Tm0p7N_midfilt)
  128. #print(fit.get_params())
  129. y_pred = TSreg.predict(X)
  130. plt.plot( y_pred, 'w')
  131. #subtract
  132. y_pred = y_pred - y_pred[0]
  133. ab,resid,_,_,_ = ma.polyfit(x, y_pred, 1,full = True)
  134. #print(ab,resid)
  135. Tm0p7N[:,:] -= y_pred[np.newaxis, :]
  136. plt.title("median for all traces in recording")
  137. plt.show()
  138. recording_lenght = Traces.shape[1]
  139. # old baseline, worked well
  140. baseline = np.quantile(Tm0p7N,0.25,axis=1)
  141. print("Median baseline: {:.2f}".format(np.median(baseline)))
  142. Baseline_subtracted = Tm0p7N.copy()
  143. for i in range(Baseline_subtracted.shape[0]):
  144. Baseline_subtracted[i,:] -= baseline[i]
  145. integral = Baseline_subtracted.sum(axis=1)/recording_lenght
  146. #print(r)
  147. #Npil_regression = np.polyfit(np.arange(Npil.shape[1]),Npil,1)
  148. #Npil_stability[neuron_id] = Npil_regression[0]*9000/Npil_regression[1]
  149. #for npil in Npil:
  150. # print(npil.shape)
  151. n_accepted = Traces.shape[0]
  152. neuronID = ma.arange(n_accepted)
  153. n_accepted_and_rejected = n_accepted_and_rejected
  154. Traces_median = ma.median(Traces, axis=1)
  155. Npils_median = ma.median(Npils, axis=1)
  156. Tm0p7N_median = ma.median(Tm0p7N, axis=1)
  157. Traces_std = ma.std(Npils, axis=1)
  158. Npils_std = ma.std(Npils, axis=1)
  159. Tm0p7N_std = ma.std(Tm0p7N, axis=1)
  160. Traces_mins = ma.min(Traces, axis=1)
  161. Traces_maxs = ma.max(Traces, axis=1)
  162. Traces_peak_to_peak = Traces_maxs - Traces_mins
  163. Npils_mins = ma.min(Npils, axis=1)
  164. Npils_maxs = ma.max(Npils, axis=1)
  165. Npils_peak_to_peak = Npils_maxs - Npils_mins
  166. #median subtraction
  167. #Traces = Traces - np.tile(ma.expand_dims(ma.median(Traces, axis=1), axis=1),
  168. # (1, ma.shape(Traces)[1]))
  169. #Npil = Npil - np.tile(ma.expand_dims(ma.median(Npil, axis=1), axis=1),
  170. # (1, ma.shape(Npil)[1]))
  171. Traces_skewness = skew(Traces,axis=1)
  172. Npils_skewness = skew(Npils,axis=1)
  173. Tm0p7N_skewness = skew(Tm0p7N,axis=1)
  174. Traces_kurtosis = kurtosis(Traces, axis=1)
  175. Npils_kurtosis = kurtosis(Npils, axis=1)
  176. Npils_median_stabilities = median_stabilities(Npils)
  177. slope = ma.zeros(Npils.shape[0])
  178. intercept = ma.zeros(Npils.shape[0])
  179. residuals = ma.zeros(Npils.shape[0])
  180. if correlations:
  181. #Replace with smarter solution to take into account correlations with other neurons.
  182. Tcorr = np.corrcoef(Traces).flatten()
  183. Ncorr = np.corrcoef(Npils).flatten()
  184. Tm0p7Ncorr_mean = np.mean(np.corrcoef(Tm0p7N),axis=1)
  185. Tm0p7Ncorr = np.corrcoef(Tm0p7N).flatten()
  186. Tm0p7Ncorr[Tm0p7Ncorr>0.99999] = np.nan
  187. Tm0p7Ncorr_first100 = np.corrcoef(Tm0p7N[:100,:]).flatten()
  188. Tm0p7Ncorr_first100 [Tm0p7Ncorr_first100>0.99999] = np.nan
  189. #quick trick
  190. #print(Tm0p7N.shape)
  191. #Tm0p7N_10bins = Tm0p7N.reshape(Tm0p7N.shape[0],int(Tm0p7N.shape[1]/10),10).mean(axis=2)
  192. # print(Tm0p7N_10bins.shape)
  193. #Tm0p7Ncorr_10bins = np.corrcoef(Tm0p7N_10bins).flatten()
  194. # Tm0p7Ncorr_10bins[Tm0p7Ncorr_10bins>0.99999] = np.nan
  195. df_corr = pd.DataFrame({ "animal":animal,
  196. "recording":r,
  197. #"condition":condition,
  198. #"Tcorr":Tcorr,
  199. #"Ncorr":Ncorr,
  200. #"Tm0p7Ncorr":Tm0p7Ncorr,
  201. #"Tm0p7Ncorr.abs":np.absolute(Tm0p7Ncorr)
  202. "Tm0p7Ncorr":Tm0p7Ncorr,
  203. "Tm0p7Ncorr.abs":np.absolute(Tm0p7Ncorr)
  204. # "Tm0p7Ncorr_10bins":Tm0p7Ncorr_10bins,
  205. #"Tm0p7Ncorr_10bins.abs":np.absolute(Tm0p7Ncorr_10bins)
  206. })
  207. i=0
  208. for npil in Npils:
  209. ab,resid,_,_,_ = ma.polyfit(np.arange(npil.shape[0]), npil, 1,full = True)
  210. slope[i] = ab[0]
  211. intercept[i] = ab[1]
  212. residuals[i] = resid
  213. i=i+1
  214. slope_per_median = ma.divide(slope,Npils_median)
  215. slope_in_percent = ma.divide(ma.multiply(slope,Npils.shape[1]),Npils_median)
  216. print("Number of neurons accepted: " + str(Npils_std.shape[0]))
  217. ### decay constant and peak characterization
  218. num_cells = np.shape(Traces)[0]
  219. decay_isol = np.zeros((num_cells))
  220. decay_no_isol = np.zeros((num_cells))
  221. n_peaks = np.zeros((num_cells))
  222. height = np.zeros((num_cells))
  223. width = np.zeros((num_cells))
  224. baseline_oasis = np.zeros((num_cells))
  225. Baseline_subtracted = Tm0p7N.copy()
  226. integral = np.zeros((num_cells))
  227. #from oasis.functions import deconvolve
  228. #from oasis.functions import estimate_time_constant
  229. for neuron in range(num_cells):
  230. fs=30
  231. _, _, b, decay_neuron_isolated10, _ = deconvolve(np.double(Tm0p7N[neuron, ]),
  232. penalty = 0, sn=25, optimize_g = 10)
  233. _, _, _, decay_neuron_no_isolated, _ = deconvolve(np.double(Tm0p7N[neuron, ]), sn=25,
  234. penalty = 0)
  235. baseline_oasis[neuron] = b
  236. Baseline_subtracted[neuron] -= b
  237. integral[neuron] = Baseline_subtracted[neuron].sum()/recording_lenght
  238. #ARcoef = estimate_time_constant(Tm0p7N[neuron, ], p=2, sn=None, lags=10, fudge_factor=1.0, nonlinear_fit=False)
  239. #print(ARcoef)
  240. peak_ind, peaks = signal.find_peaks(Baseline_subtracted[neuron, ], height = threshold,
  241. distance = 10, prominence = threshold,
  242. width = (None, None),
  243. rel_height = 0.9)
  244. decay_isol[neuron] = - 1 / (fs * np.log(decay_neuron_isolated10))
  245. decay_no_isol[neuron] = - 1 / (fs * np.log(decay_neuron_no_isolated))
  246. if decay_isol[neuron]>0:
  247. pass
  248. else:
  249. decay_isol[neuron]== np.nan
  250. #print(decay_neuron_isolated10,decay_isol[neuron]," s")
  251. fs = 30
  252. trace_len = np.shape(Traces)[1] / (fs * 60) # in minutes
  253. n_peaks[neuron] = len(peaks['peak_heights']) / trace_len
  254. if n_peaks[neuron] > 0:
  255. height[neuron, ] = np.median(peaks['peak_heights'])
  256. width[neuron, ] = np.median(peaks['widths'])
  257. else:
  258. height[neuron, ] = np.nan
  259. width[neuron, ] = np.nan
  260. df_estimators = pd.DataFrame({ "animal":animal,
  261. "recording":r,
  262. #"condition":condition,
  263. "neuronID":neuronID,
  264. "n.accepted":n_accepted,
  265. "length.frames":recording_lenght,
  266. "length.minutes":trace_len,
  267. "n.accepted_and_rejected":n_accepted_and_rejected,
  268. "traces.median":Traces_median,
  269. "npil.median":Npils_median,
  270. "trace.std":Traces_std,
  271. "npil.std":Npils_std,
  272. "trace.mins":Traces_mins,
  273. "trace.maxs":Traces_maxs,
  274. "trace.peak_to_peak":Traces_peak_to_peak,
  275. "npil.mins":Npils_mins,
  276. "npil.maxs":Npils_maxs,
  277. "npil.peak_to_peak":Npils_peak_to_peak,
  278. "trace.skewness":Traces_skewness,
  279. "npil.skewness":Npils_skewness,
  280. "Tm0p7N.skewness":Tm0p7N_skewness,
  281. "Tm0p7N.median":Tm0p7N_median,
  282. "Tm0p7N.std":Tm0p7N_std,
  283. "trace.kurtosis":Traces_kurtosis,
  284. "npil.kurtosis": Npils_kurtosis,
  285. "npil.slope":slope,
  286. "npil.intercept":intercept,
  287. "npil.residual":residuals,
  288. "npil.slope_per_median":slope_in_percent,
  289. "npil.slope_in_percent":slope_in_percent,
  290. "npil.mstab.1000":Npils_median_stabilities,
  291. "baseline.quantile.25":baseline,
  292. "baseline.oasis":baseline_oasis,
  293. "integral":integral,
  294. #"Tm0p7Ncorr.mean":Tm0p7Ncorr_mean,
  295. ### decay constant and peak characterization
  296. "peak_detection_threshold":threshold,
  297. "decay_isol":decay_isol,
  298. "decay_no_isol":decay_no_isol,
  299. "n_peaks":n_peaks,
  300. "n_peaks_per_recording":n_peaks*trace_len,
  301. "height.median":height,
  302. "width.median":width
  303. })
  304. return df_estimators ,df_corr
  305. def get_raster(starting_recording, n_neurons_max,database_path, concatenation):
  306. traces, npil, number_of_neruons = traces_and_npils(starting_recording, database_path, concatenation)
  307. Tm0p7N = traces - 0.7*npil
  308. Tm0p7N = zscore(Tm0p7N,axis=1)
  309. Tm0p7N = np.reshape(Tm0p7N,(Tm0p7N.shape[0], 500,10)).mean(axis=2)
  310. Tm0p7N = np.maximum(-4, np.minimum(8, Tm0p7N)) + 4
  311. Tm0p7N/= 12
  312. return Tm0p7N[:n_neurons_max,:]