size_tuning.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. import argparse
  2. import numpy as np
  3. import pandas as pd
  4. from tqdm import tqdm
  5. import statsmodels.api as sm
  6. from statsmodels.formula.api import ols
  7. from statsmodels.stats.anova import anova_lm
  8. #from pygam import PoissonGAM, LinearGAM, GammaGAM
  9. #from pygam import l as linear_term
  10. #from pygam import s as spline_term
  11. from parameters import DATAPATH, MINRATE, NPUPILAREABINS, NGAMSPLINES
  12. from util import load_data, unbiased_variance, times_to_counts, _resample_data, filter_units, sort_data
  13. def arousal_rate_matrix(spk_rates, pupil_area, **kwargs):
  14. """
  15. Get a matrix of firing rate histograms for different levels of arousal.
  16. """
  17. area_bins, binned_area, binned_rates = get_binned_rates(spk_rates, pupil_area, **kwargs)
  18. rate_bins = np.linspace(spk_rates.min(), spk_rates.max() + np.finfo('float').eps, kwargs['nbins'] + 1)
  19. # Fill pupil area rate matrix
  20. rate_mat = np.zeros((len(rate_bins) - 1, len(area_bins) - 1))
  21. for bin_i, rates in enumerate(binned_rates):
  22. rate_hist = np.histogram(rates, bins=rate_bins, density=True)[0] / (len(rate_bins) - 1)
  23. rate_mat[:, bin_i] += rate_hist
  24. return rate_bins, area_bins, rate_mat
  25. if __name__ == "__main__":
  26. parser = argparse.ArgumentParser()
  27. parser.add_argument('e_name')
  28. parser.add_argument('spk_type')
  29. args = parser.parse_args()
  30. df_pupil = load_data('pupil', [args.e_name])
  31. df_spikes = load_data('spikes', [args.e_name])
  32. df_spikes = filter_units(df_spikes, MINRATE)
  33. df = pd.merge(df_pupil, df_spikes, on=['m', 's', 'e'])
  34. seriess = []
  35. for idx, row in tqdm(df.iterrows(), total=len(df)):
  36. data = {
  37. 'm': row['m'],
  38. 's': row['s'],
  39. 'e': row['e'],
  40. 'u': row['u'],
  41. }
  42. if args.spk_type == 'ratio':
  43. row = times_to_counts(row, ['tonicspk', 'burstspk'], t0='pupil_tpts')
  44. row['ratio_counts'] = (row['burstspk_counts'] + np.finfo('float').eps) / row['tonicspk_counts']
  45. else:
  46. row = times_to_counts(row, [args.spk_type], t0='pupil_tpts')
  47. row = _resample_data(row, ['pupil_area'], t0='pupil_tpts')
  48. # Get pupil area gradient
  49. row['pupil_gradient'] = np.gradient(row['pupil_area'])
  50. spk_counts = row['{}_counts'.format(args.spk_type)]
  51. data['rate_max'] = spk_counts.max()
  52. data['rate_min'] = spk_counts.min()
  53. for var in ['area', 'gradient']:
  54. regressor = row['pupil_{}'.format(var)]
  55. data['{}_max'.format(var)] = regressor.max()
  56. data['{}_min'.format(var)] = regressor.min()
  57. if var == 'area':
  58. # Bin rates
  59. bin_min, bin_max = np.percentile(regressor, [2.5, 97.5])
  60. #bin_min, bin_max = regressor.min(), regressor.max()
  61. bins = np.linspace(bin_min, bin_max, NPUPILAREABINS + 1)
  62. elif var == 'gradient':
  63. bin_max = np.percentile(np.abs(regressor), 97.5)
  64. bins_neg = np.linspace(-1 * bin_max, 0, NPUPILAREABINS + 1)
  65. bins_pos = np.linspace(0, bin_max, NPUPILAREABINS + 1)
  66. bins = np.concatenate([bins_neg[:-1], bins_pos])
  67. binned_counts = sort_data(spk_counts, regressor, bins=bins)
  68. data[f'{var}_means'] = np.array([counts.mean() for counts in binned_counts])
  69. # ANOVA (linear OLS with categorical pupil size)
  70. digitized_regressor = np.digitize(regressor, bins=bins).clip(1, NPUPILAREABINS)
  71. df_anova = pd.DataFrame(np.column_stack([spk_counts, digitized_regressor]), columns=['count', 'bin'])
  72. anova_model = ols('count ~ C(bin)', data=df_anova).fit() # use formula to specify model
  73. data['{}_p'.format(var)] = anova_model.f_pvalue
  74. # Linear model (linear OLS with sorted categorical pupil size)
  75. mean_counts = np.array([counts.mean() for counts in binned_counts if len(counts) > 0])
  76. mean_counts.sort()
  77. y = mean_counts
  78. X = sm.add_constant(np.arange(len(mean_counts))) # use design matrix to specify model
  79. linear_model = sm.OLS(y, X).fit()
  80. intercept, slope = linear_model.params
  81. data['{}_slope'.format(var)] = slope
  82. """
  83. # Poisson GAM model for rates
  84. gam_model = PoissonGAM(spline_term(0, n_splines=NGAMSPLINES), fit_intercept=True)
  85. # Search over parameter controlling smoothness (lambda) for best fit
  86. gam_model.gridsearch(regressor[..., np.newaxis], spk_counts, keep_best=True, progress=False)
  87. data['{}_fit'.format(var)] = gam_model.predict(gam_model.generate_X_grid(term=0))
  88. data['{}_dev'.format(var)] = gam_model.score(regressor[..., np.newaxis], spk_counts)
  89. """
  90. # Get rate variance in pupil area bins
  91. binned_counts = sort_data(spk_counts, regressor, bins=bins)
  92. data['{}_var'.format(var)] = np.array([unbiased_variance(rates) for rates in binned_counts])
  93. seriess.append(pd.Series(data=data))
  94. df_tuning = pd.DataFrame(seriess)
  95. filename = 'sizetuning_{}_{}.pkl'.format(args.e_name, args.spk_type)
  96. df_tuning.to_pickle(DATAPATH + filename)