123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- import argparse
- import numpy as np
- import pandas as pd
- from tqdm import tqdm
- import statsmodels.api as sm
- from statsmodels.formula.api import ols
- from statsmodels.stats.anova import anova_lm
- #from pygam import PoissonGAM, LinearGAM, GammaGAM
- #from pygam import l as linear_term
- #from pygam import s as spline_term
- from parameters import DATAPATH, MINRATE, NPUPILAREABINS, NGAMSPLINES
- from util import load_data, unbiased_variance, times_to_counts, _resample_data, filter_units, sort_data
def arousal_rate_matrix(spk_rates, pupil_area, **kwargs):
    """
    Build a matrix of firing-rate histograms across arousal (pupil area) levels.

    Keyword arguments are forwarded to ``get_binned_rates``; ``kwargs['nbins']``
    sets the number of firing-rate bins. Returns the rate bin edges, the pupil
    area bin edges, and a (rate bin x area bin) matrix whose columns are the
    normalized rate histograms, one per arousal level.
    """
    area_bins, _, binned_rates = get_binned_rates(spk_rates, pupil_area, **kwargs)
    n_rate_bins = kwargs['nbins']
    # Nudge the top edge by eps so the maximum rate lands inside the last bin.
    rate_bins = np.linspace(spk_rates.min(),
                            spk_rates.max() + np.finfo('float').eps,
                            n_rate_bins + 1)
    # One column per pupil-area bin; fill each with that bin's rate histogram.
    rate_mat = np.zeros((n_rate_bins, len(area_bins) - 1))
    for col, rates in enumerate(binned_rates):
        hist, _ = np.histogram(rates, bins=rate_bins, density=True)
        rate_mat[:, col] += hist / n_rate_bins
    return rate_bins, area_bins, rate_mat
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('e_name')
    parser.add_argument('spk_type')
    args = parser.parse_args()

    # Pair each unit's spikes with the pupil trace of its recording,
    # matched on mouse 'm', session 's', experiment 'e'.
    df_pupil = load_data('pupil', [args.e_name])
    df_spikes = load_data('spikes', [args.e_name])
    df_spikes = filter_units(df_spikes, MINRATE)
    df = pd.merge(df_pupil, df_spikes, on=['m', 's', 'e'])

    seriess = []
    for idx, row in tqdm(df.iterrows(), total=len(df)):
        data = {
            'm': row['m'],
            's': row['s'],
            'e': row['e'],
            'u': row['u'],
        }
        # Convert spike times to binned counts on the pupil time base.
        if args.spk_type == 'ratio':
            row = times_to_counts(row, ['tonicspk', 'burstspk'], t0='pupil_tpts')
            # eps keeps the ratio finite when there are no burst spikes.
            # NOTE(review): a zero tonic count still yields inf — confirm
            # upstream filtering guarantees tonic counts > 0.
            row['ratio_counts'] = (row['burstspk_counts'] + np.finfo('float').eps) / row['tonicspk_counts']
        else:
            row = times_to_counts(row, [args.spk_type], t0='pupil_tpts')
        row = _resample_data(row, ['pupil_area'], t0='pupil_tpts')
        # Get pupil area gradient (per-sample finite difference).
        row['pupil_gradient'] = np.gradient(row['pupil_area'])
        spk_counts = row['{}_counts'.format(args.spk_type)]
        data['rate_max'] = spk_counts.max()
        data['rate_min'] = spk_counts.min()

        for var in ['area', 'gradient']:
            regressor = row['pupil_{}'.format(var)]
            data['{}_max'.format(var)] = regressor.max()
            data['{}_min'.format(var)] = regressor.min()
            if var == 'area':
                # Bin pupil area between the 2.5th and 97.5th percentiles so
                # outlier samples don't stretch the bin range.
                bin_min, bin_max = np.percentile(regressor, [2.5, 97.5])
                #bin_min, bin_max = regressor.min(), regressor.max()
                bins = np.linspace(bin_min, bin_max, NPUPILAREABINS + 1)
            elif var == 'gradient':
                # Symmetric bins around zero: NPUPILAREABINS for constriction
                # (negative gradient) plus NPUPILAREABINS for dilation.
                bin_max = np.percentile(np.abs(regressor), 97.5)
                bins_neg = np.linspace(-1 * bin_max, 0, NPUPILAREABINS + 1)
                bins_pos = np.linspace(0, bin_max, NPUPILAREABINS + 1)
                bins = np.concatenate([bins_neg[:-1], bins_pos])
            binned_counts = sort_data(spk_counts, regressor, bins=bins)
            data[f'{var}_means'] = np.array([counts.mean() for counts in binned_counts])

            # ANOVA (linear OLS with categorical pupil-size bin).
            # BUGFIX: clip to the actual number of bins for this regressor.
            # The gradient bins span 2 * NPUPILAREABINS categories; the old
            # clip(1, NPUPILAREABINS) collapsed every dilation bin into one.
            n_bins = len(bins) - 1
            digitized_regressor = np.digitize(regressor, bins=bins).clip(1, n_bins)
            df_anova = pd.DataFrame(np.column_stack([spk_counts, digitized_regressor]), columns=['count', 'bin'])
            anova_model = ols('count ~ C(bin)', data=df_anova).fit()  # use formula to specify model
            data['{}_p'.format(var)] = anova_model.f_pvalue

            # Linear model: OLS slope over the rank-ordered (sorted) bin means,
            # skipping empty bins so the means are well defined.
            mean_counts = np.array([counts.mean() for counts in binned_counts if len(counts) > 0])
            mean_counts.sort()
            y = mean_counts
            X = sm.add_constant(np.arange(len(mean_counts)))  # use design matrix to specify model
            linear_model = sm.OLS(y, X).fit()
            intercept, slope = linear_model.params
            data['{}_slope'.format(var)] = slope

            """
            # Poisson GAM model for rates
            gam_model = PoissonGAM(spline_term(0, n_splines=NGAMSPLINES), fit_intercept=True)
            # Search over parameter controlling smoothness (lambda) for best fit
            gam_model.gridsearch(regressor[..., np.newaxis], spk_counts, keep_best=True, progress=False)
            data['{}_fit'.format(var)] = gam_model.predict(gam_model.generate_X_grid(term=0))
            data['{}_dev'.format(var)] = gam_model.score(regressor[..., np.newaxis], spk_counts)
            """

            # Rate variance in pupil bins; reuse binned_counts computed above
            # (previously recomputed with identical arguments).
            data['{}_var'.format(var)] = np.array([unbiased_variance(rates) for rates in binned_counts])
        seriess.append(pd.Series(data=data))

    df_tuning = pd.DataFrame(seriess)
    filename = 'sizetuning_{}_{}.pkl'.format(args.e_name, args.spk_type)
    df_tuning.to_pickle(DATAPATH + filename)
|