123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171 |
- import argparse
- import numpy as np
- import pandas as pd
- from tqdm import tqdm
- from sklearn.svm import SVC
- from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold, cross_val_score
- from util import (load_data, get_trials, filter_units, get_psth, get_responses, circmean,
- angle_subtract)
- from phase_tuning import HHT
- from parameters import DATAPATH, NIMFCYCLES, NSPIKES, MINRATE
- if __name__ == "__main__":
- parser = argparse.ArgumentParser()
- parser.add_argument('e_name')
- args = parser.parse_args()
- NSPLITS = 5
- NPHASEBINS = 2
- PHASEBINS = np.linspace(-np.pi, np.pi, NPHASEBINS + 1)
- df_pupil = load_data('pupil', [args.e_name])
- df_trials = load_data('trials', [args.e_name])
- df_trials.rename(columns={'trial_on_time':'trial_on_times', 'trial_off_time':'trial_off_times'}, inplace=True)
- df_trials = df_trials.apply(get_trials, stim_id=0, axis='columns')
- df = pd.merge(df_pupil, df_trials).set_index(['m', 's', 'e'])
- df_spikes = load_data('spikes', [args.e_name]).set_index(['m', 's', 'e'])
- #df_spikes = df_spikes[df_spikes.index.isin(df_pupil.index)]
- df_spikes = filter_units(df_spikes, MINRATE)
- df_tuning = load_data('phasetuning', [args.e_name], tranges='noopto').set_index(['m', 's', 'e', 'u'])
- seriess = []
- for idx, row in tqdm(df.iterrows(), total=len(df)):
- pupil_area = row['pupil_area']
- pupil_tpts = row['pupil_tpts']
- pupil_fs = 1 / np.diff(pupil_tpts).mean()
- # Get IMFs
- hht = HHT(pupil_area, pupil_fs)
- hht.emd()
- hht.hsa()
- hht.check_number_of_phasebin_visits(ncycles=NIMFCYCLES, remove_invalid=True)
- imf_freqs = hht.characteristic_frequency
- imf_phases = hht.phase.T
- trial_starts = row['trial_on_times']
- #trial_stops = row['trial_off_times']
- #trial_duration = (trial_stops - trial_starts).mean()
- trial_duration = 5
- trial_starts = trial_starts[(trial_starts + trial_duration) < pupil_tpts.max()]
- stim_labels = np.repeat(np.arange(NSPLITS), len(trial_starts))
- # Get units for this experiment
- try:
- df_units = df_spikes.loc[idx]
- df_units = df_units.reset_index().set_index(['m', 's', 'e', 'u'])
- except KeyError:
- print("Spikes missing for {}".format(idx))
- continue
- for u_idx, unit in df_units.iterrows():
- print(u_idx)
- for imf_i, phase in enumerate(imf_phases):
- # Skip if no phase tuning analysis was done for this unit
- df_unittuning = df_tuning.loc[u_idx].query('imf == %d' % (imf_i + 1))
- if len(df_unittuning) < 1:
- continue
- print(imf_i)
- data = {
- 'm': idx[0],
- 's': idx[1],
- 'e': idx[2],
- 'u': u_idx[-1],
- 'imf': imf_i + 1,
- 'freq': imf_freqs[imf_i]
- }
- phase_raster, raster_tpts = get_responses(trial_starts, phase, pupil_tpts, post=trial_duration)
- split_inds = np.linspace(0, len(raster_tpts), NSPLITS + 1).astype(int)
- # Mean phase for each stimulus segment
- phase_means = np.concatenate(([circmean(phase_raster[:, i0:i1], axis=1)[1] for i0, i1 in zip(split_inds[:-1], split_inds[1:])]))
- # Decode IMF phase using each spike type
- for spike_type in ['tonicspk', 'burst']:
- print(spike_type)
- # get raster for spike type and split into segments
- spike_times = unit['%s_times' % spike_type]
- if len(spike_times) <= NSPIKES:
- continue
- spike_raster, spike_tpts = get_psth(trial_starts, spike_times, post=trial_duration)
- split_inds = np.linspace(0, len(spike_tpts), NSPLITS + 1).astype(int)
- X = np.row_stack([spike_raster[:, i0:i1] for i0, i1 in zip(split_inds[:-1], split_inds[1:])])
- # use tuning phase to set phase bins
- tuning_phase = df_unittuning['%s_phase' % spike_type][0]
- if np.isnan(tuning_phase):
- continue
- phase_shift = angle_subtract(tuning_phase, -1 * np.pi / 2) - np.pi
- phase_means_shifted = angle_subtract(phase_means, phase_shift) - np.pi
- phase_labels = np.digitize(phase_means_shifted, bins=PHASEBINS)
- phase_labels = phase_labels.clip(1, NPHASEBINS) - 1
- if len(np.unique(phase_labels)) < 2:
- raise RuntimeError
- # predict phase bin
- print("decoding phase")
- classifier = SVC(kernel='rbf')
- crossval = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
- scores = cross_val_score(classifier, X, phase_labels, cv=crossval)
- data['%s_phase' % spike_type] = scores.mean()
- ## Decode stimulus across phase bins using all spike times
- spike_times = unit['spk_times']
- if len(spike_times) <= NSPIKES:
- continue
- spike_raster, spike_tpts = get_psth(trial_starts, spike_times, post=trial_duration)
- #i0, i1 = spike_tpts.searchsorted([0, trial_duration])
- #spike_raster = spike_raster[:, i0:i1]
- #spike_tpts = spike_tpts[i0:i1]
- split_inds = np.linspace(0, len(spike_tpts), NSPLITS + 1).astype(int)
- X = np.row_stack([spike_raster[:, i0:i1] for i0, i1 in zip(split_inds[:-1], split_inds[1:])])
- # use tonic phase to set the phase bins
- tuning_phase = df_unittuning['tonicspk_phase'][0]
- if np.isnan(tuning_phase):
- continue
- phase_shift = angle_subtract(tuning_phase, -1 * np.pi / 2) - np.pi
- phase_means_shifted = angle_subtract(phase_means, phase_shift) - np.pi
- phase_labels = np.digitize(phase_means_shifted, bins=PHASEBINS)
- phase_labels = phase_labels.clip(1, NPHASEBINS) - 1
- if ((phase_labels == 0).mean() < 0.25) or ((phase_labels == 1).mean() < 0.25):
- print("Phase split biased")
- continue
- # split segments based on phase bin
- X1 = X[phase_labels.astype(bool)]
- y1 = stim_labels[phase_labels.astype(bool)]
- if len(np.unique(y1)) < 5:
- raise RuntimeError
- X2 = X[~phase_labels.astype(bool)]
- y2 = stim_labels[~phase_labels.astype(bool)]
- if len(np.unique(y2)) < 5:
- raise RuntimeError
- # train on phase bin 2 & test
- print("decoding stimulus, set 2")
- classifier = SVC(kernel='linear').fit(X2, y2)
- data['stim_train2_test2'] = classifier.score(X2, y2)
- data['stim_train2_test1'] = classifier.score(X1, y1)
- # train on the second phase bin & test
- print("decoding stimulus, set 1")
- classifier = SVC(kernel='linear').fit(X1, y1)
- data['stim_train1_test1'] = classifier.score(X1, y1)
- data['stim_train1_test2'] = classifier.score(X2, y2)
- # random
- splitter = RepeatedStratifiedKFold(n_splits=2, n_repeats=5, random_state=0)
- shf_diffs = np.full(10, np.nan)
- y = stim_labels
- for shf_i, (train, test) in enumerate(splitter.split(X, stim_labels)):
- classifier = SVC(kernel='linear').fit(X[train], y[train])
- shf_diffs[shf_i] = classifier.score(X[test], y[test]) - classifier.score(X[train], y[train])
- data['stim_testshf'] = shf_diffs
- seriess.append(pd.Series(data=data))
- df_decoding = pd.DataFrame(seriess)
- filename = 'imfdecoding_{}_norm.pkl'.format(args.e_name)
- df_decoding.to_pickle(DATAPATH + filename)
|