laura_busse
/
pupil_timescales_dLGN


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
							import argparse
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.svm import SVC
from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold, cross_val_score

from util import (load_data, get_trials, filter_units, get_psth, get_responses, circmean,
                  angle_subtract)
from phase_tuning import HHT
from parameters import DATAPATH, NIMFCYCLES, NSPIKES, MINRATE


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('e_name')
    args = parser.parse_args()

    NSPLITS = 5
    NPHASEBINS = 2
    PHASEBINS = np.linspace(-np.pi, np.pi, NPHASEBINS + 1)

    df_pupil = load_data('pupil', [args.e_name])
    df_trials = load_data('trials', [args.e_name])
    df_trials.rename(columns={'trial_on_time':'trial_on_times', 'trial_off_time':'trial_off_times'}, inplace=True)
    df_trials = df_trials.apply(get_trials, stim_id=0, axis='columns')
    df = pd.merge(df_pupil, df_trials).set_index(['m', 's', 'e'])

    df_spikes = load_data('spikes', [args.e_name]).set_index(['m', 's', 'e'])
    #df_spikes = df_spikes[df_spikes.index.isin(df_pupil.index)]
    df_spikes = filter_units(df_spikes, MINRATE)

    df_tuning = load_data('phasetuning', [args.e_name], tranges='noopto').set_index(['m', 's', 'e', 'u'])

    seriess = []
    for idx, row in tqdm(df.iterrows(), total=len(df)):
        pupil_area = row['pupil_area']
        pupil_tpts = row['pupil_tpts']
        pupil_fs = 1 / np.diff(pupil_tpts).mean()

        # Get IMFs
        hht = HHT(pupil_area, pupil_fs)
        hht.emd()
        hht.hsa()
        hht.check_number_of_phasebin_visits(ncycles=NIMFCYCLES, remove_invalid=True)
        imf_freqs = hht.characteristic_frequency
        imf_phases = hht.phase.T

        trial_starts = row['trial_on_times']
        #trial_stops = row['trial_off_times']
        #trial_duration = (trial_stops - trial_starts).mean()
        trial_duration = 5
        trial_starts = trial_starts[(trial_starts + trial_duration) < pupil_tpts.max()]
        stim_labels = np.repeat(np.arange(NSPLITS), len(trial_starts))

        # Get units for this experiment
        try:
            df_units = df_spikes.loc[idx]
            df_units = df_units.reset_index().set_index(['m', 's', 'e', 'u'])
        except KeyError:
            print("Spikes missing for {}".format(idx))
            continue

        for u_idx, unit in df_units.iterrows():
            print(u_idx)

            for imf_i, phase in enumerate(imf_phases):
                # Skip if no phase tuning analysis was done for this unit
                df_unittuning = df_tuning.loc[u_idx].query('imf == %d' % (imf_i + 1))
                if len(df_unittuning) < 1:
                    continue
                print(imf_i)

                data = {
                    'm':        idx[0],
                    's':        idx[1],
                    'e':        idx[2],
                    'u':        u_idx[-1],
                    'imf':      imf_i + 1,
                    'freq':     imf_freqs[imf_i]
                    }

                phase_raster, raster_tpts = get_responses(trial_starts, phase, pupil_tpts, post=trial_duration)
                split_inds = np.linspace(0, len(raster_tpts), NSPLITS + 1).astype(int)
                # Mean phase for each stimulus segment
                phase_means = np.concatenate(([circmean(phase_raster[:, i0:i1], axis=1)[1] for i0, i1 in zip(split_inds[:-1], split_inds[1:])]))

                # Decode IMF phase using each spike type
                for spike_type in ['tonicspk', 'burst']:
                    print(spike_type)
                    # get raster for spike type and split into segments
                    spike_times = unit['%s_times' % spike_type]
                    if len(spike_times) <= NSPIKES:
                        continue
                    spike_raster, spike_tpts = get_psth(trial_starts, spike_times, post=trial_duration)
                    split_inds = np.linspace(0, len(spike_tpts), NSPLITS + 1).astype(int)
                    X = np.row_stack([spike_raster[:, i0:i1] for i0, i1 in zip(split_inds[:-1], split_inds[1:])])
                    # use tuning phase to set phase bins
                    tuning_phase = df_unittuning['%s_phase' % spike_type][0]
                    if np.isnan(tuning_phase):
                        continue
                    phase_shift = angle_subtract(tuning_phase, -1 *  np.pi / 2) - np.pi
                    phase_means_shifted = angle_subtract(phase_means, phase_shift) - np.pi
                    phase_labels = np.digitize(phase_means_shifted, bins=PHASEBINS)
                    phase_labels = phase_labels.clip(1, NPHASEBINS) - 1
                    if len(np.unique(phase_labels)) < 2:
                        raise RuntimeError
                    # predict phase bin
                    print("decoding phase")
                    classifier = SVC(kernel='rbf')
                    crossval = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
                    scores = cross_val_score(classifier, X, phase_labels, cv=crossval)
                    data['%s_phase' % spike_type] = scores.mean()

                ## Decode stimulus across phase bins using all spike times
                spike_times = unit['spk_times']
                if len(spike_times) <= NSPIKES:
                    continue
                spike_raster, spike_tpts = get_psth(trial_starts, spike_times, post=trial_duration)
                #i0, i1 = spike_tpts.searchsorted([0, trial_duration])
                #spike_raster = spike_raster[:, i0:i1]
                #spike_tpts = spike_tpts[i0:i1]
                split_inds = np.linspace(0, len(spike_tpts), NSPLITS + 1).astype(int)
                X = np.row_stack([spike_raster[:, i0:i1] for i0, i1 in zip(split_inds[:-1], split_inds[1:])])
                # use tonic phase to set the phase bins
                tuning_phase = df_unittuning['tonicspk_phase'][0]
                if np.isnan(tuning_phase):
                    continue
                phase_shift = angle_subtract(tuning_phase, -1 *  np.pi / 2) - np.pi
                phase_means_shifted = angle_subtract(phase_means, phase_shift) - np.pi
                phase_labels = np.digitize(phase_means_shifted, bins=PHASEBINS)
                phase_labels = phase_labels.clip(1, NPHASEBINS) - 1
                if ((phase_labels == 0).mean() < 0.25) or ((phase_labels == 1).mean() < 0.25):
                    print("Phase split biased")
                    continue
                # split segments based on phase bin
                X1 = X[phase_labels.astype(bool)]
                y1 = stim_labels[phase_labels.astype(bool)]
                if len(np.unique(y1)) < 5:
                    raise RuntimeError
                X2 = X[~phase_labels.astype(bool)]
                y2 = stim_labels[~phase_labels.astype(bool)]
                if len(np.unique(y2)) < 5:
                    raise RuntimeError
                # train on phase bin 2 & test
                print("decoding stimulus, set 2")
                classifier = SVC(kernel='linear').fit(X2, y2)
                data['stim_train2_test2'] = classifier.score(X2, y2)
                data['stim_train2_test1'] = classifier.score(X1, y1)
                # train on the second phase bin & test
                print("decoding stimulus, set 1")
                classifier = SVC(kernel='linear').fit(X1, y1)
                data['stim_train1_test1'] = classifier.score(X1, y1)
                data['stim_train1_test2'] = classifier.score(X2, y2)
                # random
                splitter = RepeatedStratifiedKFold(n_splits=2, n_repeats=5, random_state=0)
                shf_diffs = np.full(10, np.nan)
                y = stim_labels
                for shf_i, (train, test) in enumerate(splitter.split(X, stim_labels)):
                    classifier = SVC(kernel='linear').fit(X[train], y[train])
                    shf_diffs[shf_i] = classifier.score(X[test], y[test]) - classifier.score(X[train], y[train])
                data['stim_testshf'] = shf_diffs

                seriess.append(pd.Series(data=data))
    df_decoding = pd.DataFrame(seriess)
    filename = 'imfdecoding_{}_norm.pkl'.format(args.e_name)
    df_decoding.to_pickle(DATAPATH + filename)