"""Collect rasters of responses for all LGN opto experiments as a function of stimulus type
(movies, gratings), run state, and opto state. Then, calculate various measures and save
results to DataFrames. Some variables (DataFrames and mseu string lists) can be optionally
saved to and loaded from .pickle files to avoid long and unnecessary recalculation times.

After connecting to server with `djd`, run this script at the IPython/Jupyter command line;
the `-i` runs scripts in IPython's namespace instead of an empty one:

`run -i main.py`

`pvmvis` (default), `ntsrmvis` or `negntsrmvis` can be provided as an `--exptype=` argument
to this script, to load and later plot PV-Cre (most of the paper), Ntsr1-Cre (Fig1S4),
or Ntsr negative (Fig1S5) data, respectively.

This script can also be run without a server connection, if you choose to load from pickles.
"""

import sys
import argparse

from numpy import pi
from numpy.random import choice
import scipy.stats
from scipy.stats import (linregress, ttest_rel, ttest_ind, ttest_1samp, wilcoxon,
                         ks_2samp)
from scipy.stats.mstats import gmean
from scipy.optimize import least_squares
#import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
from matplotlib.ticker import ScalarFormatter, FormatStrFormatter
import seaborn as sns
import pandas as pd

#from pycircstat.tests import kuiper

from collections import OrderedDict as odict

import expio

from djd.util import (seq2sql, intround, split_tranges, wrap_raster, issubset, key2mseustr,
                      mseustr2key, key2msestr)
from djd.plot import wintitle, simpletraster, cf, saveall, simple_plot_rfs
from djd.signal import (raster2psth, raster2freqcomp, sparseness, reliability, snr_baden2016,
                        get_psth_peaks_gac, get_trialmat, pairwisecorr, rfs_for_cdata)
from djd.model import fit_MUA_RFs
from djd.stats import vector_OSI, percentile_ci, rayleigh_test, niell_DSI
from djd.model import sum_of_gaussians
from djd.eye import i_clean

from util import (get_exps, fitmodel, desat, get_max_snr, ms2msstr,
                  msu2msustr, mseustr2msustr, mseustrs2msustrs, mseustr2mstr, mseustrs2mstrs,
                  mseustr2msestr, mseustrs2msestrs, findmse,
                  load, load_all, save, export2csv, axes_disable_scientific, linear_loss,
                  residual_rsquared)
#import rf # deprecated


## Define constants:

EXPTYPE = 'pvmvis' # pvmvis, ntsrmvis, negntsrmvis, pgnpvmvis
if __name__ == '__main__':
    # parse command-line arguments:
    parser = argparse.ArgumentParser(description='Calculate script for natfeedback paper')
    parser.add_argument("--exptype", help="pvmvis, ntsrmvis, negntsrmvis, pgnpvmvis")
    args = parser.parse_args()
    EXPTYPE = args.exptype or EXPTYPE
print('*** EXPTYPE: %r' % EXPTYPE)

PAPERPATH = '/home/mspacek/blab/natfb/paper'

VERBOSE = False # only used for gac at the moment

RATETHRESH = 0.01 # mean firing rate threshold for unit inclusion, across all trial types, Hz
SNRTHRESH = 0.015
#RSQTHRESH = 0.2
# p-value thresh for significance of each point's deviance from diagonal in meanrate, burst
# ratio, and reliability scatter plots:
SCATTERPTHRESH = 0.05
SGNF2ALPHA = {True: 1, False: 0.4}
# visual drive unit inclusion criteria for gratings, ignores st8 and opto:
VDNCRIT = 1 # min num points in tuning curve that are at least VDZCRIT sems away from spont rate
VDZCRIT = 2.5 # 1.95, 2.5, 3.2

## TODO: these need to be optimized for mouse instead of cat:
PSTHBASELINEMEDIANX = 2 # PSTH median multiplier to estimate baseline
PSTHPEAKMINTHRESH = 3 # PSTH peak detection threshold relative to estimated baseline, Hz

STIMTYPES = 'mvi', 'grt'
stimtype2axislabel = {'grt':'grating', 'mvi':'movie'}
# exclude 'clr' kind, since 'nat' is a superset of 'clr' anyway
#MVIKINDS = 'nat', 'pnk', 'shf'
MVIKINDS = 'nat',
ALLSTIMTYPES = MVIKINDS + ('grt',)
STIMTYPESPLUSALL = list(STIMTYPES) + ['mvi+grt'] # strings
STIMTYPESPLUSALLI = list(STIMTYPES) + [slice(None)] # for use as indices

# iterate over run states in this order:
ST8S = ['run', 'sit'] # excludes 'none'
ALLST8S = ['none', 'run', 'sit']
ST8COMBOS = [['none'], ['run', 'sit'], ['run', 'sit']]
OPTOCOMBOS = [[False, True], [False], [True]]
st82crit = odict((('run', '(run_speed > 1).mean() > 0.5'),
                  ('sit', '(run_speed < 0.25).mean() > 0.5'),
                  ('none', 'none')))
rungreen = '#' + ''.join([ format(val, '02x') for val in [66, 168, 117] ])
sitorange = '#' + ''.join([ format(val, '02x') for val in [235, 115, 9] ])
st82clr = {'run':rungreen, 'sit':sitorange, 'none':'black'}
st82alpha = {None:1, 'run':1, 'sit':0.5} # sitting is like suppression, so desaturate colour
st82clrmap = {'run':plt.cm.Greens, 'sit':plt.cm.Oranges, 'none':plt.cm.Greys}

# define example mseus and their colors:
green = '#007f00' # deep green
violet = '#9f3fff' # pure violet (7f00ff) is a little too dark
darkblue = '#000064' # control condition
optoblue = '#2a7fff' # light shade of blue for opto condition
black = '#000000'
asterisk = 5, 2, 0 # 5 sided, asterisk style, 0 deg rotation, has rounded corners though
DEFSZ = 50 # default scatter plot point size (open points)
exmpli2clr = {1:violet, 2:green, 3:optoblue} # example neurons 1, 2, 3
# use different marker shapes to help distinguish example points:
exmpli2mrk = {1:'x', 2:'+', 3:asterisk}
exmpli2sz = {1:60, 2:70, 3:60} # marker size
exmpli2lw = {1:2, 2:2, 3:2} # (marker) line width
# use large closed circles for all example points:
#exmpli2mrk = {1:'.', 2:'.', 3:'.'}
#EXMPLSZ = 70 # example scatter plot point size (closed points)
#exmpli2sz = {1:EXMPLSZ, 2:EXMPLSZ, 3:EXMPLSZ}
fig1exmpli = 1 # fig1 uses example neuron 1
fig5exmpli = 1 # fig5 uses example neuron 1
mvimseu2exmpli = {'PVCre_2018_0003_s03_e03_u51':1, # example neuron 1
                 #'PVCre_2018_0003_s03_e03_u52',
                  'PVCre_2017_0008_s12_e06_u56':2, # example neuron 2
                 #'PVCre_2017_0008_s09_e07_u22',
                 }
grtmseu2exmpli = {'PVCre_2018_0003_s03_e02_u51':1, # example neuron 1
                 #'PVCre_2018_0003_s03_e02_u52',
                 #'PVCre_2018_0003_s02_e02_u11',
                 #'PVCre_2018_0003_s02_e02_u38':
                 #'PVCre_2018_0001_s02_e06_u64':
                 #'PVCre_2018_0001_s02_e06_u61':
                 #'PVCre_2018_0001_s02_e06_u64':
                 #'PVCre_2018_0001_s05_e02_u16':3, # old example neuron 3
                 'PVCre_2018_0003_s05_e02_u164':3, # new example neuron 3 (2019-05-31)
                 }

mvimsu2exmpli = { mseustr2msustr(k):v for k, v in mvimseu2exmpli.items() }
grtmsu2exmpli = { mseustr2msustr(k):v for k, v in grtmseu2exmpli.items() }
msu2exmpli = {**mvimsu2exmpli, **grtmsu2exmpli} # merge the two

burstclr = 'red'

OPTOS = False, True
opto2tuni = {False:0, True:1} # for indexing into tun_params field in Tuning table
opto2fb = {None:'', False:'feedback', True:'suppression'}
opto2clr = {None:darkblue, False:darkblue, True:optoblue}
opto2alpha = {None:1, False:1, True:0.5} # with opto, feedback is removed, so desaturate colour

modmeasures = ['meanrate', 'meanrate02', 'meanrate35', 'blankmeanrate', 'blankcondmeanrate',
               'meanburstratio', 'blankmeanburstratio', 'blankcondmeanburstratio',
               'spars', 'rel', 'meanpkw', 'snr']
modmeasuresnoblankcond = modmeasures.copy()
modmeasuresnoblankcond.remove('blankcondmeanrate')
modmeasuresnoblankcond.remove('blankcondmeanburstratio')
modmeasuresnoblank = modmeasuresnoblankcond.copy()
modmeasuresnoblank.remove('blankmeanrate')
modmeasuresnoblank.remove('blankmeanburstratio')
measure2axislabel = {'meanrate':'FR', 'meanrate02':'FR', 'meanrate35':'FR',
                     'blankmeanrate':'FR', 'blankcondmeanrate':'FR',
                     'meanburstratio':'burst ratio', 'blankmeanburstratio':'burst ratio',
                     'blankcondmeanburstratio':'burst ratio',
                     'spars':'sparseness', 'rel':'reliability',
                     'meanpkw':'mean peak width', 'snr':'SNR'}
measure2axisunits = {'meanrate':' (spk/s)', 'meanrate02':' (spk/s)', 'meanrate35':' (spk/s)',
                     'meanpkw':' (s)'}
short2longaxislabel = {'FR':'Firing rate'}

# restrict FiringPattern to our usual burst definition from Lu et al, 1992:
BURSTCRITERION = ('(fp_dtsilent BETWEEN 0.099 and 0.101) AND '
                  '(fp_dtburst BETWEEN 0.0039 AND 0.0041)')
fmodes = ['', 'nb', 'b', 'nr'] # (narrow) firing modes
fmode2txt = {'':'all', 'nb':'nonburst', 'b':'burst', 'nr':'nonrand'}

# Tuning table keys used for gratings:
ivs_order = 'grat_orientation, opto1'
tun_model = 'sum_of_gaussians'

model = 'threshlin' # linear or threshlin
relaverage = 'mean' # reliability averaging method: mean or median

DEFAULTFIGURESIZE = 2.8, 2.8 # normal
DUMBBELLFIGSIZE = 3.3, 4.4 # dumbbell plots
RASTERSCALEX = 0.75
OFFSETS = -0.5, 0.75 # wide raster time offsets, sec
OFFSETS02 = 0, -3 # limits to spikes from t=0 to 2 s, assuming 5 sec movies
OFFSETS35 = 3, 0 # limits to spikes from t=3 to 5 s
NTRIALSMVIGRT = 120 # limit movies to first 120 trials for more direct comparison with gratings

PSTHHEIGHT = 3
binw, tres, kernel = 0.02, 0.001, 'gauss' # for calculating PSTHs, sec
psthplottres = 0.010 # sec
ssx = intround(psthplottres / tres) # subsample factor to get the desired plot tres

MINNTRIALSAMPLETHRESH = 10

# ON/OFF/transient classification:
PDT = 0.050 # propagation delay time for movie onset (s)
WINDT = 0.20 # duration separating start of pre and PDT, and PDT and end of post (s)
EXCLTR = 0, 2*PDT # time range around PDT to treat as transient response
assert EXCLTR[0] <= PDT <= EXCLTR[1]

## Query the user:

msg = ('Constants defined. What next?\n'
       '0: calculate from scratch, including fits\n'
       '1: calculate from scratch, but load fits from pickle\n'
       '2: load from pickles and exit\n'
       '3: load from pickles and assume connected\n'
       '>> ')
response = int(input(msg))
# load fits from pickle? save heaps of CPU time:
LOADFITSFROMPICKLE = False if response == 0 else True
if response in [2, 3]:
    subfolder = EXPTYPE if EXPTYPE != 'pvmvis' else ''
    name2val = load_all(subfolder=subfolder)
    locals().update(name2val)
    if response == 2:
        sys.exit()


## Collect series, experiments, units from database:

'''
# all single movie opto experiments in LGN that have been sorted:
nats = ((e & 'e_name LIKE "MAS_%%"' & 'e_name NOT LIKE "%%+%%"' & 'e_optowl = 470')
        & (s & 's_depth > 2500' & 's_depth < 3500') # upper limit excludes TRN series
        & u)

# all pink noise opto experiments in LGN that have been sorted:
pnks = ((e & 'e_name LIKE "MAS_%%PNK%%"' & 'e_optowl = 470')
        & (s & 's_depth > 2500' & 's_depth < 3500')
        & u)
'''

"""
PVCre mice:
    * PVCre_2018_0002_s04 - good RFs, OK tuning, excellent opto
    ** PVCre_2019_0001_s05 - excellent RFs, OK tuning, good opto
        - make sure to exclude e01 and e02 due to seizure between e02 and e03
    ** PVCre_2019_0001_s06 - excellent RFs and tuning, OK opto, started by Davide
        - sorted: single unit RFs are quite bad though, excluding
        - this mouse's pupil pointed very high under visible light illumination, in the dark
          with dilated pupil, it pointed forward normally. Another good reason to exclude
    - PVCre_2019_0001_s07 - excellent RFs and tuning, very weak opto
    * PVCre_2019_0001_s08 - excellent RFs and tuning, OK opto. stained with DiI
    * PVCre_2019_0002_s05 - excellent RFs and tuning, good opto
    * PVCre_2019_0002_s06 - excellent RFs, OK tuning, excellent opto
    * PVCre_2019_0002_s07 - excellent RFs, good tuning and opto
    ** PVCre_2019_0002_s08 - excellent RFs, tuning and opto! stained with DiI
        - sorted, single unit RFs are good

As of 2018-12-13, both PVCre_2018_0010_s02 and s05 are also excluded via updated
's_region' values. Both were intended to be in dLGN, but may have somehow missed it, based on
weird RFs

Ntsr1cre mice:
    * Ntsr1Cre_2019_0002_s03: sorted, mov rasters look wrong (bad sorting? asleep mouse?),
      100 trials/cond, no running, but opto effects seem consistent
        - check itracking movies, resort this one
    - Ntsr1Cre_2019_0002_s04: no mov
    * Ntsr1Cre_2019_0002_s05: sorted but only 3 units, only 2 have mov PSTH peaks, ~ consistent
    - Ntsr1Cre_2019_0003: no mov
    - Ntsr1Cre_2019_0004 has big cortical hole:
    - Ntsr1Cre_2019_0004_s02: nice mov PSTHs, but no opto effect at all for mov or grt
    - Ntsr1Cre_2019_0004_s03: ??
    ** Ntsr1Cre_2019_0007_s04 - good
    - Ntsr1Cre_2019_0007_s05 - suspicious about if it's really LGN, SU results look poor
    ** Ntsr1Cre_2019_0008_s05 - good
    - do older mice just for their grating data, do shell/core and other separation
"""

mvis = get_exps(locals(), EXPTYPE)

mviseries = s & mvis
mviunits = up & mvis
mvispikes = spk & mvis
if EXPTYPE == 'negntsrmvis': # force inclusion of a V1 movie recording
    mvieye = EyeIxtract() & [{'m':'Ntsr1Cre_2020_0001', 's':2, 'e':11}, # V1
                             {'m':'Ntsr1Cre_2020_0001', 's':4, 'e':10}] # dLGN
else:
    mvieye = EyeIxtract() & mvis
mvirun = rt & mvis

# fetch unique animals included in data set:
#np.unique(mviseries.fetch('m'))

## TODO: need to check which units are plausibly in dLGN, not all units on the shank will be!
## - this is mostly done ahead of time during spike sorting, MUA envelope RF analysis suggests
##   some single units have maxchans that are outside the chan range of significant RFs of
##   the population, though there are some caveats to that - maybe maxchan is too stringent,
##   or maybe it's possible for one or two single units to have visual responses even when
##   the population doesn't show much response to sparse noise

# all grating oritun opto experiments in LGN that have been sorted that occurred during the
# same series as movie experiments:

grts = ((e & 'e_name LIKE "%%oritun%%"' & 'e_optowl IN (465, 470)')
         & mviseries
         & u)

grtseries = mviseries & grts
# use the same units for gratings as in movies:
#grtunits = mviunits & grts
# use units active during a grating experiment, not necessarily the same units as in movies:
grtunits = up & grts
grtspikes = spk & grts
grttun = tun & grts

spons = ((e & 'e_name LIKE "%%spont%%"' & 'e_optowl IN (465, 470)')
          & mviseries
          & u)

# get all possible movie mseus, regardless of rate:
mvimseus = []
for mse in mvis.fetch(dj.key):
    uids = (up & mse).fetch('u')
    for uid in uids:
        mseu = mse.copy()
        mseu['u'] = uid
        mvimseus.append(mseu)

# get all possible grating mseus, regardless of rate:
grtmseus = []
for mse in grts.fetch(dj.key):
    uids = (up & mse).fetch('u')
    for uid in uids:
        mseu = mse.copy()
        mseu['u'] = uid
        grtmseus.append(mseu)
## TODO: grtmseu filtering by visual responsiveness should be done here, but would currently
## result in index errors in the big grtresp loop below, so it's done piecemeal in the loop:
#grtmseus = (grts * up).fetch(dj.key)
#grtmseus = (grts * up) & (vd.Units() & 'n_crit=1' & 'z_crit > 2.5')


# get all possible spontaneous mseus, regardless of rate:
sponmseus = []
for mse in spons.fetch(dj.key):
    uids = (up & mse).fetch('u')
    for uid in uids:
        mseu = mse.copy()
        mseu['u'] = uid
        sponmseus.append(mseu)


msefmt = '{m}_s{s:02}_e{e:02}'
msufmt = '{m}_s{s:02}_u{u:02}'
mseufmt = '{m}_s{s:02}_e{e:02}_u{u:02}'

mvimseustrs = [ mseufmt.format(**mseu) for mseu in mvimseus ]
grtmseustrs = [ mseufmt.format(**mseu) for mseu in grtmseus ]
sponmseustrs = [ mseufmt.format(**mseu) for mseu in sponmseus ]

mvimsustrs = [ msufmt.format(**unit) for unit in mviunits ]
grtmsustrs = [ msufmt.format(**unit) for unit in grtunits ]
mvigrtmsustrs = list(np.union1d(mvimsustrs, grtmsustrs)) # superset of the two lists

if response == 3:
    sys.exit()

## Calculate:

## collect movie rasters, meanrates, burst info, PSTHs, sparseness, reliability and PSTH
## peak info by mseu, movie kind, run state, and opto condition:
levels = [mvimseustrs, MVIKINDS, ALLST8S, OPTOS]
names = ['mseu', 'kind', 'st8', 'opto']
mi = pd.MultiIndex.from_product(levels, names=names)
columns = ['dt', 'optotrange', 'trialis',
           'raster', 'braster', 'nbraster', 'wraster', 'wbraster', 'wnbraster', # rasters
           'rates', 'rate02s', 'rate35s', 'blankrates', # single trial FRs
           'meanrate', 'meanrate02', 'meanrate35', 'blankmeanrate', # trial-averaged FRs
           'burstis', 'wburstis', 'burstratios', 'blankburstratios', # single trial BRs
           'meanburstratio', 'blankmeanburstratio', # trial-averaged BRs
           'psth', 'bpsth', 'nbpsth', 'wpsth', 'wbpsth', 'wnbpsth', 't', 'wt', 'bins', 'wbins',
           'spars', 'rhos', 'rel', 'snr', 'pkts', 'pkws', 'meanpkw']
mviresp = pd.DataFrame(index=mi, columns=columns)
ntotpeaks = 0 # total number of peaks detected
for mvi in mvis:
    msestr = msefmt.format(**mvi)
    print(msestr)
    exp = mvi['e']
    moviecond = mov & mvi
    kind2stimis = moviecond.moviekind2stimis() # movie kind to stimi mapping
    trials = trial & mvi
    alltrialis = np.sort(trials.fetch('trial_id')) # trialis corresponding to rows in tranges
    #rates = ra & mvi
    uids = (up & mvi).fetch('u')
    # all trials, in trial ID order:
    rasters, tranges, _, opto, _ = (spk & mvi).get_rasters()
    wrasters, *_ = (spk & mvi).get_rasters(offsets=OFFSETS) # w = wide, same returned tranges
    rasters02, *_ = (spk & mvi).get_rasters(offsets=OFFSETS02) # from 0 to 2 s, same tranges
    rasters35, *_ = (spk & mvi).get_rasters(offsets=OFFSETS35) # from 3 to 5 s, same tranges
    wtranges = tranges.copy() + OFFSETS
    #tranges02 = tranges.copy() + OFFSETS02
    #tranges35 = tranges.copy() + OFFSETS35
    # make sure trial duration is very consistent:
    dts = tranges.ptp(axis=1)
    dt = dts.mean() # mean trial duration
    dtstd = dts.std()
    assert dtstd / dt < 0.005
    # make sure opto tranges are consistent relative to trial tranges:
    optotrialstimt0s = tranges[opto == True][:, 0]
    evtranges = (evtimes & mvi).fetch1('ev_tranges')
    alloptotranges = evtranges - np.reshape(optotrialstimt0s, (-1, 1))
    optotrange = alloptotranges.mean(axis=0)
    optotrangestd = alloptotranges.std(axis=0)
    assert (optotrangestd / optotrange < 0.005).all()
    # get tranges and rasters for just the period before stimulus onset during which opto
    # can be on, i.e. from optotrange[0] (-0.272 s) to 0 s before each stimulus onset:
    blankdt = abs(optotrange[0]) # averages to 0.283 s across all experiments
    print('mvi blankdt:', blankdt)
    blanktranges = np.zeros_like(tranges)
    blanktranges[:, 0] = tranges[:, 0] - blankdt
    blanktranges[:, 1] = tranges[:, 0]
    blankrasters, _, _, _, _ = (spk & mvi).get_rasters(tranges=blanktranges)
    mviopto = evcond & mvi & 'ev_chan="opto1"'
    mvioptostimis, mvioptovals = mviopto.fetch('stim_id', 'ec_val')
    # calculate burst ratios for all units:
    firingpatterns = fp & mvi & BURSTCRITERION
    brus = firingpatterns.burst_ratio(tranges=tranges)
    wbrus = firingpatterns.burst_ratio(tranges=wtranges)
    blankbrus = firingpatterns.burst_ratio(tranges=blanktranges)
    for kind, kindstimis in kind2stimis.items():
        print(kind)
        if not kindstimis or kind not in MVIKINDS: # exclude empty stimis and 'clr' kind
            continue
        kindtrials = (trials & 'stim_id IN %s' % seq2sql(kindstimis))
        kindtrialis = kindtrials.fetch('trial_id')
        trialiis = np.isin(alltrialis, kindtrialis) # bool with same shape as alltrialis
        # calculate narrow and wide PSTH bins for all units:
        tmin, tmax = 0, dt
        bins = split_tranges([(tmin, tmax)], binw, tres) # narrow
        midbins = bins.mean(axis=1) # narrow
        wbins = split_tranges([(tmin+OFFSETS[0], tmax+OFFSETS[1])], binw, tres) # wide
        wmidbins = wbins.mean(axis=1) # wide
        for st8, st8crit in st82crit.items():
            if st8crit == 'none': # 'none' is not in the State.Trial table
                st8trialis = alltrialis
            else:
                st8trialis = (st8t & mvi & {'st8_crit':st8crit}).fetch('trial_id')
            st8trialis = np.sort(st8trialis)
            for opto in OPTOS:
                optois = mvioptovals == opto
                # limit stimis by opto:
                optostimis = mvioptostimis[optois]
                # also limit stimis by movie kind:
                stimis = np.intersect1d(kindstimis, optostimis)
                if len(stimis) == 0:
                    continue
                optotrials = trials & 'stim_id IN %s' % seq2sql(optostimis)
                optotrialis = optotrials.fetch('trial_id')
                # intersect kind, st8 and opto trialis:
                trialis = np.intersect1d(kindtrialis, st8trialis)
                trialis = np.intersect1d(trialis, optotrialis)
                trialiis = np.isin(alltrialis, trialis) # bool with same shape as alltrialis
                if len(trialis) == 0:
                    print('%s %s %s has no trials, skipping' % (kind, st8, opto))
                    continue
                for uid in uids:
                    if uid not in rasters:
                        continue
                    mseu = {'m':mvi['m'], 's':mvi['s'], 'e':mvi['e'], 'u':uid}
                    mseustr = mseufmt.format(**mseu)
                    raster = rasters[uid][trialiis] # subset of raster trials
                    wraster = wrasters[uid][trialiis] # subset of wraster trials
                    blankraster = blankrasters[uid][trialiis] # subset of blankraster trials
                    raster02 = rasters02[uid][trialiis][:NTRIALSMVIGRT]
                    raster35 = rasters35[uid][trialiis][:NTRIALSMVIGRT]
                    rates = np.array([ len(trialspikes)/dt for trialspikes in raster ])
                    rate02s = np.full(rates.shape, np.nan) # init with full shape with NaN
                    rate35s = np.full(rates.shape, np.nan)
                    blankrates = np.full(rates.shape, np.nan)
                    # fill only the first n trials/blanks:
                    rate02s[:len(raster02)] = np.array([ len(trialspikes)/2 for trialspikes
                                                         in raster02 ])
                    rate35s[:len(raster35)] = np.array([ len(trialspikes)/2 for trialspikes
                                                         in raster35 ])
                    blankrates[:len(blankraster)] = np.array([ len(bspikes)/blankdt for bspikes
                                                               in blankraster ])
                    meanrate = len(np.hstack(raster)) / dt / len(raster)
                    #assert np.isclose(meanrate, rates.mean()) # sanity check
                    meanrate02 = len(np.hstack(raster02)) / 2 / len(raster02)
                    meanrate35 = len(np.hstack(raster35)) / 2 / len(raster35)
                    blankmeanrate = len(np.hstack(blankraster)) / blankdt / len(blankraster)
                    if meanrate < RATETHRESH:
                        print('%s %s %s %s does not meet RATETHRESH, skipping'
                              % (mseustr, kind, st8, opto))
                        continue
                    mviresprow = mviresp.loc[mseustr, kind, st8, opto]
                    # technically, this is the wrong approach, and could result in writing
                    # to a copy (an independent Series), which isn't the intention (see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy)
                    # but it seems to work because the mviresp DataFrame is being filled one
                    # row at a time, and wasn't initialized explicitly by setting everything
                    # to e.g. np.nan in a single vector operation
                    mviresprow['dt'] = dt
                    mviresprow['optotrange'] = optotrange
                    mviresprow['trialis'] = trialis
                    mviresprow['raster'] = raster # narrow raster
                    mviresprow['wraster'] = wraster # wide raster, including OFFSETS
                    mviresprow['rates'] = rates # based on narrow raster
                    mviresprow['rate02s'] = rate02s
                    mviresprow['rate35s'] = rate35s
                    mviresprow['blankrates'] = blankrates
                    mviresprow['meanrate'] = meanrate # based on narrow raster
                    mviresprow['meanrate02'] = meanrate02
                    mviresprow['meanrate35'] = meanrate35
                    mviresprow['blankmeanrate'] = blankmeanrate
                    # fill in the corresponding trial burst ratios:
                    for bru, wbru, blankbru in zip(brus, wbrus, blankbrus):
                        if bru['u'] == uid:
                            assert bru['m'] == mseu['m']
                            assert bru['s'] == mseu['s']
                            assert bru['e'] == mseu['e']
                            burstis = np.array(bru['burst_index'], # narrow, ragged array
                                               dtype=object)[trialiis]
                            wburstis = np.array(wbru['burst_index'], # wide
                                                dtype=object)[trialiis]
                            #blankburstis = np.array(blankbru['burst_index'],
                            #                        dtype=object)[trialiis]
                            burstratios = np.array(bru['burst_ratio'])[trialiis] # narrow
                            blankburstratios = np.array(blankbru['burst_ratio'])[trialiis]
                            mviresprow['burstis'] = burstis # narrow
                            mviresprow['wburstis'] = wburstis # wide, for plotting
                            mviresprow['burstratios'] = burstratios # narrow
                            mviresprow['blankburstratios'] = blankburstratios
                            mviresprow['meanburstratio'] = np.nanmean(burstratios)
                            mviresprow['blankmeanburstratio'] = np.nanmean(blankburstratios)
                            break # out of bru loop
                    # collect narrow burst and non-burst spike rasters:
                    braster, nbraster = [], []
                    for row, bis in zip(raster, burstis): # iterate over trials in raster
                        if len(bis) == 0:
                            bis = [] # none
                            nbis = slice(None) # all
                        else:
                            allis = np.arange(len(row)) # all spikes in this trial
                            nbis = np.setdiff1d(allis, bis)
                        braster.append(row[bis])
                        nbraster.append(row[nbis])
                    # collect wide burst and non-burst spike rasters:
                    wbraster, wnbraster = [], []
                    for row, bis in zip(wraster, wburstis): # iterate over trials in wraster
                        if len(bis) == 0:
                            bis = [] # none
                            nbis = slice(None) # all
                        else:
                            allis = np.arange(len(row)) # all spikes in this trial
                            nbis = np.setdiff1d(allis, bis)
                        wbraster.append(row[bis])
                        wnbraster.append(row[nbis])
                    braster  = np.array(braster, dtype=object) # ragged array
                    nbraster = np.array(nbraster, dtype=object)
                    wbraster = np.array(wbraster, dtype=object)
                    wnbraster = np.array(wnbraster, dtype=object)
                    mviresprow['braster'] = braster # narrow burst raster
                    mviresprow['nbraster'] = nbraster # narrow non-burst raster
                    mviresprow['wbraster'] = wbraster # wide burst raster
                    mviresprow['wnbraster'] = wnbraster # wide non-burst raster
                    # calculate narrow PSTHs:
                    psth = raster2psth(raster, bins, binw, tres, kernel)
                    bpsth = raster2psth(braster, bins, binw, tres, kernel)
                    nbpsth = raster2psth(nbraster, bins, binw, tres, kernel)
                    # calculate wide PSTHs:
                    wpsth = raster2psth(wraster, wbins, binw, tres, kernel)
                    wbpsth = raster2psth(wbraster, wbins, binw, tres, kernel)
                    wnbpsth = raster2psth(nbraster, wbins, binw, tres, kernel)
                    mviresprow['psth'] = psth # narrow PSTH
                    mviresprow['bpsth'] = bpsth # narrow burst PSTH
                    mviresprow['nbpsth'] = nbpsth # narrow non-burst PSTH
                    mviresprow['wpsth'] = wpsth # wide PSTH
                    mviresprow['wbpsth'] = wbpsth # wide burst PSTH
                    mviresprow['wnbpsth'] = wnbpsth # wide non-burst PSTH
                    mviresprow['t'] = midbins # narrow PSTH timepoints
                    mviresprow['wt'] = wmidbins # wide PSTH timepoints
                    mviresprow['bins'] = bins # narrow PSTH bins
                    mviresprow['wbins'] = wbins # wide PSTH bins
                    # calculate PSTH sparseness on narrow PSTH:
                    spars = sparseness(psth)
                    mviresprow['spars'] = spars
                    # collect single trial signals from narrow PSTH, calc response reliability:
                    trialsignals = []
                    for trialspiketimes in raster:
                        trialsignal = raster2psth([trialspiketimes], bins, binw, tres, kernel)
                        trialsignals.append(trialsignal)
                    trialsignals = np.vstack(trialsignals)
                    rel, rhos = reliability(trialsignals, average=relaverage)
                    if not np.isnan(rel):
                        assert rel > -0.05 # can come out weakly negative
                    mviresprow['rhos'] = rhos
                    mviresprow['rel'] = max(rel, 0) # clamp weakly negative values to 0
                    mviresprow['snr'] = snr_baden2016(trialsignals)
                    # detect peaks in narrow PSTH:
                    baseline = PSTHBASELINEMEDIANX * np.median(psth)
                    thresh = baseline + PSTHPEAKMINTHRESH # peak detection threshold
                    spikets = np.sort(np.concatenate(raster)) # flatten raster spike times
                    peakis, lis, ris = get_psth_peaks_gac(spikets, midbins, psth, thresh,
                                                          verbose=VERBOSE)
                    if VERBOSE:
                        print() # new line
                    npeaks = len(peakis)
                    if npeaks > 0:
                        mviresprow['pkts'] = peakis * tres # PSTH peak times (s)
                        pkws = (ris - lis) * tres # PSTH peak widths (s)
                        mviresprow['pkws'] = pkws
                        mviresprow['meanpkw'] = pkws.mean() # mean PSTH peak width (s)
                        ## TODO: use the peakis themselves for some other kind of measure,
                        ## maybe plot distributions of peaks relative to stimulus start, or
                        ## interpeak intervals, e.g.
                        # use absence of PSTH peaks to decide which units/conditions to exclude?
                        # this would obviously only work for movies, but that's what mviresp
                        # is all about...
                        #if npeaks == 0:
                        #    nrnids[state].append(nid) # save nonresponsive nids by state
                        #    continue # this PSTH has no peaks, skip all subsequent measures
                        ntotpeaks += npeaks
print('Detected %d movie PSTH peaks' % ntotpeaks)


## collect grating meanrates, rasters, tuning, and burst info by mseu,
## run state, and opto condition:
levels = [grtmseustrs, ALLST8S, OPTOS]
names = ['mseu', 'st8', 'opto']
mi = pd.MultiIndex.from_product(levels, names=names)
columns = ['dt', 'optotrange', 'trialis',
           'raster', 'braster', 'nbraster', 'wraster', # rasters
           'rates', 'blankrates', 'blankcondrates', # single trial FRs
           'meanrate', 'blankmeanrate', 'blankcondmeanrate', # trial-averaged FRs
           'burstis', 'wburstis', 'burstratios', # single trial BRs
           'blankburstratios', 'blankcondburstratios', # single trial BRs
           'meanburstratio', 'blankmeanburstratio', 'blankcondmeanburstratio', # trial-averaged BRs
           'stimis', 'oris', 'tfreq', 'tun', 'tunrsq']
grtresp = pd.DataFrame(index=mi, columns=columns)
for grt in grts:
    msestr = msefmt.format(**grt)
    print(msestr)
    exp = grt['e']
    alltrials = trial & grt # all trials, including blanks
    # all stimis and trialis, in stimis order:
    allstimis, alltrialis = alltrials.fetch('stim_id', 'trial_id')
    # map triali to stimi for all trials:
    triali2stimi = { triali:stimi for stimi, triali in zip(allstimis, alltrialis) }
    alltrialis = np.sort(alltrialis) # trialis corresponding to rows in tranges
    trials = alltrials & (grat & 'grat_contrast != 0.0') # filter out blank trials
    blankcondtrials = alltrials & (grat & 'grat_contrast = 0.0') # blank trials only
    uids = (up & grt).fetch('u')
    # all trials, in trial ID (temporal) order:
    rasters, tranges, _, opto, _ = (spk & grt).get_rasters()
    wrasters, *_ = (spk & grt).get_rasters(offsets=OFFSETS) # w = wide, same returned tranges
    wtranges = tranges.copy() + OFFSETS
    # make sure opto tranges are consistent relative to trial tranges:
    optotrialstimt0s = tranges[opto == True][:, 0]
    evtranges = (evtimes & grt).fetch1('ev_tranges')
    alloptotranges = evtranges - np.reshape(optotrialstimt0s, (-1, 1))
    optotrange = alloptotranges.mean(axis=0)
    optotrangestd = alloptotranges.std(axis=0)
    assert (optotrangestd / optotrange < 0.005).all()
    # get tranges and rasters for just the period before stimulus onset during which opto
    # can be on, i.e. from optotrange[0] to 0 s before each stimulus onset:
    blankdt = abs(optotrange[0]) # averages to 0.285 s across all experiments, as in movies
    print('grt blankdt:', blankdt)
    blanktranges = np.zeros_like(tranges)
    blanktranges[:, 0] = tranges[:, 0] - blankdt
    blanktranges[:, 1] = tranges[:, 0]
    blankrasters, _, _, _, _ = (spk & grt).get_rasters(tranges=blanktranges)
    grtopto = evcond & grt & 'ev_chan="opto1"'
    grtoptostimis, grtoptovals = grtopto.fetch('stim_id', 'ec_val')
    # calculate burst ratios for all units:
    firingpatterns = fp & grt & BURSTCRITERION
    brus = firingpatterns.burst_ratio(tranges=tranges)
    wbrus = firingpatterns.burst_ratio(tranges=wtranges)
    blankbrus = firingpatterns.burst_ratio(tranges=blanktranges)
    # make sure trial duration is very consistent:
    dts = tranges.ptp(axis=1)
    dt = dts.mean() # mean trial duration, for all trials (non-blank and blank)
    dtstd = dts.std()
    assert dtstd / dt < 0.005
    # make sure opto tranges are consistent relative to trial tranges:
    optotrialstimt0s = tranges[opto == True][:, 0]
    evtranges = (evtimes & grt).fetch1('ev_tranges')
    alloptotranges = evtranges - np.reshape(optotrialstimt0s, (-1, 1))
    optotrange = alloptotranges.mean(axis=0)
    optotrangestd = alloptotranges.std(axis=0)
    assert (optotrangestd / optotrange < 0.005).all()
    # make sure temporal freq is very consistent:
    tfreqs = (grat & grt).fetch('grat_temp_freq')
    tfreq = tfreqs.mean()
    tfreqstd = tfreqs.std()
    assert tfreqstd / tfreq < 0.005
    for st8, st8crit in st82crit.items():
        if st8crit == 'none': # 'none' is not in the State.Trial table
            st8trialis = alltrialis
        else:
            st8trialis = (st8t & grt & {'st8_crit':st8crit}).fetch('trial_id')
        st8trialis = np.sort(st8trialis)
        for opto in OPTOS:
            optois = grtoptovals == opto
            # limit stimis by opto:
            optostimis = grtoptostimis[optois]
            optotrials = trials & 'stim_id IN %s' % seq2sql(optostimis)
            blankcondoptotrials = blankcondtrials & 'stim_id IN %s' % seq2sql(optostimis)
            optotrialis = optotrials.fetch('trial_id')
            blankcondoptotrialis = blankcondoptotrials.fetch('trial_id')
            # intersect st8 and opto trialis:
            trialis = np.intersect1d(st8trialis, optotrialis)
            blankcondtrialis = np.intersect1d(st8trialis, blankcondoptotrialis)
            if len(trialis) == 0:
                print('%s %s has no trials, skipping' % (st8, opto))
                continue
            stimis = np.array([ triali2stimi[triali] for triali in trialis ]) # triali order
            oristimis, oris = (grat & grt).fetch('stim_id', 'grat_orientation')
            oris = np.hstack([ oris[oristimis == stimi] for stimi in stimis ]) # triali order
            stimsortis = stimis.argsort(kind='stable')
            trialis = trialis[stimsortis] # sorted by stimi
            stimis = stimis[stimsortis] # sorted by stimi
            oris = oris[stimsortis] # sorted by stimi
            for uid in uids:
                if uid not in rasters:
                    continue
                mseu = {'m':grt['m'], 's':grt['s'], 'e':grt['e'], 'u':uid}
                mseustr = mseufmt.format(**mseu)
                ## TODO: this should really be done once, where grtmseus are defined,
                ## but I don't want to break things right now (2019-07-15)
                if not (vd.Units() & mseu & ('n_crit=%d' % VDNCRIT)
                                          & ('z_crit > %f' % VDZCRIT)):
                    # not a visually responsive mseu (this query ignores st8 and opto)
                    print('%s is not visually responsive, skipping' % mseustr)
                    continue
                raster = rasters[uid][trialis] # subset of trials, sorted by stimi
                wraster = wrasters[uid][trialis] # subset of trials, sorted by stimi
                blankraster = blankrasters[uid][trialis] # subset of trials, sorted by stimi
                blankcondraster = rasters[uid][blankcondtrialis] # subset of blank condition trials
                rates = np.array([ len(trialspikes)/dt for trialspikes in raster ])
                blankrates = np.array([ len(bspikes)/blankdt for bspikes in blankraster ])
                assert len(blankrates) == len(rates) # should be same shape as rates
                blankcondrates = np.array([ len(bcspikes)/dt for bcspikes in blankcondraster ])
                meanrate = len(np.hstack(raster)) / dt / len(raster)
                #assert np.isclose(meanrate, rates.mean()) # sanity check
                blankmeanrate = len(np.hstack(blankraster)) / blankdt / len(blankraster)
                if len(blankcondraster) == 0:
                    blankcondmeanrate = np.nan
                else:
                    blankcondmeanrate = len(np.hstack(blankcondraster)) / dt / len(blankcondraster)
                if meanrate < RATETHRESH:
                    print('%s %s %s does not meet RATETHRESH, skipping'
                          % (mseustr, st8, opto))
                    continue
                grtresprow = grtresp.loc[mseustr, st8, opto]
                grtresprow['dt'] = dt
                grtresprow['optotrange'] = optotrange
                # overload 'trialis' name in df - includes non-blank trials and blank condition
                # trials, in that order:
                grtresprow['trialis'] = np.concatenate([trialis, blankcondtrialis])
                grtresprow['raster'] = raster # sorted by stimi
                grtresprow['wraster'] = wraster
                grtresprow['rates'] = rates
                grtresprow['blankrates'] = blankrates
                grtresprow['blankcondrates'] = blankcondrates
                grtresprow['meanrate'] = meanrate
                grtresprow['blankmeanrate'] = blankmeanrate
                grtresprow['blankcondmeanrate'] = blankcondmeanrate
                grtresprow['stimis'] = stimis # one entry per row in raster
                grtresprow['oris'] = oris # one entry per row in raster
                grtresprow['rates'] = rates
                grtresprow['tfreq'] = tfreq
                tunrow = (tun & mseu & {'ivs_order':ivs_order, 'tun_model':tun_model,
                                        'st8_crit':st8crit})
                if tunrow: # not empty
                    tunparams = tunrow.fetch1('tun_pars') # 2x5 array of tuning params
                    tunrsq = tunrow.fetch1('tun_rsq') # len 2 array of R2 values
                    tunspon = tunrow.fetch1('tun_spon_mean') # len 2 array of spon activity
                    oti = opto2tuni[opto] # opto Tuning index
                    params = list(tunparams[oti]) + [(tunspon[oti])]
                    grtresprow['tun'] = params # len 6 array: 5 tuning params + spon
                    """[dp, rp, rn, r0, sigma, spon]:
                        oripref, gauss ampl @ oripref, gauss ampl @ antipref, gauss offset,
                        gauss width, spon level
                    so index 1 wrt 3 and 5 is a measure of ori tuning strength"""
                    grtresprow['tunrsq'] = tunrsq[oti]
                else:
                    print('WARNING: empty Tuning row for %s, %s' % (mseu, st8crit))
                # fill in the corresponding trial burst ratios:
                for bru, wbru, blankbru in zip(brus, wbrus, blankbrus):
                    if bru['u'] == uid:
                        assert bru['m'] == mseu['m']
                        assert bru['s'] == mseu['s']
                        assert bru['e'] == mseu['e']
                        burstis = np.array(bru['burst_index'], # narrow, ragged array
                                           dtype=object)[trialis]
                        wburstis = np.array(wbru['burst_index'], # wide
                                            dtype=object)[trialis]
                        burstratios = np.array(bru['burst_ratio'])[trialis] # narrow
                        blankburstratios = np.array(blankbru['burst_ratio'])[trialis]
                        blankcondburstratios = np.array(bru['burst_ratio'])[blankcondtrialis]
                        grtresprow['burstis'] = burstis # narrow
                        grtresprow['wburstis'] = wburstis # wide, for plotting
                        grtresprow['burstratios'] = burstratios # narrow
                        grtresprow['blankburstratios'] = blankburstratios # narrow
                        grtresprow['blankcondburstratios'] = blankcondburstratios # narrow
                        # nanmean can raise "Mean of empty slice" warnings if argument
                        # is empty or is all nan, but that's OK, returns a nan in both cases:
                        grtresprow['meanburstratio'] = np.nanmean(burstratios)
                        grtresprow['blankmeanburstratio'] = np.nanmean(blankburstratios)
                        if len(blankcondburstratios) == 0:
                            grtresprow['blankcondmeanburstratio'] = np.nan
                        else:
                            grtresprow['blankcondmeanburstratio'] = np.nanmean(blankcondburstratios)
                        break # out of bru loop
                # collect narrow burst and non-burst spike rasters:
                braster, nbraster = [], []
                for row, bis in zip(raster, burstis): # iterate over trials in raster
                    if len(bis) == 0:
                        bis = [] # none
                        nbis = slice(None) # all
                    else:
                        allis = np.arange(len(row)) # all spikes in this trial
                        nbis = np.setdiff1d(allis, bis)
                    braster.append(row[bis])
                    nbraster.append(row[nbis])
                braster = np.array(braster, dtype=object) # ragged array
                nbraster = np.array(nbraster, dtype=object)
                grtresprow['braster'] = braster # narrow burst raster
                grtresprow['nbraster'] = nbraster # narrow non-burst raster


## Generate grating ori tuning dataframe for saving to a .pickle, and for convenient
## offline tuning plots:
grttundf = grttun.fetch(format='frame')
grttuninfo = (TuningInfo() & grttun).fetch(format='frame') # contains stimulus oris
# convert separate m, s, e, u columns in grttundf to a single mseu column in a new temporary
# df without a MultiIndex:
grttunresprows = []
for rowindex, rowvalues in grttundf.iterrows():
    mid, sid, eid, ivs_order, tun_model, st8_type, st8_crit, uid = rowindex
    mean, sem, spon_mean, pars, rsq = (rowvalues['tun_mean'],
                                       rowvalues['tun_sem'],
                                       rowvalues['tun_spon_mean'],
                                       rowvalues['tun_pars'],
                                       rowvalues['tun_rsq'])
    mseustr = key2mseustr({'m':mid, 's':sid, 'e':eid, 'u':uid})
    ori = grttuninfo.loc[mid, sid, eid, ivs_order]['ti_axes'][:, 0]
    newrow = {'mseu':mseustr, 'ivs_order':ivs_order, 'tun_model':tun_model,
              'st8_type':st8_type, 'st8_crit':st8_crit,
              'ori':ori, 'mean':mean, 'sem':sem, 'spon_mean':spon_mean,
              'pars':pars, 'rsq':rsq}
    grttunresprows.append(newrow)
newindex = ['mseu', 'ivs_order', 'tun_model', 'st8_type', 'st8_crit']
grttunresp = pd.DataFrame(grttunresprows).set_index(newindex)


## collect spontaneous meanrates and meanburstratios by mseu and opto cond,
## ignore run st8 for now:
levels = [sponmseustrs, OPTOS]
names = ['mseu', 'opto']
mi = pd.MultiIndex.from_product(levels, names=names)
columns = ['meanrate', 'meanburstratio']
sponresp = pd.DataFrame(index=mi, columns=columns)
for spon in spons:
    msestr = msefmt.format(**spon)
    print(msestr)
    #exp = spon['e']
    optotranges = (evtimes & spon).fetch1('ev_tranges')
    # ensure that intervals between end and start of consecutive opto pulses are at least
    # as long as the pulse length:
    optodts = np.diff(optotranges, axis=1) # opto pulse durations (sec)
    maxoptodt = optodts.max() # sec
    isitranges = np.vstack([optotranges[:-1, 0], optotranges[1:, 1]]).T # shape: npulses-1, 2
    assert np.diff(isitranges, axis=1).min() > maxoptodt # sec
    # use one opto pulse duration before each pulse as the control trange for that pulse:
    ctrltranges = optotranges - optodts
    ctrldts = np.diff(ctrltranges, axis=1) # control trange durations (sec)
    spkspon = spk & spon
    fpspon = fp & spon & BURSTCRITERION
    optorasters, _, _, optovals, _ = spkspon.get_rasters(optotranges, offsets=[0, 0])
    ctrlrasters, _, _, ctrlvals, _ = spkspon.get_rasters(ctrltranges, offsets=[0, 0])
    optobrus = fpspon.burst_ratio(tranges=optotranges)
    ctrlbrus = fpspon.burst_ratio(tranges=ctrltranges)
    assert optovals.all() == True
    assert ctrlvals.all() == False
    assert list(optorasters) == list(ctrlrasters) # should have exact same set of units
    uids = list(optorasters)
    opto2rasters = {False:ctrlrasters, True:optorasters}
    opto2brus = {False:ctrlbrus, True:optobrus}
    opto2totaldt = {False:ctrldts.sum(), True:optodts.sum()}
    for opto in OPTOS:
        rasters = opto2rasters[opto]
        brus = opto2brus[opto]
        totaldt = opto2totaldt[opto] # sec
        for uid, bru in zip(uids, brus):
            trialspikets = np.concatenate(rasters[uid]) # concatenate spikes from all "trials"
            mseu = {'m':spon['m'], 's':spon['s'], 'e':spon['e'], 'u':uid}
            mseustr = mseufmt.format(**mseu)
            sponresprow = sponresp.loc[mseustr, opto]
            nspikes = len(trialspikets)
            meanrate = nspikes / totaldt
            sponresprow['meanrate'] = meanrate
            assert bru['u'] == uid # sanity check
            burstratios = bru['burst_ratio'] # one per "trial"
            meanburstratio = np.nanmean(burstratios)
            sponresprow['meanburstratio'] = meanburstratio


## find best movie and grating response parameters per unit, across experiments:
mvilevels = [mvimsustrs, MVIKINDS, ALLST8S, OPTOS]
grtlevels = [grtmsustrs, ALLST8S, OPTOS]
mvinames = ['msu', 'kind', 'st8', 'opto']
grtnames = ['msu', 'st8', 'opto']
mvimi = pd.MultiIndex.from_product(mvilevels, names=mvinames)
grtmi = pd.MultiIndex.from_product(grtlevels, names=grtnames)
bestmvicolumns = modmeasuresnoblankcond
bestgrtcolumns = ['meanrate', 'blankmeanrate', 'meanburstratio', 'blankmeanburstratio']
bestmviresp = pd.DataFrame(index=mvimi, columns=bestmvicolumns)
bestgrtresp = pd.DataFrame(index=grtmi, columns=bestgrtcolumns)
stimtype2resp = {'mvi':mviresp, 'grt':grtresp}
stimtype2bestresp = {'mvi':bestmviresp, 'grt':bestgrtresp}
for stimtype in STIMTYPES:
    resp, bestresp = stimtype2resp[stimtype], stimtype2bestresp[stimtype]
    columns = bestresp.columns
    for index, row in resp.iterrows():
        mseu = index[0]
        msu = mseustr2msustr(mseu)
        bestindex = msu, *index[1:]
        for column in columns:
            newval = row[column]
            oldbestval = bestresp[column][bestindex]
            # the biggest value isn't always the best, e.g. lower meanpkw values are better:
            if column in ['meanpkw']:
                if pd.isna(oldbestval) or (newval < oldbestval):
                    bestresp[column][bestindex] = newval # overwrite
            else:
                if pd.isna(oldbestval) or (newval > oldbestval):
                    bestresp[column][bestindex] = newval # overwrite


## collect movie model fits by kind, state and opto:
#levels = [mvimseustrs, MVIKINDS, ALLST8S]
levels = [mvimseustrs, ['nat'], ['none']]
names = ['mseu', 'kind', 'st8']
mi = pd.MultiIndex.from_product(levels, names=names)
fitcolumns = ['m', 'th', 'rsq', 'nbm', 'nbth', 'nbrsq', 'bm', 'bth', 'brsq']
sampfitcolumns = ['ms', 'ths', 'rsqs', 'nbms', 'nbths', 'nbrsqs', 'bms', 'bths', 'brsqs',
                  'nrms', 'nrths', 'nrrsqs']
fits = pd.DataFrame(index=mi, columns=fitcolumns)
sampfits = pd.DataFrame(index=mi, columns=sampfitcolumns)
nsamples = 1000
np.random.seed(0) # fix random seed for identical results from np.random.choice() on each run
if LOADFITSFROMPICKLE:
    print('Loading fits')
    fits = load('fits')
    sampfits = load('sampfits')
    nsamples = len(sampfits[sampfits['ms'].notnull()]['ms'].iloc[0]) # convoluted, but works
    mseustrfititerator = []
else:
    print('Calculating fits')
    mseustrfititerator = mvimseustrs
for mseustr in mseustrfititerator:
    for kind in ['nat']: #MVIKINDS:
        for st8 in ['none']: #ALLST8S:
            print(mseustr, kind, st8)
            psth = mviresp.loc[mseustr, kind, st8]['psth']
            if psth.isna().any(): # test both ctrl and opto
                continue
            # init sampfits lists:
            sampfits.loc[mseustr, kind, st8] = [], [], [], [], [], [], [], [], [], [], [], []
            # iterate over (narrow) firing modes (all, non-burst, burst, non-rand):
            for fmode in fmodes:
                # first do single fit of all relevant trials, fit and test data are
                # identical (no trial resampling):
                if fmode != 'nr': # don't bother filling nr column in the fits array
                    psth = mviresp.loc[mseustr, kind, st8][fmode+'psth']
                    ctrlpsth, optopsth = psth[False], psth[True]
                    mm, b, rsq = fitmodel(ctrlpsth, optopsth, ctrlpsth, optopsth,
                                          model=model)
                    th = -b / mm # threshold, i.e., x intercept
                    fits.loc[mseustr, kind, st8][fmode+'m'] = mm
                    fits.loc[mseustr, kind, st8][fmode+'th'] = th
                    fits.loc[mseustr, kind, st8][fmode+'rsq'] = rsq
                # now sample trials multiple times, separate into fit and test data:
                if fmode != 'nr':
                    ctrlraster = mviresp.loc[mseustr, kind, st8, False][fmode+'raster']
                    optoraster = mviresp.loc[mseustr, kind, st8, True][fmode+'raster']
                else: # fmode == 'nr'
                    # find number of burst spikes per trial and condition,
                    # remove that same number randomly from each trial in each condition:
                    ctrlraster, optoraster = [], []
                    ctrlbraster = mviresp.loc[mseustr, kind, st8, False]['braster']
                    optobraster = mviresp.loc[mseustr, kind, st8, True]['braster']
                    # find number of burst spikes per trial in both conditions:
                    bctrlcounts = [ len(ctrlbtrial) for ctrlbtrial in ctrlbraster ]
                    boptocounts = [ len(optobtrial) for optobtrial in optobraster ]
                    # build nr rasters from full rasters, trial by trial:
                    ctrlfullraster = mviresp.loc[mseustr, kind, st8, False]['raster']
                    optofullraster = mviresp.loc[mseustr, kind, st8, True]['raster']
                    for bctrlcount, ctrltrial in zip(bctrlcounts, ctrlfullraster):
                        nspikes = len(ctrltrial) - bctrlcount # num spikes to sample
                        resampctrltrial = choice(ctrltrial, size=nspikes, replace=False)
                        ctrlraster.append(resampctrltrial)
                    for boptocount, optotrial in zip(boptocounts, optofullraster):
                        nspikes = len(optotrial) - boptocount # num spikes to sample
                        resampoptotrial = choice(optotrial, size=nspikes, replace=False)
                        optoraster.append(resampoptotrial)
                    ctrlraster, optoraster = np.array(ctrlraster), np.array(optoraster)
                nctrltrials = len(ctrlraster) # typically 200 trials when st8 == 'none'
                noptotrials = len(optoraster) # typically 200 trials when st8 == 'none'
                for ntrials in [nctrltrials, noptotrials]:
                    if ntrials < MINNTRIALSAMPLETHRESH:
                        print('Not enough trials to sample:', mseustr, kind, st8, fmode)
                        continue # don't bother sampling, leave entry in sampfits empty
                ctrltrialis = np.arange(nctrltrials)
                optotrialis = np.arange(noptotrials)
                ctrlsamplesize = intround(nctrltrials / 2) # half for fitting, half for testing
                optosamplesize = intround(noptotrials / 2) # half for fitting, half for testing
                # probably doesn't matter if use ctrl or opto bins, should be the same:
                bins = mviresp.loc[mseustr, kind, st8, False]['bins']
                for samplei in range(nsamples):
                    # randomly sample half the ctrl and opto trials, without replacement:
                    ctrlfitis = np.sort(choice(ctrltrialis, size=ctrlsamplesize, replace=False))
                    optofitis = np.sort(choice(optotrialis, size=optosamplesize, replace=False))
                    ctrltestis = np.setdiff1d(ctrltrialis, ctrlfitis) # get the complement
                    optotestis = np.setdiff1d(optotrialis, optofitis) # get the complement
                    ctrlfitraster, ctrltestraster = ctrlraster[ctrlfitis], ctrlraster[ctrltestis]
                    optofitraster, optotestraster = optoraster[optofitis], optoraster[optotestis]
                    # calculate fit and test opto PSTHs, subsampled in time:
                    ctrlfitpsth = raster2psth(ctrlfitraster, bins, binw, tres, kernel)[::ssx]
                    optofitpsth = raster2psth(optofitraster, bins, binw, tres, kernel)[::ssx]
                    ctrltestpsth = raster2psth(ctrltestraster, bins, binw, tres, kernel)[::ssx]
                    optotestpsth = raster2psth(optotestraster, bins, binw, tres, kernel)[::ssx]
                    #if np.isnan(ctrlfitpsth).any() or np.isnan(optofitpsth).any():
                    #    continue
                    #assert not (np.isnan(ctrltestpsth).any() or np.isnan(optotestpsth).any())
                    mm, b, rsq = fitmodel(ctrlfitpsth, optofitpsth, ctrltestpsth, optotestpsth,
                                          model=model)
                    #if np.isnan(rsq) or rsq < RSQTHRESH: # poor fit
                    #    continue # skip this sample
                    th = -b / mm # threshold, i.e., x intercept
                    sampfits.loc[mseustr, kind, st8][fmode+'ms'].append(mm)
                    sampfits.loc[mseustr, kind, st8][fmode+'ths'].append(th)
                    sampfits.loc[mseustr, kind, st8][fmode+'rsqs'].append(rsq)


## FMI (feedback modulation index: (feedback - suppression) / (feedback + suppression)) and
## RMI (run modulation index: (run - sit) / (run + sit)) for each mseu,
## for nat mvi and grt experiments, for all measures.
# set up mviFMI DataFrame:
levels = [mvimseustrs, ALLST8S]
names = ['mseu', 'st8']
mi = pd.MultiIndex.from_product(levels, names=names)
mviFMI = pd.DataFrame(index=mi, columns=modmeasures)
# set up grtFMI DataFrame:
levels = [grtmseustrs, ALLST8S]
names = ['mseu', 'st8']
mi = pd.MultiIndex.from_product(levels, names=names)
grtFMI = pd.DataFrame(index=mi, columns=modmeasures) # lots of unused columns
# set up mviRMI DataFrame:
levels = [mvimseustrs, OPTOS]
names = ['mseu', 'opto']
mi = pd.MultiIndex.from_product(levels, names=names)
mviRMI = pd.DataFrame(index=mi, columns=modmeasures)
# set up grtRMI DataFrame:
levels = [grtmseustrs, OPTOS]
names = ['mseu', 'opto']
mi = pd.MultiIndex.from_product(levels, names=names)
grtRMI = pd.DataFrame(index=mi, columns=modmeasures)
stimtype2FMI = {'mvi': mviFMI, 'grt': grtFMI}
stimtype2RMI = {'mvi': mviRMI, 'grt': grtRMI}
for stimtype in STIMTYPES:
    if stimtype == 'mvi':
        resp = mviresp.xs('nat', level='kind') # keep only nat movie measures
    else:
        resp = grtresp
    fmidf = stimtype2FMI[stimtype]
    rmidf = stimtype2RMI[stimtype]
    for st8 in ALLST8S:
        for measure in modmeasures:
            if measure not in resp:
                continue
            measurevals = resp[measure][(slice(None), st8)] # mseustr, opto
            feedback = measurevals[(slice(None), False)] # mseustr
            suppress = measurevals[(slice(None), True)] # mseustr
            assert (feedback.dropna() >= 0).all() # negative values violate -1 <= FMI <= 1
            assert (suppress.dropna() >= 0).all()
            # filter out cases where measureval in both conditions is 0:
            keepis = (feedback != 0) | (suppress != 0)
            if stimtype == 'mvi': # also filter by SNRTHRESH:
                maxsnrs = resp['snr'][(slice(None), st8)].max(level='mseu') # max SNR per mseu
                keepis = keepis & (maxsnrs >= SNRTHRESH)
            feedback, suppress = feedback[keepis], suppress[keepis]
            fmis = (feedback - suppress) / (feedback + suppress) # indexed by mseustr
            for mseustr, fmi in zip(fmis.index.values, fmis):
                fmidf.loc[mseustr, st8][measure] = fmi # fill in the appropriate rows
    for opto in OPTOS:
        for measure in modmeasures:
            if measure not in resp:
                continue
            measurevals = resp[measure][(slice(None), slice(None), opto)] # mseustr, st8
            runvals = measurevals[(slice(None), 'run')] # mseustr
            sitvals = measurevals[(slice(None), 'sit')] # mseustr
            assert (runvals.dropna() >= 0).all() # negative values violate -1 <= RMI <= 1
            assert (sitvals.dropna() >= 0).all()
            # filter out cases where measureval in both conditions is 0:
            keepis = (runvals != 0) | (sitvals != 0)
            if stimtype == 'mvi': # also filter by SNRTHRESH:
                maxsnrs = resp['snr'][(slice(None), slice(None), opto)].max(level='mseu')
                keepis = keepis & (maxsnrs >= SNRTHRESH)
            runvals, sitvals = runvals[keepis], sitvals[keepis]
            rmis = (runvals - sitvals) / (runvals + sitvals) # indexed by mseustr
            for mseustr, rmi in zip(rmis.index.values, rmis):
                rmidf.loc[mseustr, opto][measure] = rmi # fill in the appropriate rows


## maxFMI and maxRMI for each unit (msu), for nat mvi and grt experiments, for all measures.
## For each stimtype+st8+measure+unit combination, take the abs max across experiments. Also
## store the argmax (i.e., the mseu). Among other things, this is for comparison
## of movie and grating responses.
# set up maxFMI DataFrame:
levels = [mvigrtmsustrs, ALLST8S, STIMTYPES]
names = ['msu', 'st8', 'stimtype']
mi = pd.MultiIndex.from_product(levels, names=names)
columns = ['mseu'] + modmeasures
maxFMI = pd.DataFrame(index=mi, columns=columns)
# set up maxRMI DataFrame:
levels = [mvigrtmsustrs, OPTOS, STIMTYPES]
names = ['msu', 'opto', 'stimtype']
mi = pd.MultiIndex.from_product(levels, names=names)
columns = ['mseu'] + modmeasures
maxRMI = pd.DataFrame(index=mi, columns=columns)
for stimtype in STIMTYPES:
    fmidf = stimtype2FMI[stimtype]
    rmidf = stimtype2RMI[stimtype]
    for st8 in ALLST8S:
        for measure in modmeasures:
            mseustrs = fmidf.index.levels[0].values
            for mseustr in mseustrs:
                fmi = fmidf.loc[mseustr, st8][measure]
                if pd.isna(fmi):
                    continue
                msustr = mseustr2msustr(mseustr)
                oldfmi = maxFMI.loc[msustr, st8, stimtype][measure] # check existing val
                if pd.isna(oldfmi) or (abs(fmi) > abs(oldfmi)):
                    maxFMI.loc[msustr, st8, stimtype][measure] = fmi # overwrite
                    maxFMI.loc[msustr, st8, stimtype]['mseu'] = mseustr # overwrite
    for opto in OPTOS:
        for measure in modmeasures:
            mseustrs = rmidf.index.levels[0].values
            for mseustr in mseustrs:
                rmi = rmidf.loc[mseustr, opto][measure]
                if pd.isna(rmi):
                    continue
                msustr = mseustr2msustr(mseustr)
                oldrmi = maxRMI.loc[msustr, opto, stimtype][measure] # check existing val
                if pd.isna(oldrmi) or (abs(rmi) > abs(oldrmi)):
                    maxRMI.loc[msustr, opto, stimtype][measure] = rmi # overwrite
                    maxRMI.loc[msustr, opto, stimtype]['mseu'] = mseustr # overwrite


## calculate eye position stdev and pupil area during all 3 stimtypes,
## as a function of run state and opto conditions, store in ipos_st8 and ipos_opto DataFrames:
# Requires huxley file system to be mounted. Author: Davide Crombie
# parameters for cleaning up the pupil displacement data:
min0t = 1 # only interpolate over stretches missing data shorter than min0t (s)
min1t = 1 # only keep stretches of continuous data longer than min1t (s)
min1prop = .8 # only keep stretches of data with at least 80% non-NaN
st8_rows = [] # to be filled with dictionaries containing relevant data
opto_rows = []
print('Calculating ipos_st8 and ipos_opto DataFrames...')
for stimtype, stims in zip(['mvi', 'grt', 'spon'], [mvis, grts, spons]):
    print('...for stimtype %r' % stimtype)
    stimeye = EyeIxtract() & stims
    for key in stimeye.get_keys(): # these are mse keys
        if stimtype == 'mvi':
            moviecond = mov & key
            kind2stimis = moviecond.moviekind2stimis() # movie kind to stimi mapping
            # stim_id values considered in this analysis:
            kindstimis = tuple(kind2stimis['nat'])
            if len(kindstimis) == 0:
                continue # wrong kind of movie, e.g. PNK or SHF
        msestr = "%s_s%02d_e%02d" % (key['m'], key['s'], key['e'])
        print(msestr)
        # fetch and clean the eye position data:
        irow = EyeIxtract() & key
        ts = irow.fetch1('i_frame_times')
        eyedt = np.diff(ts).mean() # eye sampling period
        # x & y pupil positions relative to baseline (median) in deg. visual angle,
        # after subtraction of corneal reflection position:
        xpos, ypos = irow.fetch_position(cr_sub=True, deg=True) # in deg
        area = irow.fetch_area(mm=False, normalize=True) # in pix
        # position timecourses after elimintation of unclean stretches, smoothing,
        # and interpolation of remaining NaN values within clean stretches:
        min0len, min1len = min0t/eyedt, min1t/eyedt # convert from s to n timepoints
        xpos_clean = i_clean(xpos, min0len=min0len, min1len=min1len, min1prop=min1prop)
        ypos_clean = i_clean(ypos, min0len=min0len, min1len=min1len, min1prop=min1prop)
        area_clean = i_clean(area, min0len=min0len, min1len=min1len, min1prop=min1prop)
        conditions = {}
        # get keys for trial table restriction based on st8:
        conditions['run'] = (st8t & key & {'st8_crit':st82crit['run']}).get_keys()
        conditions['sit'] = (st8t & key & {'st8_crit':st82crit['sit']}).get_keys()
        if len(conditions['run']) == 0: # perhaps run data wasn't acquired for this exp
            assert len(conditions['sit']) == 0 # sanity check
            del conditions['run'] # del empty list
            del conditions['sit'] # del empty list
        # get keys for trial table restriction based on opto condition:
        if stimtype == 'mvi':
            if len(kindstimis) > 1: # only include opto conditions correspond to nat movie
                conditions[False] = (evcond & key & 'ev_chan="opto1"' & 'ec_val=0' &
                                     'stim_id IN {}'.format(kindstimis)).get_keys()
                conditions[True]  = (evcond & key & 'ev_chan="opto1"' & 'ec_val=1' &
                                     'stim_id IN {}'.format(kindstimis)).get_keys()
        else:
            conditions[False] = (evcond & key & 'ev_chan="opto1"' & 'ec_val=0').get_keys()
            conditions[True]  = (evcond & key & 'ev_chan="opto1"' & 'ec_val=1').get_keys()
        # compute eye position variance for all conditions:
        for condition in conditions: # iterate over all existing keys in conditions
            row = {}
            row['mse'] = msestr
            row['stimtype'] = stimtype
            # start and stop times for trials of the current state only:
            ## TODO: spons don't have trials, just periods of opto and non-opto
            trialis, t0s, t1s = (trial & conditions[condition]).fetch('trial_id',
                                                                      'trial_on_time',
                                                                      'trial_off_time')
            if len(t0s) == 0 or len(t1s) == 0:
                continue
            # sort pupil displacement data into trial matrices:
            xpos_trialmat, _ = get_trialmat(xpos_clean, ts, t0s, t1s)
            ypos_trialmat, _ = get_trialmat(ypos_clean, ts, t0s, t1s)
            area_trialmat, area_trialts = get_trialmat(area_clean, ts, t0s, t1s)
            area_trialmean = np.nanmean(area_trialmat, axis=1) # mean pupil area of each trial
            # save trial IDs:
            row['trialis'] = trialis
            # save mean pupil area for each trial:
            row['area_trialmean'] = area_trialmean
            # save pupil area signal for each trial:
            row['area_trialmat'] = area_trialmat
            row['area_trialts'] = area_trialts
            # save mean pupil area across trials:
            row['mean_area'] = np.nanmean(area_trialmean)
            # save eye position variability across trials:
            row['std_xpos_cross'] = np.nanstd(xpos_trialmat, axis=0).mean()
            row['std_ypos_cross'] = np.nanstd(ypos_trialmat, axis=0).mean()
            # save eye position variability within trials:
            row['std_xpos_within'] = np.nanmean(np.nanstd(xpos_trialmat, axis=1))
            row['std_ypos_within'] = np.nanmean(np.nanstd(ypos_trialmat, axis=1))
            # add dictionary to appropriate list:
            if condition in ['run', 'sit']:
                row['st8'] = condition
                st8_rows.append(row)
            elif condition in [False, True]:
                row['opto'] = condition
                opto_rows.append(row)
# convert list of dicts to DataFrame:
ipos_st8 = pd.DataFrame(st8_rows)
if len(st8_rows) > 0:
    ipos_st8.set_index(['mse', 'stimtype', 'st8'], inplace=True) # convert to MultiIndex
ipos_opto = pd.DataFrame(opto_rows)
if len(opto_rows) > 0:
    ipos_opto.set_index(['mse', 'stimtype', 'opto'], inplace=True)


## collect unit spatial position, store in upos DataFrame:
rows = [ {'msu':msu2msustr(msu), 'x':x, 'y':y}
         for (msu, x, y) in zip(*mviunits.fetch(dj.key, 'u_xpos', 'u_ypos')) ]
upos = pd.DataFrame(rows)
upos = upos.set_index(['msu'])


## collect runspeed traces, store in runspeed DataFrame:
runspeedrows = [] # to be filled with dictionaries containing relevant data
for mvi in mvis.fetch(dj.key):
    msestr = "%s_s%02d_e%02d" % (mvi['m'], mvi['s'], mvi['e'])
    print(msestr)
    # fetch run data for all trials in this mse, each is an array, sometimes some trials
    # are missing from the run data:
    runtrialis, runt, runspeed = (rt & mvi).fetch('trial_id', 'run_t', 'run_speed')
    if len(runspeed) == 0:
        continue # no run data for this mse
    moviecond = mov & mvi
    kind2stimis = moviecond.moviekind2stimis() # movie kind to stimi mapping
    #trials = trial & mvi
    #alltrialis = np.sort(trials.fetch('trial_id')) # trialis corresponding to rows in tranges
    for kind in ['nat']: #MVIKINDS:
        kindstimis = kind2stimis[kind]
        if len(kindstimis) == 0:
            continue # wrong kind of movie
        # get stimis from evcond table for this mvi and kind, and restrict to opto1 chan:
        mviopto = evcond & mvi & 'stim_id IN %s' % seq2sql(kindstimis) & {'ev_chan':'opto1'}
        mvioptostimis, mvioptovals = mviopto.fetch('stim_id', 'ec_val')
        assert len(mvioptostimis) == 2
        assert len(mvioptovals) == 2
        mvioptovals = np.bool_(mvioptovals) # convert from [0, 1] to [False, True]
        opto2stimi = {False:int(mvioptostimis[mvioptovals == False]),
                      True :int(mvioptostimis[mvioptovals == True])}
        for opto in OPTOS:
            row = {}
            row['mse'] = msestr
            row['kind'] = kind
            row['opto'] = opto
            # extract run trials for the stimi corresponding to this kind and opto:
            stimi = opto2stimi[opto]
            stimtrialis = np.sort((trial & mvi & {'stim_id':stimi}).fetch('trial_id'))
            # make sure that stimtrialis is a subset of runtrialis:
            assert issubset(stimtrialis, runtrialis)
            # now it's safe to ask where stimtrialis are found in runtrialis:
            trialiis = runtrialis.searchsorted(stimtrialis)
            #print(opto, len(stimtrialis), len(runtrialis))
            row['trialis'] = runtrialis[trialiis]
            row['t'] = runt[trialiis]
            row['speed'] = runspeed[trialiis]
            runspeedrows.append(row)
runspeed = pd.DataFrame(runspeedrows)
if len(rows) > 0:
    runspeed = runspeed.set_index(['mse', 'kind', 'opto']) # convert to MultiIndex


## classify units in various ways, store classification in celltype DataFrame:
# use normal Index, not MultiIndex, since there's only a single index column, otherwise
# assigning values with `df.loc[indexval][colname] = foo` won't work
index = pd.Index(mvigrtmsustrs, name='msu')
columns = ['sbczscore', 'sbc', 'dsi', 'depth', 'normdepth', 'onoff', 'trans',
           'mvionsetpsth', 't']
celltype = pd.DataFrame(index=index, columns=columns)

print('Classifying SbC')
# NOTE: seems that the zscores stored in the Condition.ZScores table are only
# for control conditions, i.e. opto == False, which is good
SBCZSCORETHRESH = -3 # 0.0, -1.96, -2.58, -3
# classify by SbC, keep the most negative sbczscore of each msu across grating experiments:
msustr2maxzsmseu = {} # map msustr to most negative sbczscore mseu key
for grt in grts:
    zscores = zs & grt
    uids = (up & grt).fetch('u')
    for uid in uids:
        msu = {'m':grt['m'], 's':grt['s'], 'u':uid}
        mseu = {'m':grt['m'], 's':grt['s'], 'e':grt['e'], 'u':uid}
        msustr = msufmt.format(**msu)
        mseustr = mseufmt.format(**mseu)
        unitzscore = zscores & {'u':uid}
        if len(unitzscore) == 0:
            print('No Condition.Zscores entry for', mseustr)
            continue
        sbckeys = unitzscore.SbC()
        assert len(sbckeys) == 1
        sbczscore = sbckeys[0]['sbc_zscore'] # median, across ori conditions
        if np.isnan(sbczscore):
            print('WARNING: SbC() returned NaN for', mseustr)
        meanrate = grtresp.loc[mseustr, 'none', False]['meanrate']
        if np.isnan(meanrate):
            continue # skip this non visually responsive mseu, based on z_crit and n_crit
        oldsbczscore = celltype.loc[msustr]['sbczscore']
        if np.isnan(oldsbczscore) or sbczscore < oldsbczscore: # keep most -ve sbczscore
            celltype.loc[msustr]['sbczscore'] = sbczscore # update
            msustr2maxzsmseu[msustr] = mseu
# classify units as SbC == [False, True] according to SBCZSCORETHRESH:
for msustr in mvigrtmsustrs:
    sbczscore = celltype.loc[msustr]['sbczscore']
    if np.isnan(sbczscore):
        sbc = np.nan # this is the default anyway
    elif sbczscore <= SBCZSCORETHRESH:
        sbc = True
    else:
        sbc = False
    celltype.loc[msustr]['sbc'] = sbc
    # plot rasters, for testing SBCZSCORETHRESH, and zscores in general:
    try:
        mseu = msustr2maxzsmseu[msustr]
    except KeyError: # not a single exp with an sbczscore found for this msu
        print('Did not collect an sbczscore for', msustr)
        continue
    '''
    # plot the non-blank, opto == False trials of this msu:
    offsets = [-1, 1]
    rasters, tranges, allstimis, opto, ukeys = (spk & mseu).get_rasters(offsets=offsets)
    assert len(rasters) == 1
    uid, raster = rasters.popitem()
    stimis = allstimis[opto == False]
    raster = raster[opto == False]
    blankstimis = (grat & mseu & 'stim_id IN %s' % seq2sql(stimis)
                   & 'grat_contrast = 0.0').fetch('stim_id')
    assert len(blankstimis) == 1
    blankstimi = blankstimis[0] # should be 24
    raster = raster[stimis != blankstimi] # exclude blank trials
    title = "%s, zscore=%.3f, SbC=%s" % (msustr, sbczscore, sbc)
    ax = simpletraster(raster, dt=5, offsets=offsets, scatter=True, title=title)
    ax.set_title(title)
    '''

# classify by direction selectivity, keep biggest DSI of each msu across grating experiments:
print('Classifying DSI')
resp = grtresp.xs('none', level='st8') # keep only none run state
resp = resp.xs(False, level='opto') # keep only control opto state
for mseustr, row in resp.iterrows():
    print(mseustr)
    tunparams = row['tun']
    if np.isnan(tunparams).all():
        continue # skip this mseustr
    dp, rp, rn, r0, sigma, spon = tunparams
    dsi = niell_DSI(rp, rn, r0)
    msustr = mseustr2msustr(mseustr)
    celltyperow = celltype.loc[msustr]
    olddsi = celltyperow['dsi']
    if np.isnan(olddsi) or dsi > olddsi:
        celltyperow['dsi'] = dsi # update


# estimate dLGN depth of each unit using sparse noise MUA RFs to help delineate
# at what channel dLGN starts and ends:
print('Estimating dLGN depth')
# first plot MUA RFs for each sparse noise experiment in each series, visually inspect them,
# and manually enter estimated start and end channel of dLGN for each series into
# msstr2dLGNchanrange:
for series in mviseries:
    snkeys = (Series.Experiment() & series & 'e_name LIKE "%%Noise%%"').fetch(dj.key)
    for snkey in snkeys:
        print('Sparse noise exp:', snkey)
        rfs, axes, chans = rfs_for_cdata(snkey)
        # plot MUA RFs for inspection
        simple_plot_rfs(snkey, rfs, axes, chans, interpolation=None, nrows=4)
# Now save all plots to disk in MUA_RFs folder:
saveall(format='pdf')
saveall(format='png')
# Next, manually map MOUSE_SERIES string to tuple of uppermost and lowermost clear dLGN
# channel (end inclusive) based on qualitative estimation of start and end of MUA RF
# progression of each series, potentially considering multiple sparse noise experiments
# per series:
## TODO: switch to using LGNChans table and its ms2maxchanrange() method instead of
## duplicating it here:
msstr2dLGNchanrange = {'PVCre_2017_0006_s03': (29, 8),
                       'PVCre_2017_0008_s09': (25, 7),
                       'PVCre_2017_0008_s12': (24, 2),
                       'PVCre_2017_0015_s03': (28, 5),
                       'PVCre_2017_0015_s07': (19, 1),
                       'PVCre_2018_0001_s02': (11, 1),
                       'PVCre_2018_0001_s05': (25, 6),
                       'PVCre_2018_0003_s02': (20, 3),
                       'PVCre_2018_0003_s03': (14, 2),
                       'PVCre_2018_0003_s05': (20, 9),
                       'PVCre_2019_0002_s08': (21, 1),
                       'Ntsr1Cre_2020_0001_s04': (31, 5),
                      }
# get dLGN depth of each unit in each series, add to celltype df:
for series in mviseries:
    msstr = ms2msstr(series)
    hichan, lochan = msstr2dLGNchanrange[msstr] # hi and lo chans (in terms of depth)
    # could use coords entered into ProbeModel table, but easier to use SiteLoc dict
    # in probes.expio:
    #probemodel, chans, coords = (pm & (pr & series)).fetch1('pm', 'pm_chans', 'pm_coords')
    probemodel = (pr & series).fetch1('pm') # string
    probe = expio.probes.getprobe(probemodel)
    # y coord of upper and lowermost dLGN chan, use upper as reference and upper-lower as depth
    # normalization for all units in this series:
    y0 = probe.SiteLoc[hichan][1]
    y1 = probe.SiteLoc[lochan][1]
    uids, ys = (up & series).fetch('u', 'u_ypos')
    depths = ys - y0
    dLGNspan = y1 - y0
    normdepths = depths / dLGNspan
    for uid, depth, normdepth in zip(uids, depths, normdepths):
        msustr = msstr + '_u%02d' % uid
        if depth < 0:
            print('WARNING: %s has depth %.1f < 0, skipping entry in celltype df'
                  % (msustr, depth))
            continue
        if normdepth > 1.5:
            print('WARNING: %s has normdepth %.1f > 1.5, skipping entry in celltype df'
                  % (msustr, normdepth))
            continue
        celltype.loc[msustr, 'depth'] = depth
        celltype.loc[msustr, 'normdepth'] = normdepth

'''
# classify by layer (shell/core), pretty much deprecated, don't trust the code in rf.py
# that does all of this...
# NOTE: several (13/80 as of 2020-02-11) msu end up with no layer classification because
# they fall outside of the detected range of chans with MUA envelope RFs of sufficient score
print('Classifying shell/core')
print('NOTE: something is wrong with this code, hangs on first series at full CPU')
for series in mviseries:
    try:
        unitlayers = rf.get_layer(series, snexpid=-1) # list of dicts
    except RuntimeError as err:
        print(err)
        continue # no .envl.dat file?
    for k in unitlayers:
        msu = {'m':k['m'], 's':k['s'], 'u':k['u']}
        msustr = msufmt.format(**msu)
        celltype.loc[msustr]['layer'] = k['layer']
'''

# classify as ON/OFF/transient according to movie onset response. Movies are generally
# darker than mid grey, so a decrease in PSTH after onset is an ON response.
# After examining lots of raster plots, simplest and most informative may be
# +/- 0.2 sec on either side of movie onset, after taking into account 0.05 s propagation
# delay, i.e. from -0.15 to 0 as the spont (pre) rate, and from 0.05 to 0.25 as the evoked
# (post) rate. Taking just a short period of time around the movie onset helps reduce the
# impact of movie motion on the result
print('Classifying ON/OFF/transient')
resp = mviresp.xs('nat', level='kind') # keep only nat movie kind
resp = resp.xs('none', level='st8') # keep only none run state
resp = resp.xs(False, level='opto') # keep only control opto state
for mseustr, row in resp.iterrows():
    print(mseustr)
    wpsth = row['wpsth']
    wt = row['wt']
    if np.isnan(wt).any():
        continue # skip this mseustr
    prei0, prei1 = wt.searchsorted([PDT-WINDT, EXCLTR[0]])
    posti0, posti1 = wt.searchsorted([EXCLTR[1], PDT+WINDT])
    mvionsetpsth = wpsth[prei0:posti1]
    t = wt[prei0:posti1]
    prepsth = wpsth[prei0:prei1]
    postpsth = wpsth[posti0:posti1]
    insidepsth = wpsth[prei1:posti0]
    outsidepsth = np.concatenate([prepsth, postpsth])
    pre = prepsth.mean()
    post = postpsth.mean()
    ins = insidepsth.mean()
    out = outsidepsth.mean()
    if pre == 0.0 or post == 0.0 or ins == 0.0 or out == 0.0:
        print('No pre/post/inside/outside firing rate, skipping')
        continue
    onoff = (pre - post)/(pre + post)
    trans = (ins - out)/(ins + out)
    Z = max(prepsth.max(), postpsth.max()) # normalization factor, ignore transient period
    msustr = mseustr2msustr(mseustr)
    celltyperow = celltype.loc[msustr]
    oldonoff = celltyperow['onoff']
    oldtrans = celltyperow['trans']
    if np.isnan(oldonoff) or (abs(onoff) > abs(oldonoff)):
        celltyperow['onoff'] = onoff # update
        celltyperow['mvionsetpsth'] = mvionsetpsth / Z # normalize for plotting, update
        celltyperow['t'] = t # update
    if np.isnan(oldtrans) or (abs(trans) > abs(oldtrans)):
        celltyperow['trans'] = trans # update

## code for avoiding annoying dataframe pointer vs copy issues:
'''
# overwriting/resetting values of an existing column:
celltype['sbc'][:] = np.nan # I think the explicit slice makes the difference

# code for adding a new column to an existing dataframe:
celltype = celltype.copy()
celltype['dsi'] = np.nan
celltype['dsi'] = celltype['dsi'].astype(object)

celltype = celltype.copy()
celltype['onoff'] = np.nan
celltype['trans'] = np.nan
celltype['mvionsetpsth'] = np.nan
celltype['t'] = np.nan
celltype['onoff'] = celltype['onoff'].astype(object)
celltype['trans'] = celltype['trans'].astype(object)
celltype['mvionsetpsth'] = celltype['mvionsetpsth'].astype(object)
celltype['t'] = celltype['t'].astype(object)
'''


## collect MUA envelope RF positions for each msu, to use in calculating how well centered
## the screen was on the RF of each unit:
index = pd.Index(mvigrtmsustrs, name='msu')
columns = ['x0', 'y0']
cellscreenpos = pd.DataFrame(index=index, columns=columns)
print('Getting RF params from MUA envelope')
snexpid = -1 # use last one in each series
MUAENVLRSQTHRESH = 0.2 # consider only decent MUA envelope RF fits
series2rffitdf = {} # collect all RF fit params for all series
for series in mviseries:
    erows = Series.Experiment() & series & 'e_name LIKE "%%Noise%%"'
    expvals = erows.fetch(dj.key, 'e_name', 'e_displayazim', 'e_displayelev')
    snkeys, enames, azims, elevs = expvals
    snkey, ename, azim, elev = (snkeys[snexpid], enames[snexpid],
                                azims[snexpid], elevs[snexpid])
    print('Sparse noise exp:', snkey, ename)
    rfs, axes, chans = rfs_for_cdata(snkey)
    #simple_plot_rfs(snkey, rfs, axes, chans, interpolation=None, nrows=4)
    fitdf = fit_MUA_RFs(snkey, rfs, chans)
    msestr = key2msestr(snkey)
    msstr = mseustr2mstr(msestr)
    series2rffitdf[msstr] = fitdf # save
    chans = fitdf.index.levels[0].to_numpy() # all the chans in the df
    chan2screenpos = {}
    for chan in chans:
        chanrow = fitdf.loc[chan, 'mix']
        if chanrow['rsq'] < MUAENVLRSQTHRESH:
            continue
        # remove azim and elev correction to get RF position wrt screen center:
        chan2screenpos[chan] = chanrow['x0']-azim, chanrow['y0']-elev
    urows = up & series
    uids, maxchans = urows.fetch('u', 'u_maxchan')
    for uid, maxchan in zip(uids, maxchans):
        if maxchan in chan2screenpos:
            screenpos = chan2screenpos[maxchan]
            msu = {'m':series['m'], 's':series['s'], 'u':uid}
            msustr = msufmt.format(**msu)
            cellscreenpos.loc[msustr] = screenpos


## TODO: somehow convert series2rffitdf dict to DataFrame multiindexed by msstr, chan, rftype:
#muarfs = pd.DataFrame()

'''
# test reliability measure using sparse trials with randomly placed spikes:
ntrials, nt, nspikes = 200, 1000, 50
trials = np.zeros((ntrials, nt))
for trial in trials:
    for spikei in range(nspikes):
        i = np.random.randint(0, nt)
        trial[i] = 1
print('reliability:', reliability(trials)[0])
'''

'''
# how to get pairs of ON and OFF values from mviresp instead of resp:
# multiindex row slicing is confusing, need the axis=0 for some reason,
# see https://pandas.pydata.org/pandas-docs/stable/advanced.html#using-slicers
li = mviresp.loc(axis=0)
sparsons  = np.float64(li[:, st8, True]['spars'].values)
sparsoffs = np.float64(li[:, st8, False]['spars'].values)
nanis = np.isnan(sparsons)
if not (np.isnan(sparsoffs) == nanis).all():
    raise ValueError("ON and OFF conditions have different nan indices")
sparsons = sparsons[~nanis]
sparsoffs = sparsoffs[~nanis]
'''

## TODO: now that mseu are the row indicies instead of msu, they're all unique, and
## the group kwarg isn't doing any grouping, I think


## TODO: I think the MLM stats don't work because currently the various different MVIKINDS
## and ST8S are disabled, only nat and none are being used. So comment this out for now:
'''
print('ALL MVI MixedLM')
mvirespr = mviresp.reset_index() # make a copy, convert mi to columns
mvirespr = mvirespr.dropna() # drop rows with nans, but not really necessary
mvirespr = mvirespr[mvirespr['st8'] != 'none'] # exclude 'none' state
for col in ['meanrate', 'spars', 'rel', 'meanburstratio']:
    # each of these columns are object arrs for some reason, instead of float, which
    # confuses statsmodels and prevents printout of summary. See:
    # https://stackoverflow.com/questions/29799161/summary-not-working-for-ols-estimation
    mvirespr[col] = mvirespr[col].astype(np.float64) # convert each column to float
formulas = ('meanrate ~ kind * opto * st8',
            'spars ~ kind * opto * st8',
            'rel ~ kind * opto * st8',
            'meanburstratio ~ kind * opto * st8')
for formula in formulas:
    # group kwarg specifies how data should be treated, prevents measures from different units
    # from being lumped together:
    mlm = smf.mixedlm(formula=formula, data=mvirespr, groups=mvirespr['mseu'])
    results = mlm.fit()
    print(formula)
    print('ngroups:', results.mlm.n_groups)
    paramp = pd.concat([results.params, results.pvalues], axis=1,
                        keys=['param', 'p']) # Dataframe from 2 Series
    print(paramp)
    #print(results.summary())
    print()
print('----------------------------------------')
print()


print('NAT only MixedLM')
mvirespr = mviresp.reset_index() # make a copy, convert mi to columns
natrespr = mvirespr[mvirespr['kind'] == 'nat']
natrespr = natrespr.dropna() # drop rows with nans, but not really necessary
natrespr = natrespr[natrespr['st8'] != 'none'] # exclude 'none' state
for col in ['meanrate', 'spars', 'rel', 'meanburstratio']:
    # each of these columns are object arrs for some reason, instead of float, which
    # confuses statsmodels and prevents printout of summary. See:
    # https://stackoverflow.com/questions/29799161/summary-not-working-for-ols-estimation
    natrespr[col] = natrespr[col].astype(np.float64) # convert each column to float
formulas = ('meanrate ~ opto * st8',
            'spars ~ opto * st8',
            'rel ~ opto * st8',
            'meanburstratio ~ opto * st8')
for formula in formulas:
    # group kwarg specifies how data should be treated, prevents measures from different units
    # from being lumped together:
    mlm = smf.mixedlm(formula=formula, data=natrespr, groups=natrespr['mseu'])
    results = mlm.fit()
    print(formula)
    print('ngroups:', results.mlm.n_groups)
    paramp = pd.concat([results.params, results.pvalues], axis=1,
                        keys=['param', 'p']) # Dataframe from 2 Series
    print(paramp)
    #print(results.summary())
    print()
print('----------------------------------------')
print()


print('PNK only MixedLM')
mvirespr = mviresp.reset_index() # make a copy, convert mi to columns
pnkrespr = mvirespr[mvirespr['kind'] == 'pnk']
pnkrespr = pnkrespr.dropna() # drop rows with nans, but not really necessary
pnkrespr = pnkrespr[pnkrespr['st8'] != 'none'] # exclude 'none' state
for col in ['meanrate', 'spars', 'rel', 'meanburstratio']:
    # each of these columns are object arrs for some reason, instead of float, which
    # confuses statsmodels and prevents printout of summary. See:
    # https://stackoverflow.com/questions/29799161/summary-not-working-for-ols-estimation
    pnkrespr[col] = pnkrespr[col].astype(np.float64) # convert each column to float
formulas = ('meanrate ~ opto * st8',
            'spars ~ opto * st8',
            'rel ~ opto * st8',
            'meanburstratio ~ opto * st8')
for formula in formulas:
    # group kwarg specifies how data should be treated, prevents measures from different units
    # from being lumped together:
    mlm = smf.mixedlm(formula=formula, data=pnkrespr, groups=pnkrespr['mseu'])
    results = mlm.fit()
    print(formula)
    print('ngroups:', results.mlm.n_groups)
    paramp = pd.concat([results.params, results.pvalues], axis=1,
                        keys=['param', 'p']) # Dataframe from 2 Series
    print(paramp)
    #print(results.summary())
    print()
print('----------------------------------------')
print()


print('SHF only MixedLM')
mvirespr = mviresp.reset_index() # make a copy, convert mi to columns
shfrespr = mvirespr[mvirespr['kind'] == 'shf']
shfrespr = shfrespr.dropna() # drop rows with nans, but not really necessary
shfrespr = shfrespr[shfrespr['st8'] != 'none'] # exclude 'none' state
for col in ['meanrate', 'spars', 'rel', 'meanburstratio']:
    # each of these columns are object arrs for some reason, instead of float, which
    # confuses statsmodels and prevents printout of summary. See:
    # https://stackoverflow.com/questions/29799161/summary-not-working-for-ols-estimation
    shfrespr[col] = shfrespr[col].astype(np.float64) # convert each column to float
formulas = ('meanrate ~ opto * st8',
            'spars ~ opto * st8',
            'rel ~ opto * st8',
            'meanburstratio ~ opto * st8')
for formula in formulas:
    # group kwarg specifies how data should be treated, prevents measures from different units
    # from being lumped together:
    mlm = smf.mixedlm(formula=formula, data=shfrespr, groups=shfrespr['mseu'])
    results = mlm.fit()
    print(formula)
    print('ngroups:', results.mlm.n_groups)
    paramp = pd.concat([results.params, results.pvalues], axis=1,
                        keys=['param', 'p']) # Dataframe from 2 Series
    print(paramp)
    #print(results.summary())
    print()
print('----------------------------------------')
print()


print('Grating MixedLM')
grtrespr = grtresp.reset_index() # make a copy, convert mi to columns
grtrespr = grtrespr.dropna() # drop rows with nans, but not really necessary
grtrespr = grtrespr[grtrespr['st8'] != 'none'] # exclude 'none' state
for col in ['meanrate', 'meanburstratio']:
    # each of these columns are object arrs for some reason, instead of float, which
    # confuses statsmodels and prevents printout of summary. See:
    # https://stackoverflow.com/questions/29799161/summary-not-working-for-ols-estimation
    grtrespr[col] = grtrespr[col].astype(np.float64) # convert each column to float
formulas = ('meanrate ~ opto * st8',
            'meanburstratio ~ opto * st8')
for formula in formulas:
    # group kwarg specifies how data should be treated, prevents measures from different units
    # from being lumped together:
    mlm = smf.mixedlm(formula=formula, data=grtrespr, groups=grtrespr['mseu'])
    results = mlm.fit()
    print(formula)
    print('ngroups:', results.mlm.n_groups)
    paramp = pd.concat([results.params, results.pvalues], axis=1,
                        keys=['param', 'p']) # Dataframe from 2 Series
    print(paramp)
    #print(results.summary())
    print()
print('----------------------------------------')
print()
'''