# -*- coding: utf-8 -*-
"""
GPFA core functionality.

:copyright: Copyright 2015-2019 by the Elephant team, see AUTHORS.txt.
:license: Modified BSD, see LICENSE.txt for details.
"""

from __future__ import division, print_function, unicode_literals

import time
import warnings

import numpy as np
import scipy.linalg as linalg
import scipy.optimize as optimize
import scipy.sparse as sparse
from sklearn.decomposition import FactorAnalysis
from tqdm import trange

from . import gpfa_util


def fit(seqs_train, x_dim=3, bin_width=20.0, min_var_frac=0.01, em_tol=1.0E-8,
        em_max_iters=500, tau_init=100.0, eps_init=1.0E-3, freq_ll=5,
        verbose=False):
    """
    Fit the GPFA model with the given training data.

    Parameters
    ----------
    seqs_train : np.recarray
        training data structure, whose n-th element (corresponding to
        the n-th experimental trial) has fields
            T : int
                number of bins
            y : np.ndarray of shape (#units, #bins)
                neural data
    x_dim : int, optional
        state dimensionality
        Default: 3
    bin_width : float, optional
        spike bin width in msec
        Default: 20.0
    min_var_frac : float, optional
        fraction of overall data variance for each observed dimension to
        set as the private variance floor. This is used to combat Heywood
        cases, where ML parameter learning returns one or more zero
        private variances. (See Martin & McDonald, Psychometrika,
        Dec 1975.)
        Default: 0.01
    em_tol : float, optional
        stopping criterion for EM
        Default: 1e-8
    em_max_iters : int, optional
        number of EM iterations to run
        Default: 500
    tau_init : float, optional
        GP timescale initialization in msec
        Default: 100.0
    eps_init : float, optional
        GP noise variance initialization
        Default: 1e-3
    freq_ll : int, optional
        data likelihood is computed at every `freq_ll` EM iterations;
        `freq_ll` = 1 means that data likelihood is computed at every
        iteration.
        Default: 5
    verbose : bool, optional
        specifies whether to display status messages
        Default: False

    Returns
    -------
    parameter_estimates : dict
        Estimated model parameters. When the GPFA method is used, the
        following parameters are contained:
            covType : {'rbf', 'tri', 'logexp'}
                type of GP covariance
            gamma : np.ndarray of shape (1, #latent_vars)
                related to GP timescales by 'bin_width / sqrt(gamma)'
            eps : np.ndarray of shape (1, #latent_vars)
                GP noise variances
            d : np.ndarray of shape (#units, 1)
                observation mean
            C : np.ndarray of shape (#units, #latent_vars)
                mapping between the neuronal data space and the latent
                variable space
            R : np.ndarray of shape (#units, #units)
                observation noise covariance
    fit_info : dict
        Information about the fitting process and the parameters used
        there:
            iteration_time : list
                runtime of each iteration step in the EM algorithm
            log_likelihoods : list
                log likelihood after each EM iteration
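
    Examples
    --------
    A minimal sketch with synthetic spike counts (random data, shown only
    to illustrate the expected `seqs_train` structure; real input would be
    binned spike trains):

    >>> import numpy as np
    >>> n_trials, n_units, n_bins = 10, 30, 50
    >>> seqs_train = np.array(
    ...     [(n_bins, np.random.poisson(1.0, (n_units, n_bins)))
    ...      for _ in range(n_trials)],
    ...     dtype=[('T', int), ('y', object)]).view(np.recarray)
    >>> params_est, fit_info = fit(seqs_train, x_dim=3)  # doctest: +SKIP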
- """
- # For compute efficiency, train on equal-length segments of trials
- seqs_train_cut = gpfa_util.cut_trials(seqs_train)
- if len(seqs_train_cut) == 0:
- warnings.warn('No segments extracted for training. Defaulting to '
- 'segLength=Inf.')
- seqs_train_cut = gpfa_util.cut_trials(seqs_train, seg_length=np.inf)
- # ==================================
- # Initialize state model parameters
- # ==================================
- params_init = dict()
- params_init['covType'] = 'rbf'
- # GP timescale
- # Assume binWidth is the time step size.
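    # gamma = (bin_width / tau)**2, so the timescale can be recovered as
    # tau = bin_width / sqrt(gamma).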
    params_init['gamma'] = (bin_width / tau_init) ** 2 * np.ones(x_dim)
    # GP noise variance
    params_init['eps'] = eps_init * np.ones(x_dim)

    # ========================================
    # Initialize observation model parameters
    # ========================================
    print('Initializing parameters using factor analysis...')
    y_all = np.hstack(seqs_train_cut['y'])
    fa = FactorAnalysis(n_components=x_dim, copy=True,
                        noise_variance_init=np.diag(np.cov(y_all,
                                                           bias=True)))
    fa.fit(y_all.T)
    params_init['d'] = y_all.mean(axis=1)
    params_init['C'] = fa.components_.T
    params_init['R'] = np.diag(fa.noise_variance_)

    # Define parameter constraints
    params_init['notes'] = {
        'learnKernelParams': True,
        'learnGPNoise': False,
        'RforceDiagonal': True,
    }

    # =====================
    # Fit model parameters
    # =====================
    print('\nFitting GPFA model...')
    params_est, seqs_train_cut, ll_cut, iter_time = em(
        params_init, seqs_train_cut, min_var_frac=min_var_frac,
        max_iters=em_max_iters, tol=em_tol, freq_ll=freq_ll, verbose=verbose)

    fit_info = {'iteration_time': iter_time, 'log_likelihoods': ll_cut}

    return params_est, fit_info


def em(params_init, seqs_train, max_iters=500, tol=1.0E-8, min_var_frac=0.01,
       freq_ll=5, verbose=False):
    """
    Fits GPFA model parameters using the expectation-maximization (EM)
    algorithm.

    Parameters
    ----------
    params_init : dict
        GPFA model parameters at which the EM algorithm is initialized
            covType : {'rbf', 'tri', 'logexp'}
                type of GP covariance
            gamma : np.ndarray of shape (1, #latent_vars)
                related to GP timescales by 'bin_width / sqrt(gamma)'
            eps : np.ndarray of shape (1, #latent_vars)
                GP noise variances
            d : np.ndarray of shape (#units, 1)
                observation mean
            C : np.ndarray of shape (#units, #latent_vars)
                mapping between the neuronal data space and the latent
                variable space
            R : np.ndarray of shape (#units, #units)
                observation noise covariance
    seqs_train : np.recarray
        training data structure, whose n-th entry (corresponding to the
        n-th experimental trial) has fields
            T : int
                number of bins
            y : np.ndarray of shape (#units, #bins)
                neural data
    max_iters : int, optional
        number of EM iterations to run
        Default: 500
    tol : float, optional
        stopping criterion for EM
        Default: 1e-8
    min_var_frac : float, optional
        fraction of overall data variance for each observed dimension to
        set as the private variance floor. This is used to combat Heywood
        cases, where ML parameter learning returns one or more zero
        private variances. (See Martin & McDonald, Psychometrika,
        Dec 1975.)
        Default: 0.01
    freq_ll : int, optional
        data likelihood is computed at every `freq_ll` EM iterations;
        `freq_ll` = 1 means that data likelihood is computed at every
        iteration.
        Default: 5
    verbose : bool, optional
        specifies whether to display status messages
        Default: False

    Returns
    -------
    params_est : dict
        GPFA model parameter estimates, returned by the EM algorithm (same
        format as `params_init`)
    seqs_latent : np.recarray
        a copy of the training data structure, augmented with the new
        fields:
            latent_variable : np.ndarray of shape (#latent_vars, #bins)
                posterior mean of latent variables at each time bin
            Vsm : np.ndarray of shape (#latent_vars, #latent_vars, #bins)
                posterior covariance between latent variables at each
                timepoint
            VsmGP : np.ndarray of shape (#bins, #bins, #latent_vars)
                posterior covariance over time for each latent variable
    ll : list
        list of log likelihoods after each EM iteration
    iter_time : list
        list of computation times (in seconds) for each EM iteration
    """
    params = params_init
    t = seqs_train['T']
    y_dim, x_dim = params['C'].shape
    lls = []
    ll_old = ll_base = ll = 0.0
    iter_time = []
    var_floor = min_var_frac * np.diag(np.cov(np.hstack(seqs_train['y'])))
    seqs_latent = None

    # Loop once for each iteration of EM algorithm
    for iter_id in trange(1, max_iters + 1, desc='EM iteration',
                          disable=not verbose):
        if verbose:
            print()
        tic = time.time()
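        # Compute the data log likelihood only every `freq_ll` iterations
        # (and always during the first two) to save computation.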
        get_ll = (np.fmod(iter_id, freq_ll) == 0) or (iter_id <= 2)

        # ==== E STEP =====
        if not np.isnan(ll):
            ll_old = ll
        seqs_latent, ll = exact_inference_with_ll(seqs_train, params,
                                                  get_ll=get_ll)
        lls.append(ll)

        # ==== M STEP ====
        sum_p_auto = np.zeros((x_dim, x_dim))
        for seq_latent in seqs_latent:
            sum_p_auto += seq_latent['Vsm'].sum(axis=2) \
                + seq_latent['latent_variable'].dot(
                    seq_latent['latent_variable'].T)
        y = np.hstack(seqs_train['y'])
        latent_variable = np.hstack(seqs_latent['latent_variable'])
        sum_yxtrans = y.dot(latent_variable.T)
        sum_xall = latent_variable.sum(axis=1)[:, np.newaxis]
        sum_yall = y.sum(axis=1)[:, np.newaxis]
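        # Solve jointly for C and d via the normal equations of the
        # linear-Gaussian observation model y ~ N(C x + d, R):
        # [C, d] = [sum_yxtrans, sum_yall] * term^{-1}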
        # term is (xDim+1) x (xDim+1)
        term = np.vstack([np.hstack([sum_p_auto, sum_xall]),
                          np.hstack([sum_xall.T, t.sum().reshape((1, 1))])])
        # cd is yDim x (xDim+1)
        cd = gpfa_util.rdiv(np.hstack([sum_yxtrans, sum_yall]), term)

        params['C'] = cd[:, :x_dim]
        params['d'] = cd[:, -1]

        # The residual covariance R must be computed using the new d
        # (cf. the MATLAB reference implementation:
        #  yCent = bsxfun(@minus, [seq.y], currentParams.d);
        #  R = (yCent * yCent' - (yCent * [seq.latent_variable]') *
        #       currentParams.C') / sum(T);)
        c = params['C']
        d = params['d'][:, np.newaxis]
        if params['notes']['RforceDiagonal']:
            sum_yytrans = (y * y).sum(axis=1)[:, np.newaxis]
            yd = sum_yall * d
            term = ((sum_yxtrans - d.dot(sum_xall.T)) * c).sum(axis=1)
            term = term[:, np.newaxis]
            r = d ** 2 + (sum_yytrans - 2 * yd - term) / t.sum()
            # Set minimum private variance
            r = np.maximum(var_floor, r)
            params['R'] = np.diag(r[:, 0])
        else:
            sum_yytrans = y.dot(y.T)
            yd = sum_yall.dot(d.T)
            term = (sum_yxtrans - d.dot(sum_xall.T)).dot(c.T)
            r = d.dot(d.T) + (sum_yytrans - yd - yd.T - term) / t.sum()
            params['R'] = (r + r.T) / 2  # ensure symmetry

        if params['notes']['learnKernelParams']:
            res = learn_gp_params(seqs_latent, params, verbose=verbose)
            params['gamma'] = res['gamma']

        t_end = time.time() - tic
        iter_time.append(t_end)

        # Verify that likelihood is growing monotonically
        if iter_id <= 2:
            ll_base = ll
        elif verbose and ll < ll_old:
            print('\nError: Data likelihood has decreased '
                  'from {0} to {1}'.format(ll_old, ll))
        elif (ll - ll_base) < (1 + tol) * (ll_old - ll_base):
            break

    if len(lls) < max_iters:
        print('Fitting has converged after {0} EM iterations.'.format(
            len(lls)))

    if np.any(np.diag(params['R']) == var_floor):
        warnings.warn('Private variance floor used for one or more observed '
                      'dimensions in GPFA.')

    return params, seqs_latent, lls, iter_time


def exact_inference_with_ll(seqs, params, get_ll=True):
    """
    Extracts latent trajectories from neural data, given GPFA model
    parameters.

    Parameters
    ----------
    seqs : np.recarray
        Input data structure, whose n-th element (corresponding to the n-th
        experimental trial) has fields:
            y : np.ndarray of shape (#units, #bins)
                neural data
            T : int
                number of bins
    params : dict
        GPFA model parameters with the following fields:
            C : np.ndarray
                FA factor loadings matrix
            d : np.ndarray
                FA mean vector
            R : np.ndarray
                FA noise covariance matrix
            gamma : np.ndarray
                GP timescale
            eps : np.ndarray
                GP noise variance
    get_ll : bool, optional
        specifies whether to compute data log likelihood (default: True)

    Returns
    -------
    seqs_latent : np.recarray
        a copy of the input data structure, augmented with the new fields:
            latent_variable : np.ndarray of shape (#latent_vars, #bins)
                posterior mean of latent variables at each time bin
            Vsm : np.ndarray of shape (#latent_vars, #latent_vars, #bins)
                posterior covariance between latent variables at each
                timepoint
            VsmGP : np.ndarray of shape (#bins, #bins, #latent_vars)
                posterior covariance over time for each latent variable
    ll : float
        data log likelihood; np.nan is returned when `get_ll` is set to
        False
    """
    y_dim, x_dim = params['C'].shape

    # copy the contents of the input data structure to output structure
    dtype_out = [(x, seqs[x].dtype) for x in seqs.dtype.names]
    dtype_out.extend([('latent_variable', object), ('Vsm', object),
                      ('VsmGP', object)])
    seqs_latent = np.empty(len(seqs), dtype=dtype_out)
    for dtype_name in seqs.dtype.names:
        seqs_latent[dtype_name] = seqs[dtype_name]

    # Precomputations
    if params['notes']['RforceDiagonal']:
        rinv = np.diag(1.0 / np.diag(params['R']))
        logdet_r = (np.log(np.diag(params['R']))).sum()
    else:
        rinv = linalg.inv(params['R'])
        rinv = (rinv + rinv.T) / 2  # ensure symmetry
        logdet_r = gpfa_util.logdet(params['R'])
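    # C^T R^{-1} and C^T R^{-1} C appear in the posterior for every trial,
    # so compute them once up front.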
    c_rinv = params['C'].T.dot(rinv)
    c_rinv_c = c_rinv.dot(params['C'])

    t_all = seqs_latent['T']
    t_uniq = np.unique(t_all)
    ll = 0.

    # Overview:
    # - Outer loop on each element of Tu.
    # - For each element of Tu, find all trials with that length.
    # - Do inference and LL computation for all those trials together.
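    # For a trial of length T, the joint posterior over the stacked latents
    # is Gaussian with covariance M^{-1} = (K^{-1} + C^T R^{-1} C)^{-1} and
    # mean M^{-1} C^T R^{-1} (y - d); the loop below evaluates both.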
    for t in t_uniq:
        k_big, k_big_inv, logdet_k_big = gpfa_util.make_k_big(params, t)
        k_big = sparse.csr_matrix(k_big)

        blocks = [c_rinv_c for _ in range(t)]
        c_rinv_c_big = linalg.block_diag(*blocks)  # (xDim*T) x (xDim*T)
        minv, logdet_m = gpfa_util.inv_persymm(k_big_inv + c_rinv_c_big,
                                               x_dim)

        # Note that posterior covariance does not depend on observations,
        # so can compute once for all trials with same T.
        # xDim x xDim posterior covariance for each timepoint
        vsm = np.full((x_dim, x_dim, t), np.nan)
        idx = np.arange(0, x_dim * t + 1, x_dim)
        for i in range(t):
            vsm[:, :, i] = minv[idx[i]:idx[i + 1], idx[i]:idx[i + 1]]

        # T x T posterior covariance for each GP
        vsm_gp = np.full((t, t, x_dim), np.nan)
        for i in range(x_dim):
            vsm_gp[:, :, i] = minv[i::x_dim, i::x_dim]

        # Process all trials with length T
        n_list = np.where(t_all == t)[0]
        # dif is yDim x sum(T)
        dif = np.hstack(seqs_latent[n_list]['y']) - params['d'][:, np.newaxis]
        # term1_mat is (xDim*T) x length(n_list)
        term1_mat = c_rinv.dot(dif).reshape((x_dim * t, -1), order='F')

        # Compute blk_prod = c_rinv_c_big * minv efficiently;
        # blk_prod is block persymmetric, so just compute top half
        t_half = int(np.ceil(t / 2.0))
        blk_prod = np.zeros((x_dim * t_half, x_dim * t))
        idx = range(0, x_dim * t_half + 1, x_dim)
        for i in range(t_half):
            blk_prod[idx[i]:idx[i + 1], :] = c_rinv_c.dot(
                minv[idx[i]:idx[i + 1], :])
        blk_prod = k_big[:x_dim * t_half, :].dot(
            gpfa_util.fill_persymm(np.eye(x_dim * t_half, x_dim * t) -
                                   blk_prod, x_dim, t))
        # latent_variable_mat is (xDim*T) x length(n_list)
        latent_variable_mat = gpfa_util.fill_persymm(
            blk_prod, x_dim, t).dot(term1_mat)

        for i, n in enumerate(n_list):
            seqs_latent[n]['latent_variable'] = \
                latent_variable_mat[:, i].reshape((x_dim, t), order='F')
            seqs_latent[n]['Vsm'] = vsm
            seqs_latent[n]['VsmGP'] = vsm_gp

        if get_ll:
            # Compute data likelihood
            val = -t * logdet_r - logdet_k_big - logdet_m \
                  - y_dim * t * np.log(2 * np.pi)
            ll = ll + len(n_list) * val - (rinv.dot(dif) * dif).sum() \
                + (term1_mat.T.dot(minv) * term1_mat.T).sum()

    if get_ll:
        ll /= 2
    else:
        ll = np.nan

    return seqs_latent, ll


def learn_gp_params(seqs_latent, params, verbose=False):
    """Updates parameters of the GP state model, given neural trajectories.

    Parameters
    ----------
    seqs_latent : np.recarray
        data structure containing neural trajectories
    params : dict
        current GP state model parameters, which gives the starting point
        for gradient optimization
    verbose : bool, optional
        specifies whether to display status messages (default: False)

    Returns
    -------
    param_opt : dict
        updated GP state model parameters, keyed by parameter name
        (currently only 'gamma')

    Raises
    ------
    ValueError
        If `params['covType'] != 'rbf'`.
        If `params['notes']['learnGPNoise']` is set to True.
    """
    if params['covType'] != 'rbf':
        raise ValueError("Only 'rbf' GP covariance type is supported.")
    if params['notes']['learnGPNoise']:
        raise ValueError("learnGPNoise is not supported.")

    param_name = 'gamma'
    grad_fn = gpfa_util.grad_betgam
    param_init = params[param_name]
    param_opt = {param_name: np.empty_like(param_init)}

    x_dim = param_init.shape[-1]
    precomp = gpfa_util.make_precomp(seqs_latent, x_dim)

    # Loop once for each state dimension (each GP)
    for i in range(x_dim):
        const = {'eps': params['eps'][i]}
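        # Optimize over log(gamma) so the unconstrained L-BFGS-B search
        # keeps gamma positive; exponentiate the optimum below.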
        initp = np.log(param_init[i])
        res_opt = optimize.minimize(grad_fn, initp,
                                    args=(precomp[i], const),
                                    method='L-BFGS-B', jac=True)
        param_opt['gamma'][i] = np.exp(res_opt.x)

        if verbose:
            print('\n Converged p; xDim:{}, p:{}'.format(i, res_opt.x))

    return param_opt


def orthonormalize(params_est, seqs):
    """
    Orthonormalize the columns of the loading matrix C and apply the
    corresponding linear transform to the latent variables.

    Parameters
    ----------
    params_est : dict
        First return value of extract_trajectory() on the training data
        set. Estimated model parameters. When the GPFA method is used, the
        following parameters are contained:
            covType : {'rbf', 'tri', 'logexp'}
                type of GP covariance
                Currently, only 'rbf' is supported.
            gamma : np.ndarray of shape (1, #latent_vars)
                related to GP timescales by 'bin_width / sqrt(gamma)'
            eps : np.ndarray of shape (1, #latent_vars)
                GP noise variances
            d : np.ndarray of shape (#units, 1)
                observation mean
            C : np.ndarray of shape (#units, #latent_vars)
                mapping between the neuronal data space and the latent
                variable space
            R : np.ndarray of shape (#units, #units)
                observation noise covariance
    seqs : np.recarray
        Contains the embedding of the training data into the latent
        variable space. Data structure, whose n-th entry (corresponding to
        the n-th experimental trial) has fields
            T : int
                number of timesteps
            y : np.ndarray of shape (#units, #bins)
                neural data
            latent_variable : np.ndarray of shape (#latent_vars, #bins)
                posterior mean of latent variables at each time bin
            Vsm : np.ndarray of shape (#latent_vars, #latent_vars, #bins)
                posterior covariance between latent variables at each
                timepoint
            VsmGP : np.ndarray of shape (#bins, #bins, #latent_vars)
                posterior covariance over time for each latent variable

    Returns
    -------
    params_est : dict
        Estimated model parameters, including `Corth`, obtained by
        orthonormalizing the columns of C.
    seqs : np.recarray
        Training data structure that contains the new field
        `latent_variable_orth`, the orthonormalized neural trajectories.
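
    Examples
    --------
    A minimal sketch (assuming `params_est` and `seqs` come from a prior
    call to `fit` and `exact_inference_with_ll`, respectively):

    >>> params_est, seqs = orthonormalize(params_est, seqs)  # doctest: +SKIP
    >>> Corth = params_est['Corth']  # doctest: +SKIP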
- """
- C = params_est['C']
- X = np.hstack(seqs['latent_variable'])
- latent_variable_orth, Corth, _ = gpfa_util.orthonormalize(X, C)
- seqs = gpfa_util.segment_by_trial(
- seqs, latent_variable_orth, 'latent_variable_orth')
- params_est['Corth'] = Corth
- return Corth, seqs