estimate_likelihood.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. import numpy as np
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. from data_analysis.fit_bstrp import fit_bstrp
  5. import scipy.stats as st
  6. from scipy.special import i0
  7. import json
  8. import pickle
  9. from lmfit import Model
  10. def rep_end(dat):
  11. """
  12. Duplicate the first and the last stimuli for visualization.
  13. """
  14. first = dat[dat['Hue Angle'] == dat['Hue Angle'].min()]
  15. first_copy = first.copy()
  16. first_copy['Hue Angle'] = first_copy['Hue Angle'].apply(lambda x: x + 360)
  17. last = dat[dat['Hue Angle'] == dat['Hue Angle'].max()]
  18. last_copy = last.copy()
  19. last_copy['Hue Angle'] = last_copy['Hue Angle'].apply(lambda x: x - 360)
  20. rep_dat = pd.concat([dat, first_copy, last_copy],
  21. ignore_index=False).sort_values('Hue Angle')
  22. return rep_dat
  23. def rect_sin(x, a, b):
  24. """
  25. Rectified sine function.
  26. param x: angle in degree
  27. param a: amplitude
  28. param b: offset
  29. """
  30. shift = -112 # peak at 112
  31. return a * abs(np.sin((x + shift) * np.pi / 180.0)) + b
  32. def mixed_sin(x, s1, s2, a, b):
  33. """
  34. Mixed sine function
  35. param x: angle in degree
  36. param s1, s2: shifts in degree
  37. param a: amplitudes
  38. param b: offsets
  39. """
  40. return a * (np.sin((x + s1) * np.pi / 90.0) + np.sin((x + s2) * np.pi / 180.0)) + b
  41. def fit_mixed_sin(x, y):
  42. """
  43. Fit mixed sine function with lmfit, return predicted values for x grids
  44. :param x: x values
  45. :param y: y values
  46. :return:
  47. """
  48. model = Model(mixed_sin)
  49. res = model.fit(y, x=x, s1=45, s2=90, a=1, b=0)
  50. pars = np.array(list(res.params.valuesdict().values()))
  51. # stds = np.sqrt(np.diagonal(res.covar))
  52. # lower = mixed_sin(x_grid, *(pars - stds))
  53. # upper = mixed_sin(x_grid, *(pars + stds))
  54. dely = res.eval_uncertainty(x=x_grid, sigma=.68) # 68% CI
  55. pred = res.eval(x=x_grid, params=res.params)
  56. lower = pred - dely
  57. upper = pred + dely
  58. fit = {'params': pars,
  59. 'bounds': [lower, upper]}
  60. return fit
  61. def vonmises_sample(t, jnd, n=100):
  62. """
  63. Sample from a customized Von Mises distribution.
  64. P(m|t) = exp(kappa * cos(t - m)) / (2 * pi * i0(kappa))
  65. # Note that
  66. # - The unit of jnd is degree or radian: radian
  67. # - The relationship between kappa and jnd: kappa = 1/var = 1/(JND**2)
  68. #Example usage: Test on vonmises_sample function...
  69. # m = 180
  70. # jnd = 4.
  71. # S = vonmises_sample(m, jnd=jnd, n=1000)
  72. # weights = np.ones_like(S) / len(S)
  73. # plt.hist(S, bins=10, weights=weights)
  74. # plt.show()
  75. :param t hue angle, in degree, center of the distribution
  76. :param jnd kappa = 1/var = 1/(JND**2), the concentration level of the distribution
  77. :param n sample size
  78. """
  79. # Convert degree to radian
  80. t = t / 180 * np.pi
  81. jnd = jnd / 180 * np.pi
  82. class VonMisesPDF(st.rv_continuous):
  83. def _pdf(self, m, t, kappa):
  84. return np.exp(kappa * np.cos(t - m)) / (2 * np.pi * i0(kappa))
  85. # Par a: Lower bound of the support of the distribution
  86. # Par b: Upper bound of the support of the distribution
  87. von_mises = VonMisesPDF(a=0, b=2 * np.pi, name='VonMisesPDF')
  88. # Determine the kappa value to produce JND values matching the fitted j(θ).
  89. profile = {'t': t,
  90. 'kappa': 1.0 / (jnd ** 2)}
  91. if i0(profile['kappa']) == np.inf:
  92. epsilon_values = np.repeat(t, repeats=n)
  93. else:
  94. epsilon_values = von_mises.rvs(t=profile['t'], kappa=profile['kappa'], size=n)
  95. # Convert the angle from rad to deg
  96. epsilon_values = epsilon_values / np.pi * 180
  97. return epsilon_values
  98. def build_matrix(xx, fit):
  99. """
  100. Build a matrix: values in x are histogrammed along the 1st dimension (likelihood function),
  101. and values in y are histogrammed along the 2nd dimension (measurement distribution).
  102. :param xx x gird, should be consistent in the modeling parts
  103. :param fit model fitting params of JND (with low- or high-noise data)
  104. :return: matrix: 2d array, len(xx) x len(xx)
  105. edges: 2d array, [x edges, y edges]
  106. """
  107. n_ss = 100
  108. # ss = []
  109. # for xi in xx:
  110. # # ji = rect_sin(xi, *fit['params'])
  111. # ji = mixed_sin(xi, *fit['params'])
  112. # si = vonmises_sample(t=xi, jnd=ji, n=n_ss)
  113. # ss.append(si)
  114. # ss = np.array(ss)
  115. ss = np.array([vonmises_sample(t=xi, jnd=mixed_sin(xi, *fit['params']), n=n_ss) for xi in xx])
  116. ss_2d = ss.flatten()
  117. xx_2d = np.repeat(xx, n_ss)
  118. # The bi-dimensional histogram of samples x and y.
  119. # Values in x are histogrammed along the 1st dimension and values in y are histogrammed along the 2nd dimension.
  120. matrix, xedges, yedges = np.histogram2d(x=xx_2d, y=ss_2d, bins=len(xx), normed=True)
  121. matrix = matrix.T
  122. xedges = np.round(xedges, 1)
  123. yedges = np.round(yedges, 1)
  124. edges = np.array([xedges, yedges])
  125. return matrix, edges
  126. def estimate_lik(dat, estm, x_grid, n_sample, save_dir):
  127. """
  128. Compute and save all estimates before Bayesian inference, which include:
  129. - fit of JND
  130. - samples from the measurement distribution
  131. - matrix of measurement distribution - likelihood function
  132. - likelihoods
  133. Note the estimates are always paired for low and high noise conditions.
  134. :param dat: data [dataframe]
  135. :param estm: estimates [dataframe]
  136. :param x_grid: grid for generating the matrices
  137. :param n_sample: number of samples for each stimulus pair
  138. :param save_dir: data saving path (and partial name), e.g. 'data_analysis/model_estimates/s0/s0'
  139. :return: None
  140. """
  141. # 1. Load data
  142. lh_data = dat.query('condition == "LH"')
  143. unique_pairs = lh_data[['standard_stim', 'test_stim', 'actual_intensity']]. \
  144. drop_duplicates().sort_values(by=['standard_stim', 'test_stim'])
  145. stim_l = unique_pairs['standard_stim'].values.astype('float')
  146. stim_h = unique_pairs['test_stim'].values.astype('float')
  147. # 2. Fit JND
  148. d_l = rep_end(estm.query('condition == "LL"'))
  149. d_h = rep_end(estm.query('condition == "HH"'))
  150. hue_angles = d_l['Hue Angle'].values
  151. jnd_l = d_l['JND'].values
  152. jnd_h = d_h['JND'].values
  153. # # LL
  154. # fit_l = fit_bstrp(hue_angles, jnd_l, func=rect_sin, n_bs=1000, p_ci=.68) #, p0=[1., 1, -112.5])
  155. # jnd_stim_l = rect_sin(stim_l, *fit_l['params'])
  156. # # HH
  157. # fit_h = fit_bstrp(hue_angles, jnd_h, func=rect_sin, n_bs=1000, p_ci=.68) #, p0=[1., 1, -112.5])
  158. # jnd_stim_h = rect_sin(stim_h, *fit_h['params'])
  159. # LL
  160. # fit_l = fit_bstrp(hue_angles, jnd_l, func=mixed_sin, n_bs=1000, p_ci=.68, p0=[45, 90, 1, 0])
  161. fit_l = fit_mixed_sin(hue_angles, jnd_l)
  162. jnd_stim_l = mixed_sin(stim_l, *fit_l['params'])
  163. # HH
  164. # fit_h = fit_bstrp(hue_angles, jnd_h, func=mixed_sin, n_bs=1000, p_ci=.68, p0=[45, 90, 1, 0])
  165. fit_h = fit_mixed_sin(hue_angles, jnd_h)
  166. jnd_stim_h = mixed_sin(stim_h, *fit_h['params'])
  167. # same for LH... but not used for matrix
  168. d_lh = rep_end(estm.query('condition == "LH"'))
  169. hue_angles = d_lh['Hue Angle'].values
  170. jnd_lh = d_lh['JND'].values
  171. # fit_lh = fit_bstrp(hue_angles, jnd_lh, func=rect_sin, n_bs=1000, p_ci=.68) # , p0=[1., 1, -112.5])
  172. # fit_lh = fit_bstrp(hue_angles, jnd_lh, func=mixed_sin, n_bs=1000, p_ci=.68, p0=[45, 90, 1, 0])
  173. fit_lh = fit_mixed_sin(hue_angles, jnd_lh)
  174. # """
  175. # 3. Build matrix
  176. mat_l, edges_l = build_matrix(x_grid, fit_l)
  177. mat_h, edges_h = build_matrix(x_grid, fit_h)
  178. #
  179. # 4. Sample and estimate likelihoods
  180. # samples: 2d array, len(unique stimulus pairs) x n_sample
  181. # likelihoods: 3d array, len(unique stimulus pairs) x n_sample x len(x_grid)
  182. #
  183. samples_l = np.sort(np.array([vonmises_sample(t=s, jnd=j, n=n_sample)
  184. for s, j in zip(stim_l, jnd_stim_l)]), axis=1)
  185. samples_h = np.sort(np.array([vonmises_sample(t=s, jnd=j, n=n_sample)
  186. for s, j in zip(stim_h, jnd_stim_h)]), axis=1)
  187. samples_size = np.shape(samples_l)
  188. likelihoods_l = np.array([mat_l[np.abs(x_grid - sp).argmin(), :] for sp in samples_l.flatten()])
  189. likelihoods_l = np.reshape(likelihoods_l, (samples_size[0], samples_size[1], len(x_grid)))
  190. likelihoods_h = np.array([mat_h[np.abs(x_grid - sp).argmin(), :] for sp in samples_h.flatten()])
  191. likelihoods_h = np.reshape(likelihoods_h, (samples_size[0], samples_size[1], len(x_grid)))
  192. # """
  193. # 5. Save data
  194. with open(save_dir + '_jnd_fit_l.pickle', 'wb') as f_l:
  195. pickle.dump(fit_l, f_l)
  196. with open(save_dir + '_jnd_fit_h.pickle', 'wb') as f_h:
  197. pickle.dump(fit_h, f_h)
  198. with open(save_dir + '_jnd_fit_lh.pickle', 'wb') as f_lh:
  199. pickle.dump(fit_lh, f_lh)
  200. # """
  201. np.save(save_dir + '_mat_l.npy', mat_l)
  202. np.save(save_dir + '_mat_h.npy', mat_h)
  203. np.save(save_dir + '_mat_edges_l.npy', edges_l)
  204. np.save(save_dir + '_mat_edges_h.npy', edges_h)
  205. np.save(save_dir + '_samples_l.npy', samples_l)
  206. np.save(save_dir + '_samples_h.npy', samples_h)
  207. np.save(save_dir + '_likelihoods_l.npy', likelihoods_l)
  208. np.save(save_dir + '_likelihoods_h.npy', likelihoods_h)
  209. # """
  210. """
  211. ################# Running estimation ######################
  212. """
  213. # """
  214. model_path = "data_analysis/model_estimates_v2"
  215. # x_grid = np.arange(0.0001, 359.9999, 2.)
  216. x_grid = np.load(f"{model_path}/x_grid.npy")
  217. n_sample = 100
  218. # """
  219. """
  220. # For single sub
  221. all_data = pd.read_csv('data/all_sel_data.csv')
  222. all_estimates = pd.read_csv('data_analysis/pf_estimates/all_estimates.csv')
  223. # sub_list = ['s1', 's2', 's4', 's5', 's6']
  224. # for sub in sub_list:
  225. for sub in ['s3']:
  226. sub_in_data = sub[0] + '0' + sub[1] # different naming way in data
  227. sub_data = all_data.query('subject == @sub_in_data')
  228. sub_estimates = all_estimates.query('subject == @sub')
  229. sub_path = f"{model_path}/{sub}/{sub}"
  230. estimate_lik(sub_data, sub_estimates, x_grid, n_sample, sub_path)
  231. """
  232. """
  233. # For average sub
  234. all_data = pd.read_csv('data/all_sel_data.csv')
  235. avg_estimates = pd.read_csv('data_analysis/pf_estimates/avg_estimates.csv')
  236. sub_path = f"{model_path}/sAVG/sAVG"
  237. estimate_lik(all_data, avg_estimates, x_grid, n_sample, sub_path)
  238. """