ei_cov_softmax_control_nu.stan 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. functions {
  2. real partial_sum_lpdf(array[] vector X,
  3. int start, int end,
  4. int R,
  5. int C,
  6. array[,] int indices,
  7. array[,] int NR,
  8. array[,] int NC,
  9. array[] vector cov,
  10. array[] vector expertise,
  11. matrix mu, vector L_sigma,
  12. array[] vector delta,
  13. array[] vector gamma,
  14. array[] vector beta_vec
  15. ) {
  16. vector[C] theta;
  17. matrix[C,R] beta;
  18. vector [C] tmp = rep_vector(0, C);
  19. real lpmf = 0;
  20. for (i in start:end) {
  21. for(r in 1:R) {
  22. if (NR[i,r] == 0) {
  23. beta[:,r] = rep_vector(0, C);
  24. }
  25. else {
  26. lpmf += normal_lpdf(beta_vec[indices[i,r]] | 0, 1);
  27. tmp = mu[r,:]';//'
  28. tmp[1:C-1] += beta_vec[indices[i,r]] .* L_sigma;
  29. beta[:,r] = softmax(tmp + delta[r] .* cov[i] + gamma[r] .* expertise[i]);
  30. }
  31. }
  32. theta = beta*X[i-start+1,:];
  33. lpmf += multinomial_lpmf(NC[i,:] | theta);
  34. }
  35. return lpmf;
  36. }
  37. }
  38. data {
  39. int<lower=1> n_units;
  40. int<lower=2> R;
  41. int<lower=2> C;
  42. array[n_units,R] int NR;
  43. array[n_units,C] int NC;
  44. array[n_units] vector[C] cov;
  45. array[n_units] vector[C] expertise;
  46. matrix<lower=0,upper=1>[R,C] nu;
  47. int<lower=1> threads;
  48. }
  49. transformed data {
  50. array [n_units] int population;
  51. array[n_units] vector<lower=0,upper=1>[R] X;
  52. array[n_units,R] int indices;
  53. int largest_index = 1;
  54. for (i in 1:n_units) {
  55. population[i] = sum(NR[i,:]);
  56. for (r in 1:R) {
  57. X[i,r] = (NR[i,r]*1.0)/(population[i]*1.0);
  58. if (NR[i,r] > 0) {
  59. indices[i,r] = largest_index;
  60. largest_index += 1;
  61. }
  62. else {
  63. indices[i,r] = 0;
  64. }
  65. }
  66. }
  67. print("largest index:", largest_index);
  68. print("RxN: ", R*n_units);
  69. }
  70. parameters {
  71. matrix[R, C-1] mu_;
  72. array[R] vector[C] delta;
  73. array[R] vector[C] gamma;
  74. array [largest_index] vector[C-1] beta_vec;
  75. real delta_0;
  76. real delta_nu;
  77. //real gamma_nu;
  78. real mu_nu;
  79. //cholesky_factor_corr[C-1] L_Omega;
  80. vector<lower=0>[C-1] L_sigma;
  81. }
  82. model {
  83. matrix[R, C] mu;
  84. for (r in 1:R) {
  85. mu[r,:C-1] = mu_[r];
  86. mu[r,C] = 0;
  87. mu[r,:] += mu_nu*nu[r,:];
  88. }
  89. //matrix[C-1, C-1] L_Sigma = diag_pre_multiply(L_sigma, L_Omega);
  90. //L_Omega ~ lkj_corr_cholesky(10);
  91. L_sigma ~ exponential(1);
  92. for (r in 1:R) {
  93. mu_[r] ~ normal(0, 1);
  94. delta[r] ~ normal(delta_0+delta_nu*nu[r,:], 1);
  95. gamma[r] ~ normal(0, 1);
  96. }
  97. delta_0 ~ normal(0, 1);
  98. delta_nu ~ normal(0, 1);
  99. mu_nu ~ normal(0, 1);
  100. target += reduce_sum(
  101. partial_sum_lpdf, X, n_units%/%(threads*40),
  102. R, C, indices, NR, NC, cov, expertise,
  103. mu, L_sigma,
  104. delta, gamma,
  105. beta_vec
  106. );
  107. }
  108. generated quantities {
  109. array [n_units,R] vector[C] beta;
  110. array [R] vector[C] counts;
  111. for (r in 1:R) {
  112. counts[r] = rep_vector(0, C);
  113. }
  114. vector [C] tmp = rep_vector(0, C);
  115. matrix[R, C] mu;
  116. for (r in 1:R) {
  117. mu[r,:C-1] = mu_[r];
  118. mu[r,C] = 0;
  119. mu[r,:] += mu_nu*nu[r,:];
  120. }
  121. for (r in 1:R) {
  122. for (i in 1:n_units) {
  123. if (NR[i,r] == 0) {
  124. beta[i,r] = rep_vector(0, C);
  125. }
  126. else {
  127. tmp = mu[r,:]';//'
  128. tmp[1:C-1] += beta_vec[indices[i,r]] .* L_sigma;
  129. beta[i,r] = softmax(tmp + delta[r] .* cov[i] + gamma[r] .* expertise[i]);
  130. counts[r] += NR[i,r]*beta[i,r,:];
  131. }
  132. }
  133. counts[r] = counts[r]/sum(population);
  134. }
  135. }