enumeration.stan 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. functions {
  2. real confusion_model_lpmf(array[] int group,
  3. int start, int end,
  4. int n_classes,
  5. array[,] int vtc,
  6. array[,] int truth,
  7. array[] real clip_duration,
  8. array[] matrix lambda,
  9. array[] vector lambda_fp
  10. ) {
  11. real ll = 0;
  12. vector [4] bp;
  13. vector [4] log_lik;
  14. vector[16384] log_contrib_comb;
  15. int n = size(log_contrib_comb);
  16. for (k in start:end) {
  17. log_lik = rep_vector(0, 4);
  18. for (i in 1:n_classes) {
  19. log_contrib_comb[:n] = rep_vector(0, n);
  20. n = 1;
  21. for (chi in 0:truth[k,1]) {
  22. bp[1] = binomial_lpmf(chi | truth[k,1], lambda[group[k-start+1],1,i]);
  23. for (och in 0:truth[k,2]) {
  24. bp[2] = binomial_lpmf(och | truth[k,2], lambda[group[k-start+1],2,i]);
  25. for (fem in 0:truth[k,3]) {
  26. bp[3] = binomial_lpmf(fem | truth[k,3], lambda[group[k-start+1],3,i]);
  27. for (mal in 0:truth[k, 4]) {
  28. bp[4] = binomial_lpmf(mal | truth[k,4], lambda[group[k-start+1],4,i]);
  29. int delta = vtc[k,i] - (mal+fem+och+chi);
  30. if (delta >= 0) {
  31. log_contrib_comb[n] += sum(bp);
  32. log_contrib_comb[n] += poisson_lpmf(
  33. delta | lambda_fp[group[k-start+1],i]*clip_duration[k]
  34. );
  35. n = n+1;
  36. }
  37. }
  38. }
  39. }
  40. }
  41. if (n>1) {
  42. log_lik[i] = log_sum_exp(log_contrib_comb[1:n-1]);
  43. }
  44. }
  45. ll += log_sum_exp(log_lik);
  46. }
  47. return ll;
  48. }
  49. real model_lpmf(array[] int children,
  50. int start, int end,
  51. int n_recs,
  52. int n_classes,
  53. real duration,
  54. array [,] int vocs,
  55. matrix truth_vocs,
  56. array [] matrix actual_confusion,
  57. array [] vector actual_fp_rate
  58. ) {
  59. real ll = 0;
  60. vector [4] expect;
  61. //vector [4] sd;
  62. for (k in start:end) {
  63. expect = rep_vector(0, 4);
  64. //sd = rep_vector(0, 4);
  65. for (i in 1:n_classes) {
  66. expect[i] = dot_product(truth_vocs[k,:], actual_confusion[k,:,i]);
  67. expect[i] += actual_fp_rate[k,i] * duration;
  68. }
  69. ll += normal_lpdf(vocs[k,:] | expect, sqrt(expect));
  70. }
  71. return ll;
  72. }
  73. }
  74. // TODO
  75. // use speech rates to set priors on truth_vocs
  76. data {
  77. int<lower=1> n_classes; // number of classes
  78. // analysis data block
  79. int<lower=1> n_recs;
  80. int<lower=1> n_children;
  81. array[n_recs] int<lower=1> children;
  82. array[n_recs] real<lower=1> age;
  83. array[n_recs, n_classes] int<lower=0> vocs;
  84. array[n_children] int<lower=1> corpus;
  85. real<lower=0> recs_duration;
  86. // speaker confusion data block
  87. int<lower=1> n_clips; // number of clips
  88. int<lower=1> n_groups; // number of groups
  89. int<lower=1> n_corpora;
  90. array [n_clips] int group;
  91. array [n_clips] int conf_corpus;
  92. array [n_clips,n_classes] int<lower=0> vtc_total; // vtc vocs attributed to specific speakers
  93. array [n_clips,n_classes] int<lower=0> truth_total;
  94. array [n_clips] real<lower=0> clip_duration;
  95. int<lower=1> n_validation;
  96. // actual speech rates
  97. int<lower=1> n_rates;
  98. array [n_rates,n_classes] int<lower=0> speech_rates;
  99. array [n_rates] int group_corpus;
  100. array [n_rates] real<lower=0> durations;
  101. }
  102. parameters {
  103. matrix<lower=0> [n_recs, n_classes] truth_vocs;
  104. //array [n_children] matrix<lower=0>[n_classes,n_classes] actual_confusion_baseline;
  105. array [n_recs] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion_baseline;
  106. array [n_recs] vector<lower=0>[n_classes] actual_fp_rate;
  107. // confusion parameters
  108. matrix<lower=1>[n_classes,n_classes] etas;
  109. matrix<lower=0,upper=1>[n_classes,n_classes] mus;
  110. array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] lambda;
  111. vector<lower=1>[n_classes] alphas_fp;
  112. vector<lower=0>[n_classes] mus_fp;
  113. array [n_groups] vector<lower=0>[n_classes] lambda_fp;
  114. //array [n_corpora] matrix[n_classes,n_classes] corpus_bias;
  115. //matrix<lower=0>[n_classes,n_classes] corpus_sigma;
  116. // speech rates
  117. matrix<lower=1>[n_classes,n_corpora] speech_rate_alpha;
  118. matrix<lower=0>[n_classes,n_corpora] speech_rate_mu;
  119. matrix<lower=0> [n_classes,n_rates] speech_rate;
  120. }
  121. transformed parameters {
  122. // array [n_children] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion;
  123. // for (c in 1:n_children) {
  124. // actual_confusion[c] = inv_logit(logit(actual_confusion_baseline[c])+corpus_bias[corpus[c]]);
  125. // }
  126. }
  127. model {
  128. //actual model
  129. //target += reduce_sum(
  130. // model_lpmf, children, 1,
  131. // n_recs, n_classes, recs_duration,
  132. // vocs,
  133. // truth_vocs, actual_confusion_baseline, actual_fp_rate
  134. //);
  135. for (k in 1:n_recs) {
  136. for (i in 1:n_classes) {
  137. actual_confusion_baseline[k,i] ~ beta_proportion(mus[i,:], etas[i,:]);
  138. }
  139. actual_fp_rate[k] ~ gamma(alphas_fp, alphas_fp./mus_fp);
  140. }
  141. for (k in 1:n_recs) {
  142. truth_vocs[k,:] ~ gamma(
  143. speech_rate_alpha[:,corpus[children[k]]],
  144. (speech_rate_alpha[:,corpus[children[k]]]./speech_rate_mu[:,corpus[children[k]]])/1000/recs_duration
  145. );
  146. }
  147. target += reduce_sum(
  148. confusion_model_lpmf, group, n_clips%/%640,
  149. n_classes,
  150. vtc_total, truth_total, clip_duration,
  151. lambda, lambda_fp
  152. );
  153. mus_fp ~ exponential(1);
  154. alphas_fp ~ normal(1, 1);
  155. for (i in 1:n_classes) {
  156. lambda_fp[:,i] ~ gamma(alphas_fp[i], alphas_fp[i]/mus_fp[i]);
  157. for (j in 1:n_classes) {
  158. if (i==j) {
  159. mus[i,j] ~ beta(2,1);
  160. }
  161. else {
  162. mus[i,j] ~ beta(1,2);
  163. }
  164. etas[i,j] ~ pareto(1, 1.5);
  165. for (c in 1:n_groups) {
  166. lambda[c,i,j] ~ beta_proportion(mus[i,j],etas[i,j]);
  167. }
  168. }
  169. }
  170. // speech rates
  171. for (i in 1:n_classes) {
  172. speech_rate_alpha[i,:] ~ normal(1, 1);
  173. speech_rate_mu[i,:] ~ exponential(2);
  174. }
  175. for (g in 1:n_rates) {
  176. for (i in 1:n_classes) {
  177. speech_rate[i,g] ~ gamma(
  178. speech_rate_alpha[i,group_corpus[g]],
  179. (speech_rate_alpha[i,group_corpus[g]]/speech_rate_mu[i,group_corpus[g]])/1000
  180. );
  181. speech_rates[g,i] ~ poisson(speech_rate[i,g]*durations[g]);
  182. }
  183. }
  184. }