analysis.stan 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. // TODO
  2. // use speech rates to set priors on truth_vocs
  3. data {
  4. int<lower=1> n_classes; // number of classes
  5. // analysis data block
  6. int<lower=1> n_recs;
  7. int<lower=1> n_children;
  8. array[n_recs] int<lower=1> children;
  9. array[n_recs] real<lower=1> age;
  10. array[n_recs, n_classes] int<lower=0> vocs;
  11. array[n_children] int<lower=1> corpus;
  12. real<lower=0> duration;
  13. // speaker confusion data block
  14. int<lower=1> n_clips; // number of clips
  15. int<lower=1> n_groups; // number of groups
  16. int<lower=1> n_corpora;
  17. array [n_clips] int group;
  18. array [n_clips] int conf_corpus;
  19. array [n_clips,n_classes,n_classes] int vtc;
  20. array [n_clips,n_classes] int truth;
  21. int<lower=1> n_validation;
  22. // actual speech rates
  23. int<lower=1> n_rates;
  24. array [n_rates,n_classes] int<lower=0> speech_rates;
  25. array [n_rates] int group_corpus;
  26. array [n_rates] real<lower=0> durations;
  27. }
  28. parameters {
  29. matrix<lower=0> [n_recs, n_classes] truth_vocs;
  30. array [n_children] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion_baseline;
  31. // confusion parameters
  32. matrix<lower=0,upper=1>[n_classes,n_classes] mus;
  33. matrix<lower=1>[n_classes,n_classes] etas;
  34. array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] group_confusion;
  35. array [n_corpora] matrix[n_classes,n_classes] corpus_bias;
  36. matrix<lower=0>[n_classes,n_classes] corpus_sigma;
  37. // speech rates
  38. matrix<lower=1>[n_classes,n_corpora] speech_rate_alpha;
  39. matrix<lower=0>[n_classes,n_corpora] speech_rate_mu;
  40. matrix<lower=0> [n_classes,n_rates] speech_rate;
  41. }
  42. transformed parameters {
  43. array [n_children] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion;
  44. for (c in 1:n_children) {
  45. actual_confusion[c] = inv_logit(logit(actual_confusion_baseline[c])+corpus_bias[corpus[c]]);
  46. }
  47. }
  48. model {
  49. // actual model
  50. vector [4] expect;
  51. vector [4] sd;
  52. for (k in 1:n_recs) {
  53. expect = rep_vector(0, 4);
  54. sd = rep_vector(0, 4);
  55. for (i in 1:n_classes) {
  56. for (j in 1:n_classes) {
  57. expect[i] += truth_vocs[k,j]*actual_confusion[children[k],j,i];
  58. sd[i] += truth_vocs[k,j]*actual_confusion[children[k],j,i]*(1-actual_confusion[children[k],j,i]);
  59. }
  60. }
  61. vocs[k,:] ~ normal(expect, sqrt(sd));
  62. }
  63. for (c in 1:n_children) {
  64. for (i in 1:n_classes) {
  65. actual_confusion_baseline[c,i] ~ beta_proportion(mus[i,:], etas[i,:]);
  66. }
  67. }
  68. for (k in 1:n_recs) {
  69. truth_vocs[k,:] ~ gamma(
  70. speech_rate_alpha[:,corpus[children[k]]],
  71. (speech_rate_alpha[:,corpus[children[k]]]./speech_rate_mu[:,corpus[children[k]]])/1000/duration
  72. );
  73. }
  74. // speaker confusion model
  75. for (k in n_validation:n_clips) {
  76. for (i in 1:n_classes) {
  77. for (j in 1:n_classes) {
  78. vtc[k,i,j] ~ binomial(
  79. truth[k,j], inv_logit(logit(group_confusion[group[k],j,i]) + corpus_bias[conf_corpus[k],j,i])
  80. );
  81. }
  82. }
  83. }
  84. for (i in 1:n_classes) {
  85. for (j in 1:n_classes) {
  86. if (i==j) {
  87. mus[i,j] ~ beta(2,1);
  88. }
  89. else {
  90. mus[i,j] ~ beta(1,2);
  91. }
  92. etas[i,j] ~ pareto(1,1.5);
  93. }
  94. }
  95. for (c in 1:n_groups) {
  96. for (i in 1:n_classes) {
  97. for (j in 1:n_classes) {
  98. group_confusion[c,i,j] ~ beta_proportion(mus[i,j], etas[i,j]);
  99. }
  100. }
  101. }
  102. for (i in 1:n_classes) {
  103. for (j in 1:n_classes) {
  104. for (c in 1:n_corpora) {
  105. corpus_bias[c,j,i] ~ normal(0, corpus_sigma[j,i]);
  106. }
  107. corpus_sigma[j,i] ~ normal(0, 1);
  108. }
  109. }
  110. // speech rates
  111. for (i in 1:n_classes) {
  112. speech_rate_alpha[i,:] ~ normal(1, 1);
  113. speech_rate_mu[i,:] ~ exponential(2);
  114. }
  115. for (g in 1:n_rates) {
  116. for (i in 1:n_classes) {
  117. speech_rate[i,g] ~ gamma(
  118. speech_rate_alpha[i,group_corpus[g]],
  119. (speech_rate_alpha[i,group_corpus[g]]/speech_rate_mu[i,group_corpus[g]])/1000
  120. );
  121. speech_rates[g,i] ~ poisson(speech_rate[i,g]*durations[g]);
  122. }
  123. }
  124. }