dev_siblings_combined_binomial_hurdle.stan 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. functions { // no functions are defined inline; everything comes from the includes below (presumably including inverse_model_lpdf and confusion_model_lpdf used in the model block -- confirm)
  2. #include "blocks/confusion_model_binomial_hurdle_fast.stan"
  3. #include "blocks/confusion_inverse_model_binomial_hurdle.stan"
  4. #include "blocks/behavior_model_truth.stan"
  5. }
  6. // TODO
  7. // use speech rates to set priors on truth_vocs
  8. data {
  9. int<lower=1> n_classes; // number of classes
  10. // analysis data block
  11. int<lower=1> n_recs; // length of the per-recording arrays below
  12. int<lower=1> n_children; // length of the per-child arrays
  13. array[n_recs] int<lower=1,upper=n_children> children; // child index of each recording; bounded because it indexes arrays of size n_children
  14. array[n_recs] real<lower=0> age; // age attached to each recording
  15. array[n_recs] int<lower=-1> siblings; // per-recording sibling value; -1 is allowed (presumably "unknown" -- confirm)
  16. array[n_recs, n_classes] int<lower=0> vocs_algo1; // per-recording, per-class counts from algorithm 1
  17. array[n_recs, n_classes] int<lower=0> vocs_algo2; // per-recording, per-class counts from algorithm 2
  18. array[n_children] int<lower=1> corpus; // corpus index of each child
  19. real<lower=0> recs_duration; // single duration value shared by all recordings
  20. // speaker confusion data block
  21. int<lower=1> n_clips; // number of clips
  22. int<lower=1> n_groups; // number of groups
  23. int<lower=1> n_corpora;
  24. array [n_clips] int<lower=1,upper=n_groups> group; // group index of each clip; bounded because it indexes arrays of size n_groups
  25. array [n_clips] int conf_corpus;
  26. array [n_clips,n_classes] int<lower=0> algo1_total; // algo vocs attributed to specific speakers
  27. array [n_clips,n_classes] int<lower=0> algo2_total; // algo vocs attributed to specific speakers
  28. array [n_clips,n_classes] int<lower=0> truth_total; // per-clip, per-class reference counts compared against both algorithms
  29. array [n_clips] real<lower=0> clip_duration;
  30. array [n_clips] real<lower=0> clip_age;
  31. int<lower=0> n_validation;
  32. // actual speech rates
  33. int<lower=1> n_rates; // length of the speech-rate arrays below
  34. int<lower=1> n_speech_rate_children;
  35. array [n_rates,n_classes] int<lower=0> speech_rates;
  36. array [n_rates] int group_corpus; // copied into speech_rate_child_corpus (declared lower=1) in transformed data
  37. array [n_rates] real<lower=0> durations;
  38. array [n_rates] real<lower=0> speech_rate_age;
  39. array [n_rates] int<lower=-1> speech_rate_siblings;
  40. array [n_rates] int<lower=1,upper=n_speech_rate_children> speech_rate_child;
  41. // parallel processing
  42. int<lower=1> threads;
  43. }
  44. transformed data {
  45. vector<lower=0>[n_groups] recording_age; // clip_age re-indexed by group
  46. array[n_speech_rate_children] int<lower=1> speech_rate_child_corpus; // group_corpus re-indexed by speech-rate child
  47. array[n_children] int<lower=-1> child_siblings; // siblings re-indexed by child
  48. array[n_speech_rate_children] int<lower=-1> speech_rate_child_siblings; // speech_rate_siblings re-indexed by speech-rate child
  49. real p_outlier = 0.025; // fixed mixture weight
  50. int has_siblings = 0; // number of children whose child_siblings value is > 0
  51. int no_siblings = 0; // number of children whose child_siblings value is exactly 0
  52. for (clip_idx in 1:n_clips) {
  53. recording_age[group[clip_idx]] = clip_age[clip_idx]; // if a group has several clips, the last one wins
  54. }
  55. for (rate_idx in 1:n_rates) {
  56. speech_rate_child_siblings[speech_rate_child[rate_idx]] = speech_rate_siblings[rate_idx];
  57. }
  58. for (rate_idx in 1:n_rates) {
  59. speech_rate_child_corpus[speech_rate_child[rate_idx]] = group_corpus[rate_idx];
  60. }
  61. for (rec_idx in 1:n_recs) {
  62. child_siblings[children[rec_idx]] = siblings[rec_idx]; // if a child has several recordings, the last one wins
  63. }
  64. for (child_idx in 1:n_children) {
  65. if (child_siblings[child_idx] > 0) {
  66. has_siblings += 1;
  67. }
  68. else if (child_siblings[child_idx] == 0) {
  69. no_siblings += 1;
  70. }
  71. }
  72. }
  73. parameters {
  74. matrix<lower=0>[n_children,n_classes-1] mu_child_level; // not referenced in the visible model code; presumably consumed by the included behavior blocks -- confirm
  75. vector [n_children] child_dev_age; // not referenced in the visible model code; presumably consumed by the included behavior blocks -- confirm
  76. matrix<lower=0> [n_recs, n_classes] truth_vocs; // latent per-recording, per-class quantities shared by the inverse models of both algorithms
  77. // nuisance parameters
  78. array [n_recs] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion_baseline_algo1; // one matrix per recording; sliced argument of the algo1 inverse-model reduce_sum
  79. array [n_recs] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion_baseline_algo2; // one matrix per recording; sliced argument of the algo2 inverse-model reduce_sum
  80. // confusion parameters
  81. matrix<lower=1>[n_classes,n_classes] etas_algo1; // second beta_proportion argument for lambda_algo1; pareto(1, 1.5) prior
  82. matrix<lower=0,upper=1>[n_classes,n_classes] mus_algo1; // first beta_proportion argument (mean) for lambda_algo1; uniform(0, 1) prior
  83. array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] lambda_algo1; // one matrix per group; sliced argument of the algo1 confusion-model reduce_sum
  84. matrix<lower=0,upper=1>[n_classes,n_classes] p_algo1; // beta(3, 1) prior; passed to the algo1 inverse and confusion models
  85. matrix<lower=1>[n_classes,n_classes] etas_algo2; // second beta_proportion argument for lambda_algo2; pareto(1, 1.5) prior
  86. matrix<lower=0,upper=1>[n_classes,n_classes] mus_algo2; // first beta_proportion argument (mean) for lambda_algo2; uniform(0, 1) prior
  87. array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] lambda_algo2; // one matrix per group; sliced argument of the algo2 confusion-model reduce_sum
  88. matrix<lower=0,upper=1>[n_classes,n_classes] p_algo2; // beta(3, 1) prior; passed to the algo2 inverse model
  89. // behavior model parameters
  90. #include "blocks/behavior_model_parameters.stan"
  91. // parameters specific to human annotations
  92. #include "blocks/human_annotations_parameters.stan"
  93. }
  94. model { // accumulates the log density: inverse confusion models, confusion models, priors, plus the included behavior and annotation blocks
  95. //actual model
  96. // inverse confusion model
  97. target += reduce_sum( // algorithm 1
  98. inverse_model_lpdf, actual_confusion_baseline_algo1, 1, // sliced over the n_recs per-recording matrices, grainsize 1
  99. n_recs, n_classes, recs_duration,
  100. vocs_algo1, age,
  101. truth_vocs, mus_algo1, etas_algo1, p_algo1
  102. );
  103. target += reduce_sum( // algorithm 2, same structure
  104. inverse_model_lpdf, actual_confusion_baseline_algo2, 1,
  105. n_recs, n_classes, recs_duration,
  106. vocs_algo2, age,
  107. truth_vocs, mus_algo2, etas_algo2, p_algo2
  108. );
  109. // contribution of full recordings to the model of behavior
  110. #include "blocks/behavior_observations_model.stan"
  111. target += reduce_sum( // confusion model for algorithm 1
  112. confusion_model_lpdf, lambda_algo1, 1, // sliced over the n_groups per-group matrices, grainsize 1
  113. n_classes, n_clips,
  114. algo1_total, truth_total, group, clip_duration, clip_age,
  115. p_algo1
  116. );
  117. target += reduce_sum( // confusion model for algorithm 2
  118. confusion_model_lpdf, lambda_algo2, 1,
  119. n_classes, n_clips,
  120. algo2_total, truth_total, group, clip_duration, clip_age,
  121. p_algo2 // algorithm 2 must use its own p matrix (p_algo1 here was a copy-paste slip)
  122. );
  123. // priors on the nuisance parameters of the confusion model
  124. for (i in 1:n_classes) { // row-wise priors, identical for both algorithms
  125. p_algo1[i] ~ beta(3,1);
  126. mus_algo1[i,:] ~ uniform(0, 1);
  127. etas_algo1[i,:] ~ pareto(1, 1.5);
  128. p_algo2[i] ~ beta(3,1);
  129. mus_algo2[i,:] ~ uniform(0, 1);
  130. etas_algo2[i,:] ~ pareto(1, 1.5);
  131. }
  132. for (c in 1:n_groups) { // prior tying each group's lambda matrices to the shared mus/etas
  133. real ll_normal = 0; // log density of group c's lambdas under the beta_proportion prior, both algorithms
  134. for (i in 1:n_classes) {
  135. ll_normal += beta_proportion_lpdf(lambda_algo1[c,i,:] | mus_algo1[i,:], etas_algo1[i,:]) + beta_proportion_lpdf(lambda_algo2[c,i,:] | mus_algo2[i,:], etas_algo2[i,:]);
  136. }
  137. // tolerance to outliers
  138. target += log_mix(p_outlier, 0, ll_normal); // two-component mixture: weight p_outlier on log density 0, weight 1 - p_outlier on ll_normal
  139. }
  140. // priors on the hierarchical model of speech behavior
  141. #include "blocks/behavior_model_priors.stan"
  142. // human annotations contribution
  143. #include "blocks/human_annotations.stan"
  144. }