- // NOTE: a garbled line-number column left over from text extraction was removed here.
- functions {
- // User-defined log-density functions pulled in from shared blocks.
- // The model block below calls confusion_model_lpdf and inverse_model_lpdf
- // through reduce_sum; their definitions live in the included files
- // (presumably binomial-hurdle confusion models -- confirm in blocks/).
- #include "blocks/confusion_model_binomial_hurdle_fast.stan"
- #include "blocks/confusion_inverse_model_binomial_hurdle.stan"
- #include "blocks/behavior_model_truth.stan"
- }
- // TODO
- // use speech rates to set priors on truth_vocs
- data {
- int<lower=1> n_classes; // number of speaker classes (rows/cols of the confusion matrices)
- // analysis data block
- int<lower=1> n_recs; // number of full recordings
- int<lower=1> n_children; // number of distinct children in the recordings
- array[n_recs] int<lower=1> children; // child index for each recording
- array[n_recs] real<lower=0> age; // child age at each recording (units not shown here -- TODO confirm)
- array[n_recs] int<lower=-1> siblings; // sibling count per recording; -1 presumably codes "missing" -- verify
- array[n_recs, n_classes] int<lower=0> vocs_algo1; // per-class vocalization counts from algorithm 1
- array[n_recs, n_classes] int<lower=0> vocs_algo2; // per-class vocalization counts from algorithm 2
- array[n_children] int<lower=1> corpus; // corpus membership of each child
- real<lower=0> recs_duration; // recording duration, shared by all recordings
- // speaker confusion data block
- int<lower=1> n_clips; // number of annotated clips
- int<lower=1> n_groups; // number of groups (clips map onto groups via `group`)
- int<lower=1> n_corpora; // number of corpora
- array [n_clips] int group; // group index of each clip
- array [n_clips] int conf_corpus; // corpus index of each clip
- array [n_clips,n_classes] int<lower=0> algo1_total; // algo 1 vocs attributed to specific speakers
- array [n_clips,n_classes] int<lower=0> algo2_total; // algo 2 vocs attributed to specific speakers
- array [n_clips,n_classes] int<lower=0> truth_total; // human-annotated vocs per speaker class
- array [n_clips] real<lower=0> clip_duration; // duration of each clip
- array [n_clips] real<lower=0> clip_age; // child age at each clip
- int<lower=0> n_validation; // NOTE(review): not referenced in this file -- confirm it is needed downstream
- // actual speech rates
- int<lower=1> n_rates; // number of speech-rate observations
- int<lower=1> n_speech_rate_children; // number of children in the speech-rate data
- array [n_rates,n_classes] int<lower=0> speech_rates; // per-class vocalization counts per observation
- array [n_rates] int group_corpus; // corpus index for each speech-rate observation
- array [n_rates] real<lower=0> durations; // duration of each speech-rate observation
- array [n_rates] real<lower=0> speech_rate_age; // child age per speech-rate observation
- array [n_rates] int<lower=-1> speech_rate_siblings; // sibling count; -1 presumably codes "missing" -- verify
- array [n_rates] int<lower=1,upper=n_speech_rate_children> speech_rate_child; // child index per observation
- // parallel processing
- int<lower=1> threads; // NOTE(review): not referenced in this file (reduce_sum grainsize is hard-coded to 1)
- }
- transformed data {
- // Per-group child age, copied from the clips belonging to each group
- // (if several clips share a group, the last clip visited wins).
- vector<lower=0>[n_groups] recording_age;
- // Per-child lookup tables built from the observation-level arrays.
- array[n_speech_rate_children] int<lower=1> speech_rate_child_corpus;
- array[n_children] int<lower=-1> child_siblings;
- array[n_speech_rate_children] int<lower=-1> speech_rate_child_siblings;
- int no_siblings = 0; // children observed with zero siblings
- int has_siblings = 0; // children observed with one or more siblings
- real p_outlier = 0.025; // mixture weight for outlier tolerance (used in the model block)
- for (c in 1:n_clips) {
- recording_age[group[c]] = clip_age[c];
- }
- // Single pass over the speech-rate observations fills both per-child
- // tables at once (the original code ran two identical loops over 1:n_rates).
- for (k in 1:n_rates) {
- speech_rate_child_corpus[speech_rate_child[k]] = group_corpus[k];
- speech_rate_child_siblings[speech_rate_child[k]] = speech_rate_siblings[k];
- }
- for (k in 1:n_recs) {
- child_siblings[children[k]] = siblings[k];
- }
- // Count children with known sibling status; -1 (missing) falls in neither bin.
- for (c in 1:n_children) {
- if (child_siblings[c] == 0) {
- no_siblings += 1;
- }
- else if (child_siblings[c] > 0) {
- has_siblings += 1;
- }
- }
- }
- parameters {
- // Child-level mean rates; n_classes-1 columns, so one class is presumably
- // handled separately (TODO confirm which class is excluded and why).
- matrix<lower=0>[n_children,n_classes-1] mu_child_level;
- vector [n_children] child_dev_age; // per-child age deviation (scale/units not shown here -- verify)
- matrix<lower=0> [n_recs, n_classes] truth_vocs; // latent true vocalization quantities per recording and class
- // nuisance parameters: per-recording realized confusion matrices for each algorithm
- array [n_recs] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion_baseline_algo1;
- array [n_recs] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion_baseline_algo2;
- // confusion parameters: for each algorithm, a population-level
- // beta_proportion prior (mean mus_*, precision etas_*) over group-level
- // confusion rates lambda_*, plus hurdle probabilities p_* (see model block)
- matrix<lower=1>[n_classes,n_classes] etas_algo1;
- matrix<lower=0,upper=1>[n_classes,n_classes] mus_algo1;
- array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] lambda_algo1;
- matrix<lower=0,upper=1>[n_classes,n_classes] p_algo1;
- matrix<lower=1>[n_classes,n_classes] etas_algo2;
- matrix<lower=0,upper=1>[n_classes,n_classes] mus_algo2;
- array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] lambda_algo2;
- matrix<lower=0,upper=1>[n_classes,n_classes] p_algo2;
- // behavior model parameters
- #include "blocks/behavior_model_parameters.stan"
- // parameters specific to human annotations
- #include "blocks/human_annotations_parameters.stan"
- }
- model {
- //actual model
- // inverse confusion model: likelihood of each algorithm's counts on the
- // full recordings given latent truth_vocs and the confusion parameters;
- // parallelized with reduce_sum (grainsize 1).
- target += reduce_sum(
- inverse_model_lpdf, actual_confusion_baseline_algo1, 1,
- n_recs, n_classes, recs_duration,
- vocs_algo1, age,
- truth_vocs, mus_algo1, etas_algo1, p_algo1
- );
- target += reduce_sum(
- inverse_model_lpdf, actual_confusion_baseline_algo2, 1,
- n_recs, n_classes, recs_duration,
- vocs_algo2, age,
- truth_vocs, mus_algo2, etas_algo2, p_algo2
- );
- // contribution of full recordings to the model of behavior
- #include "blocks/behavior_observations_model.stan"
- // forward confusion model on the annotated clips, one call per algorithm
- target += reduce_sum(
- confusion_model_lpdf, lambda_algo1, 1,
- n_classes, n_clips,
- algo1_total, truth_total, group, clip_duration, clip_age,
- p_algo1
- );
- target += reduce_sum(
- confusion_model_lpdf, lambda_algo2, 1,
- n_classes, n_clips,
- algo2_total, truth_total, group, clip_duration, clip_age,
- p_algo2 // BUG FIX: was p_algo1 (copy-paste) -- the algo2 likelihood must use algo2's hurdle parameters
- );
- // priors on the nuisance parameters of the confusion model
- for (i in 1:n_classes) {
- p_algo1[i] ~ beta(3,1);
- mus_algo1[i,:] ~ uniform(0, 1);
- etas_algo1[i,:] ~ pareto(1, 1.5);
- p_algo2[i] ~ beta(3,1);
- mus_algo2[i,:] ~ uniform(0, 1);
- etas_algo2[i,:] ~ pareto(1, 1.5);
-
- }
- // group-level confusion rates lambda_* get beta_proportion priors;
- // mixing with a flat (log-density 0) component of weight p_outlier makes
- // the model tolerant to outlier groups.
- for (c in 1:n_groups) {
- real ll_normal = 0;
- for (i in 1:n_classes) {
- ll_normal += beta_proportion_lpdf(lambda_algo1[c,i,:] | mus_algo1[i,:], etas_algo1[i,:]) + beta_proportion_lpdf(lambda_algo2[c,i,:] | mus_algo2[i,:], etas_algo2[i,:]);
- }
- // tolerance to outliers
- target += log_mix(p_outlier, 0, ll_normal);
- }
- // priors on the hierarchical model of speech behavior
- #include "blocks/behavior_model_priors.stan"
- // human annotations contribution
- #include "blocks/human_annotations.stan"
- }
|