|
@@ -1,127 +1,7 @@
|
|
|
functions {
|
|
|
- real confusion_model_lpmf(array[] int group,
|
|
|
- int start, int end,
|
|
|
- int n_classes,
|
|
|
- array[,] int algo,
|
|
|
- array[,] int truth,
|
|
|
- array[] real age,
|
|
|
- array[] real clip_duration,
|
|
|
- array[] matrix lambda//,
|
|
|
- //array[] vector lambda_fp,
|
|
|
- ) {
|
|
|
- real ll = 0;
|
|
|
- vector [4] bp;
|
|
|
-
|
|
|
- vector[8192] log_contrib_comb;
|
|
|
- int n = size(log_contrib_comb);
|
|
|
-
|
|
|
- for (k in start:end) {
|
|
|
- for (i in 1:n_classes) {
|
|
|
- log_contrib_comb[:n] = rep_vector(0, n);
|
|
|
- n = 1;
|
|
|
-
|
|
|
- for (chi in 0:(truth[k,1]>0?max(truth[k,1], algo[k,i]):0)) {
|
|
|
- bp[1] = truth[k,1]==0?0:poisson_lpmf(chi | truth[k,1]*lambda[group[k-start+1],1,i]);
|
|
|
-
|
|
|
- for (och in 0:(truth[k,2]>0?max(truth[k,2], algo[k,i]-chi):0)) {
|
|
|
- bp[2] = truth[k,2]==0?0:poisson_lpmf(och | truth[k,2]*lambda[group[k-start+1],2,i]);
|
|
|
-
|
|
|
- for (fem in 0:(truth[k,3]>0?max(truth[k,3], algo[k,i]-chi-och):0)) {
|
|
|
- bp[3] = truth[k,3]==0?0:poisson_lpmf(fem | truth[k,3]*lambda[group[k-start+1],3,i]);
|
|
|
-
|
|
|
- for (mal in 0:(truth[k,4]>0?max(truth[k,4], algo[k,i]-chi-och-fem):0)) {
|
|
|
- bp[4] = truth[k,4]==0?0:poisson_lpmf(mal | truth[k,4]*lambda[group[k-start+1],4,i]);
|
|
|
-
|
|
|
- int delta = algo[k,i] - (mal+fem+och+chi);
|
|
|
- // if (delta >= 0) {
|
|
|
- // log_contrib_comb[n] += sum(bp);
|
|
|
- // log_contrib_comb[n] += poisson_lpmf(
|
|
|
- // delta | lambda_fp[group[k-start+1],i]*clip_duration[k]
|
|
|
- // );
|
|
|
- // n = n+1;
|
|
|
- // }
|
|
|
- if (delta==0) {
|
|
|
- log_contrib_comb[n] += sum(bp);
|
|
|
- n = n+1;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if (n>1) {
|
|
|
- ll += log_sum_exp(log_contrib_comb[1:n-1]);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- return ll;
|
|
|
- }
|
|
|
-
|
|
|
- real inverse_model_lpmf(array[] int children,
|
|
|
- int start, int end,
|
|
|
- int n_recs,
|
|
|
- int n_classes,
|
|
|
- real duration,
|
|
|
- array [,] int vocs,
|
|
|
- array [] real age,
|
|
|
- matrix truth_vocs,
|
|
|
- array [] matrix actual_confusion,
|
|
|
- //array [] vector actual_fp_rate,
|
|
|
- matrix mus,
|
|
|
- matrix alphas//,
|
|
|
- //vector mus_fp,
|
|
|
- //vector alphas_fp
|
|
|
- ) {
|
|
|
- real ll = 0;
|
|
|
-
|
|
|
- vector [4] expect;
|
|
|
-
|
|
|
- for (k in start:end) {
|
|
|
- expect = rep_vector(0, 4);
|
|
|
-
|
|
|
- for (i in 1:n_classes) {
|
|
|
- ll += gamma_lpdf(actual_confusion[k,i] | alphas[i,:], alphas[i,:]./mus[i,:]);
|
|
|
- //ll += gamma_lpdf(actual_fp_rate[k] | alphas_fp, alphas_fp./mus_fp);
|
|
|
-
|
|
|
- expect[i] = dot_product(truth_vocs[k,:], actual_confusion[k,:,i]);
|
|
|
- //expect[i] += actual_fp_rate[k,i] * duration;
|
|
|
- }
|
|
|
-
|
|
|
- ll += normal_lpdf(vocs[k,:] | expect, sqrt(expect));
|
|
|
- }
|
|
|
-
|
|
|
- return ll;
|
|
|
- }
|
|
|
-
|
|
|
- real recs_priors_lpmf(array[] int children,
|
|
|
- int start, int end,
|
|
|
- int n_recs,
|
|
|
- int n_classes,
|
|
|
- real recs_duration,
|
|
|
- array [] real age,
|
|
|
- matrix truth_vocs,
|
|
|
- vector mu_pop_level,
|
|
|
- matrix mu_child_level,
|
|
|
- vector alpha_child_level,
|
|
|
- vector child_dev_age,
|
|
|
- real beta_dev
|
|
|
- ) {
|
|
|
- real ll = 0;
|
|
|
-
|
|
|
- for (k in start:end) {
|
|
|
- real chi_mu = mu_pop_level[1]*exp(
|
|
|
- child_dev_age[children[k-start+1]]*age[k]/12.0/10.0+beta_dev*(mu_child_level[children[k-start+1],2]+mu_child_level[children[k-start+1],3]-mu_pop_level[3]-mu_pop_level[4])*age[k]/12.0/10.0
|
|
|
- );
|
|
|
- ll += gamma_lpdf(
|
|
|
- truth_vocs[k,1]/1000/recs_duration | alpha_child_level[1], alpha_child_level[1]/chi_mu
|
|
|
- );
|
|
|
-
|
|
|
- ll += gamma_lpdf(
|
|
|
- truth_vocs[k,2:]/1000/recs_duration | alpha_child_level[2:], alpha_child_level[2:]./mu_child_level[children[k-start+1],:]' //'
|
|
|
- );
|
|
|
- }
|
|
|
-
|
|
|
- return ll;
|
|
|
- }
|
|
|
+ #include "blocks/confusion_model.stan"
|
|
|
+ #include "blocks/confusion_inverse_model.stan"
|
|
|
+ #include "blocks/behavior_model_truth.stan"
|
|
|
}
|
|
|
|
|
|
// TODO
|
|
@@ -208,115 +88,29 @@ parameters {
|
|
|
matrix<lower=0>[n_children,n_classes-1] mu_child_level;
|
|
|
vector [n_children] child_dev_age;
|
|
|
matrix<lower=0> [n_recs, n_classes] truth_vocs;
|
|
|
-
|
|
|
- // nuisance parameters
|
|
|
array [n_recs] matrix<lower=0>[n_classes,n_classes] actual_confusion_baseline;
|
|
|
- //array [n_recs] vector<lower=0>[n_classes] actual_fp_rate;
|
|
|
|
|
|
// confusion parameters
|
|
|
- // confusion matrix
|
|
|
- matrix<lower=0>[n_classes,n_classes] alphas;
|
|
|
- matrix<lower=0>[n_classes,n_classes] mus;
|
|
|
- array [n_groups] matrix<lower=0>[n_classes,n_classes] lambda;
|
|
|
+ #include "blocks/confusion_model_parameters.stan"
|
|
|
|
|
|
- // false positives
|
|
|
- //vector<lower=0>[n_classes] alphas_fp;
|
|
|
- //vector<lower=0>[n_classes] mus_fp;
|
|
|
- //array [n_groups] vector<lower=0>[n_classes] lambda_fp;
|
|
|
+ // behavior model parameters
|
|
|
+ #include "blocks/behavior_model_parameters.stan"
|
|
|
|
|
|
- // speech rates
|
|
|
- vector<lower=0>[n_classes] alpha_child_level; // variance across recordings for a given child
|
|
|
- array[2] matrix<lower=0>[n_classes-1,n_corpora] alpha_corpus_level; // variance among children
|
|
|
- matrix<lower=0>[n_classes-1,n_corpora] mu_corpus_level; // child-level average
|
|
|
- vector<lower=0>[n_classes-1] alpha_pop_level; // variance among corpora
|
|
|
- vector<lower=0>[n_classes] mu_pop_level; // population level averages
|
|
|
- vector<lower=0>[n_classes-1] alpha_pop;
|
|
|
- matrix<lower=0>[n_classes,n_rates] speech_rate; // truth speech rates observed in annotated clips
|
|
|
- matrix<lower=0>[n_speech_rate_children,n_classes-1] speech_rate_child_level; // expected speech rate at the child-level
|
|
|
-
|
|
|
- // siblings
|
|
|
- real beta_sib_och; // effect of having siblings on OCH speech
|
|
|
- real beta_sib_adu; // effect of having siblings on ADU speech
|
|
|
- real<lower=0,upper=1> p_sib; // prob of having siblings
|
|
|
-
|
|
|
- vector [n_speech_rate_children] child_dev_speech_age;
|
|
|
-
|
|
|
- // average effect of age
|
|
|
- real alpha_dev;
|
|
|
- real<lower=0> sigma_dev;
|
|
|
-
|
|
|
- // effect of excess ADU input
|
|
|
- real beta_dev;
|
|
|
+ // parameters specific to human annotations
|
|
|
+ #include "blocks/human_annotations_parameters.stan"
|
|
|
}
|
|
|
|
|
|
model {
|
|
|
- //actual model
|
|
|
-
|
|
|
// inverse confusion model
|
|
|
target += reduce_sum(
|
|
|
inverse_model_lpmf, children, 1,
|
|
|
n_recs, n_classes, recs_duration,
|
|
|
vocs, age,
|
|
|
truth_vocs, actual_confusion_baseline, mus, alphas//, mus_fp, alphas_fp
|
|
|
- );
|
|
|
+ );
|
|
|
|
|
|
- // priors on actual speech
|
|
|
- target += reduce_sum(
|
|
|
- recs_priors_lpmf, children, 1,
|
|
|
- n_recs, n_classes, recs_duration, age,
|
|
|
- truth_vocs,
|
|
|
- mu_pop_level, mu_child_level, alpha_child_level,
|
|
|
- child_dev_age, beta_dev
|
|
|
- );
|
|
|
-
|
|
|
- vector [2] ll;
|
|
|
- int distrib;
|
|
|
- for (c in 1:n_children) {
|
|
|
- // if there is sibling data
|
|
|
- if (child_siblings[c]>=0) {
|
|
|
- distrib = child_siblings[c]>0?2:1;
|
|
|
-
|
|
|
- mu_child_level[c,1] ~ gamma(
|
|
|
- alpha_corpus_level[distrib,1,corpus[c]],
|
|
|
- (alpha_corpus_level[distrib,1,corpus[c]]/(mu_corpus_level[1,corpus[c]]*exp(
|
|
|
- child_siblings[c]>0?beta_sib_och:0
|
|
|
- )))
|
|
|
- );
|
|
|
- mu_child_level[c,2:] ~ gamma(
|
|
|
- alpha_corpus_level[distrib,2:,corpus[c]],
|
|
|
- (alpha_corpus_level[distrib,2:,corpus[c]]./mu_corpus_level[2:,corpus[c]]*exp(
|
|
|
- child_siblings[c]>0?beta_sib_adu:0
|
|
|
- ))
|
|
|
- );
|
|
|
- }
|
|
|
- // otherwise
|
|
|
- else {
|
|
|
- // assuming no sibling
|
|
|
- ll[1] = log(p_sib)+gamma_lpdf(
|
|
|
- mu_child_level[c,1] | alpha_corpus_level[2,1,corpus[c]], alpha_corpus_level[2,1,corpus[c]]/(mu_corpus_level[1,corpus[c]]*exp(beta_sib_och))
|
|
|
- );
|
|
|
- ll[1] += gamma_lpdf(
|
|
|
- mu_child_level[c,2] | alpha_corpus_level[2,2,corpus[c]], alpha_corpus_level[2,2,corpus[c]]/(mu_corpus_level[2,corpus[c]]*exp(beta_sib_adu))
|
|
|
- );
|
|
|
- ll[1] += gamma_lpdf(
|
|
|
- mu_child_level[c,3] | alpha_corpus_level[2,3,corpus[c]], alpha_corpus_level[2,3,corpus[c]]/(mu_corpus_level[3,corpus[c]]*exp(beta_sib_adu))
|
|
|
- );
|
|
|
-
|
|
|
- // assuming sibling
|
|
|
- ll[2] = log(1-p_sib)+gamma_lpdf(
|
|
|
- mu_child_level[c,1] | alpha_corpus_level[1,1,corpus[c]], alpha_corpus_level[1,1,corpus[c]]/(mu_corpus_level[1,corpus[c]])
|
|
|
- );
|
|
|
- ll[2] += gamma_lpdf(
|
|
|
- mu_child_level[c,2] | alpha_corpus_level[1,2,corpus[c]], alpha_corpus_level[1,2,corpus[c]]/(mu_corpus_level[2,corpus[c]])
|
|
|
- );
|
|
|
- ll[2] += gamma_lpdf(
|
|
|
- mu_child_level[c,3] | alpha_corpus_level[1,3,corpus[c]], alpha_corpus_level[1,3,corpus[c]]/(mu_corpus_level[3,corpus[c]])
|
|
|
- );
|
|
|
- target += log_sum_exp(ll);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- alpha_child_level ~ gamma(4,1);
|
|
|
+ // contribution of full recordings to the model of behavior
|
|
|
+ #include "blocks/behavior_observations_model_uncentered.stan"
|
|
|
|
|
|
target += reduce_sum(
|
|
|
confusion_model_lpmf, group, n_clips%/%(threads*4),
|
|
@@ -325,77 +119,12 @@ model {
|
|
|
lambda//, lambda_fp
|
|
|
);
|
|
|
|
|
|
- //mus_fp ~ exponential(1);
|
|
|
- //alphas_fp ~ gamma(2, 1);
|
|
|
-
|
|
|
- for (i in 1:n_classes) {
|
|
|
- //lambda_fp[:,i] ~ gamma(alphas_fp[i], alphas_fp[i]/mus_fp[i]);
|
|
|
-
|
|
|
- for (j in 1:n_classes) {
|
|
|
- mus[i,j] ~ exponential(2);
|
|
|
- alphas[i,j] ~ inv_gamma(1, 1);
|
|
|
- // mus[i,j] ~ exponential(1);
|
|
|
- // alphas[i,j] ~ exponential(1);
|
|
|
- for (c in 1:n_groups) {
|
|
|
- lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/mus[i,j]);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // speech rates
|
|
|
- mu_pop_level ~ exponential(4); // 250 vocs/hour
|
|
|
- alpha_pop_level ~ gamma(16, 4); // dispersion of corpora within population sd = 0.5 x \mu
|
|
|
- alpha_pop ~ gamma(16, 4); // dispersion of dispersion of individuals within corpora
|
|
|
- for (i in 1:n_classes-1) {
|
|
|
- alpha_corpus_level[1,i,:] ~ gamma(8, 8/alpha_pop[i]);
|
|
|
- alpha_corpus_level[2,i,:] ~ gamma(8, 8/alpha_pop[i]);
|
|
|
- mu_corpus_level[i,:] ~ gamma(alpha_pop_level[i],alpha_pop_level[i]/mu_pop_level[i+1]);
|
|
|
- }
|
|
|
-
|
|
|
- for (g in 1:n_rates) {
|
|
|
- real chi_mu = mu_pop_level[1]*exp(
|
|
|
- child_dev_speech_age[speech_rate_child[g]]*speech_rate_age[g]/12.0/10.0 + beta_dev*(speech_rate_child_level[speech_rate_child[g],2]+speech_rate_child_level[speech_rate_child[g],3]-mu_pop_level[3]-mu_pop_level[4])*speech_rate_age[g]/12.0/10.0
|
|
|
- );
|
|
|
- speech_rate[1,g] ~ gamma(
|
|
|
- alpha_child_level[1],
|
|
|
- alpha_child_level[1]/chi_mu
|
|
|
- );
|
|
|
-
|
|
|
- speech_rate[2:,g] ~ gamma(
|
|
|
- alpha_child_level[2:],
|
|
|
- (alpha_child_level[2:]./(speech_rate_child_level[speech_rate_child[g],:]')) //'
|
|
|
- );
|
|
|
- speech_rates[g,:] ~ poisson(speech_rate[:,g]*durations[g]*1000);
|
|
|
- }
|
|
|
-
|
|
|
- for (c in 1:n_speech_rate_children) {
|
|
|
- distrib = child_siblings[c]>0?2:1;
|
|
|
-
|
|
|
- speech_rate_child_level[c,1] ~ gamma(
|
|
|
- alpha_corpus_level[distrib,1,speech_rate_child_corpus[c]],
|
|
|
- (alpha_corpus_level[distrib,1,speech_rate_child_corpus[c]]/(mu_corpus_level[1,speech_rate_child_corpus[c]]*exp(
|
|
|
- speech_rate_child_siblings[c]>0?beta_sib_och:0
|
|
|
- )))
|
|
|
- );
|
|
|
-
|
|
|
- speech_rate_child_level[c,2:] ~ gamma(
|
|
|
- alpha_corpus_level[distrib,2:,speech_rate_child_corpus[c]],
|
|
|
- (alpha_corpus_level[distrib,2:,speech_rate_child_corpus[c]]./(mu_corpus_level[2:,speech_rate_child_corpus[c]]*exp(
|
|
|
- speech_rate_child_siblings[c]>0?beta_sib_adu:0
|
|
|
- )))
|
|
|
- );
|
|
|
- }
|
|
|
-
|
|
|
- child_dev_age ~ normal(alpha_dev, sigma_dev);
|
|
|
- child_dev_speech_age ~ normal(alpha_dev, sigma_dev);
|
|
|
-
|
|
|
- has_siblings ~ binomial(has_siblings+no_siblings, p_sib);
|
|
|
- p_sib ~ uniform(0, 1);
|
|
|
- beta_sib_och ~ normal(0, 1);
|
|
|
- beta_sib_adu ~ normal(0, 1);
|
|
|
+ // priors on the nuisance parameters of the confusion model
|
|
|
+ #include "blocks/confusion_model_priors.stan"
|
|
|
|
|
|
- alpha_dev ~ normal(0, 1);
|
|
|
- sigma_dev ~ exponential(1);
|
|
|
+ // priors on the hierarchical model of speech behavior
|
|
|
+ #include "blocks/behavior_model_priors_uncentered.stan"
|
|
|
|
|
|
- beta_dev ~ normal(0, 1);
|
|
|
+ // human annotations contribution
|
|
|
+ #include "blocks/human_annotations_uncentered.stan"
|
|
|
}
|