123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235 |
- functions {
- // Partial-sum log-likelihood of the clip-level confusion data, written for
- // use with reduce_sum: `group` is the sliced argument covering clips
- // start..end, while every other argument is the full, unsliced data or
- // parameter container and is indexed with the global clip number.
- //
- // For each clip and each ordered class pair (a, b) with truth[clip, b] > 0
- // it adds
- //   * a Poisson term for vtc[clip, a, b] with rate
- //     lambda[g, b, a] * truth[clip, b] * exp(delta[clip, a, b]), and
- //   * for a != b, a binomial term for coocc[clip, b, a] out of
- //     truth_total[clip, a] trials with probability coocc_rate[g, a, b].
- // Independently of truth, each class a adds a Poisson term for
- // vtc_fp[clip, a] with rate lambda_fp[g, a] * clip_duration[clip].
- //
- // Returns the summed log-probability over the slice.
- real confusion_model_lpmf(array[] int group,
-                           int start, int end,
-                           int n_classes,
-                           array[,,] int vtc,
-                           array[,] int vtc_fp,
-                           array[,] int truth,
-                           array[,,] int coocc,
-                           array[,] int truth_total,
-                           array[] real clip_duration,
-                           array[] matrix lambda,
-                           array[] matrix delta,
-                           array[] vector lambda_fp,
-                           array[] matrix coocc_rate
- ) {
-   real log_lik = 0;
-   for (clip in start:end) {
-     // The slice is re-based to 1, so shift the global clip index back.
-     int g = group[clip - start + 1];
-     for (a in 1:n_classes) {
-       for (b in 1:n_classes) {
-         // Pairs whose truth count is zero contribute nothing.
-         if (truth[clip, b] != 0) {
-           real rate = lambda[g, b, a] * truth[clip, b] * exp(delta[clip, a, b]);
-           log_lik += poisson_lpmf(vtc[clip, a, b] | rate);
-           if (a != b) {
-             log_lik += binomial_lpmf(coocc[clip, b, a] | truth_total[clip, a], coocc_rate[g, a, b]);
-           }
-         }
-       }
-       // False-positive counts scale with clip duration only.
-       log_lik += poisson_lpmf(vtc_fp[clip, a] | lambda_fp[g, a] * clip_duration[clip]);
-     }
-   }
-   return log_lik;
- }
- // Partial-sum log-density of the observed per-recording counts, written for
- // use with reduce_sum: `children` is the sliced argument (it only determines
- // the start..end range and is not read here); all other arguments are the
- // full containers and are indexed with the global recording number k.
- //
- // For recording k and class i:
- //   expect[i] = truth_vocs[k, :] . actual_confusion[k, :, i]
- //               + actual_fp_rate[k, i] * duration
- //               - overlap
- //   sd[i]     = (the same first two terms) + overlap      (a variance)
- // where
- //   overlap = 0.5 * (actual_coocc[k, i, :] with entry i zeroed)
- //                   . actual_confusion[k, :, i]
- //                 * truth_vocs[k, i] * actual_confusion[k, i, i].
- // vocs[k, :] is then scored as normal(expect, sqrt(sd)).
- //
- // n_recs is accepted for interface compatibility but is not used.
- // Returns the summed log-density over the slice.
- real model_lpmf(array[] int children,
-                 int start, int end,
-                 int n_recs,
-                 int n_classes,
-                 real duration,
-                 array[,] int vocs,
-                 matrix truth_vocs,
-                 array[] matrix actual_confusion,
-                 array[] vector actual_fp_rate,
-                 array[] matrix actual_coocc
- ) {
-   real ll = 0;
-   // Sized by n_classes (previously hard-coded to 4, which broke the
-   // assignments below and the normal_lpdf call for any other class count).
-   vector[n_classes] expect;
-   vector[n_classes] sd;   // holds variances; sqrt is taken at the end
-   vector[n_classes] tmp;
-   real overlap;
-   for (k in start:end) {
-     expect = rep_vector(0, n_classes);
-     sd = rep_vector(0, n_classes);
-     for (i in 1:n_classes) {
-       expect[i] = dot_product(truth_vocs[k,:], actual_confusion[k,:,i]);
-       expect[i] += actual_fp_rate[k,i] * duration;
-       sd[i] = expect[i];
-
-       // Row i of the co-occurrence matrix as a column vector, with the
-       // self term removed.
-       tmp = actual_coocc[k,i,:]';
-       tmp[i] = 0;
-       // Same correction is subtracted from the mean and added to the variance.
-       overlap = 0.5*dot_product(tmp, actual_confusion[k,:,i]')*truth_vocs[k,i]*actual_confusion[k,i,i];
-       expect[i] -= overlap;
-       sd[i] += overlap;
-     }
-
-     ll += normal_lpdf(vocs[k,:] | expect, sqrt(sd));
-   }
-   return ll;
- }
- }
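- // TODO: use speech rates to set priors on truth_vocs.
- // NOTE(review): the model block already draws truth_vocs from a gamma prior built
- // from speech_rate_alpha / speech_rate_mu — confirm whether this TODO is still open.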
- data {
-   int<lower=1> n_classes; // number of classes
-   // Declared ahead of the arrays that use it as an upper bound.
-   int<lower=1> n_corpora;
-   // analysis data block
-   int<lower=1> n_recs;
-   int<lower=1> n_children;
-   // children[k] subscripts `corpus`, so it must lie in 1..n_children.
-   array[n_recs] int<lower=1, upper=n_children> children;
-   // Not referenced by the functions or model block visible in this file.
-   array[n_recs] real<lower=1> age;
-   array[n_recs, n_classes] int<lower=0> vocs;
-   // corpus[c] subscripts the columns of speech_rate_alpha / speech_rate_mu.
-   array[n_children] int<lower=1, upper=n_corpora> corpus;
-   // Multiplies actual_fp_rate in model_lpmf and divides the truth_vocs prior rate.
-   real<lower=0> recs_duration;
-   // speaker confusion data block
-   int<lower=1> n_clips; // number of clips
-   int<lower=1> n_groups; // number of groups
-   // group[k] subscripts lambda, lambda_fp and coocc_rate (each of size n_groups).
-   array [n_clips] int<lower=1, upper=n_groups> group;
-   // Not referenced by the functions or model block visible in this file.
-   array [n_clips] int conf_corpus;
-   array [n_clips,n_classes,n_classes] int<lower=0> vtc; // vtc vocs attributed to specific speakers
-   array [n_clips,n_classes] int<lower=0> vtc_fp; // vtc vocs attributed to non-speech
-   array [n_clips,n_classes] int<lower=0> truth;
-   array [n_clips,n_classes] int<lower=0> truth_total;
-   array [n_clips] real<lower=0> clip_duration;
-   array [n_clips,n_classes,n_classes] int<lower=0> coocc;
-   // Not referenced by the functions or model block visible in this file.
-   int<lower=1> n_validation;
-   // actual speech rates
-   int<lower=1> n_rates;
-   array [n_rates,n_classes] int<lower=0> speech_rates;
-   // group_corpus[g] subscripts the columns of speech_rate_alpha / speech_rate_mu.
-   array [n_rates] int<lower=1, upper=n_corpora> group_corpus;
-   array [n_rates] real<lower=0> durations;
- }
- parameters {
- // Per-recording, per-class non-negative latent quantity. It gets a gamma prior in
- // the model block (built from the speech-rate hyperparameters of the recording's
- // corpus) and is passed to model_lpmf as truth_vocs.
- matrix<lower=0> [n_recs, n_classes] truth_vocs;
- // Disabled alternative: one confusion matrix per child instead of per recording.
- //array [n_children] matrix<lower=0>[n_classes,n_classes] actual_confusion_baseline;
- // One non-negative n_classes x n_classes matrix per recording; passed to model_lpmf
- // as its actual_confusion argument. Each row gets a gamma(alphas, alphas ./ mus) prior.
- array [n_recs] matrix<lower=0>[n_classes,n_classes] actual_confusion_baseline;
- // Per-recording matrix with entries in [0, 1]; each row gets a
- // beta_proportion(mus_coocc, etas_coocc) prior.
- array [n_recs] matrix<lower=0,upper=1>[n_classes,n_classes] actual_coocc;
- // Per-recording, per-class non-negative rate; multiplied by the recording duration
- // in model_lpmf. Prior: gamma(alphas_fp, alphas_fp ./ mus_fp).
- array [n_recs] vector<lower=0>[n_classes] actual_fp_rate;
- // confusion parameters
- // Gamma shape (alphas) and mean (mus; rate is alphas / mus) shared by the priors on
- // lambda and actual_confusion_baseline.
- matrix<lower=1>[n_classes,n_classes] alphas;
- matrix<lower=0>[n_classes,n_classes] mus;
- // Per-group matrix entering the Poisson rate in confusion_model_lpmf.
- array [n_groups] matrix<lower=0>[n_classes,n_classes] lambda;
- // Per-clip unconstrained deviation; enters the Poisson rate as exp(delta) and has
- // a normal(0, sigma) prior.
- array [n_clips] matrix[n_classes,n_classes] delta;
- // Scale of the delta prior; itself given a normal(0, 0.1) prior (half-normal
- // because of the lower bound).
- real<lower=0> sigma;
- // beta_proportion precision (etas_coocc) and mean (mus_coocc) shared by the priors
- // on coocc_rate and actual_coocc.
- matrix<lower=1>[n_classes,n_classes] etas_coocc;
- matrix<lower=0,upper=1>[n_classes,n_classes] mus_coocc;
- // Per-group success probability of the binomial term in confusion_model_lpmf.
- array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] coocc_rate;
- // Gamma shape (alphas_fp) and mean (mus_fp) shared by the priors on lambda_fp and
- // actual_fp_rate.
- vector<lower=1>[n_classes] alphas_fp;
- vector<lower=0>[n_classes] mus_fp;
- // Per-group, per-class rate; multiplied by clip_duration in confusion_model_lpmf.
- array [n_groups] vector<lower=0>[n_classes] lambda_fp;
- // Disabled corpus-level bias terms (see the commented-out code in the
- // transformed parameters and model blocks).
- //array [n_corpora] matrix[n_classes,n_classes] corpus_bias;
- //matrix<lower=0>[n_classes,n_classes] corpus_sigma;
- // speech rates
- // Per-class, per-corpus gamma shape and mean used for both the speech_rate prior
- // and the truth_vocs prior.
- matrix<lower=1>[n_classes,n_corpora] speech_rate_alpha;
- matrix<lower=0>[n_classes,n_corpora] speech_rate_mu;
- // Per-class latent rate for each speech-rate observation; multiplied by
- // durations[g] to give the Poisson mean of speech_rates[g, i].
- matrix<lower=0> [n_classes,n_rates] speech_rate;
- }
- transformed parameters {
- // Intentionally empty. The code below is a disabled variant that would derive a
- // per-child confusion matrix by shifting the baseline on the logit scale with a
- // corpus-level bias; it depends on corpus_bias, whose declaration is also
- // commented out in the parameters block.
- // array [n_children] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion;
- // for (c in 1:n_children) {
- // actual_confusion[c] = inv_logit(logit(actual_confusion_baseline[c])+corpus_bias[corpus[c]]);
- // }
- }
- model {
-   // Recording-level likelihood: observed counts `vocs` given the latent
-   // truth_vocs and the per-recording confusion, false-positive and
-   // co-occurrence parameters. Sliced over recordings with grainsize 1.
-   target += reduce_sum(
-     model_lpmf, children, 1,
-     n_recs, n_classes, recs_duration,
-     vocs,
-     truth_vocs, actual_confusion_baseline, actual_fp_rate, actual_coocc
-   );
-   // Per-recording parameters share the hyperparameters that are learned from
-   // the clip-level confusion data further down.
-   for (k in 1:n_recs) {
-     for (i in 1:n_classes) {
-       actual_confusion_baseline[k,i] ~ gamma(alphas[i,:], alphas[i,:]./mus[i,:]);
-       actual_coocc[k,i,:] ~ beta_proportion(mus_coocc[i,:], etas_coocc[i,:]);
-     }
-     actual_fp_rate[k] ~ gamma(alphas_fp, alphas_fp./mus_fp);
-   }
-
-   // Prior on the latent counts, using the speech-rate hyperparameters of the
-   // corpus the recording's child belongs to.
-   // NOTE(review): the /1000 factor is presumably a unit conversion shared with
-   // the speech_rate prior below — confirm.
-   for (k in 1:n_recs) {
-     int rec_corpus = corpus[children[k]];
-     truth_vocs[k,:] ~ gamma(
-       speech_rate_alpha[:,rec_corpus],
-       (speech_rate_alpha[:,rec_corpus]./speech_rate_mu[:,rec_corpus])/1000/recs_duration
-     );
-   }
-   // Clip-level confusion likelihood, sliced over clips. The grainsize aims at
-   // roughly 40 slices but is clamped to at least 1: n_clips %/% 40 is 0 for
-   // fewer than 40 clips, and reduce_sum requires a positive grainsize.
-   target += reduce_sum(
-     confusion_model_lpmf, group, max(1, n_clips %/% 40),
-     n_classes,
-     vtc, vtc_fp, truth, coocc, truth_total, clip_duration,
-     lambda, delta, lambda_fp, coocc_rate
-   );
-   // Hyperpriors and group-level priors for the confusion model.
-   mus_fp ~ exponential(1);
-   alphas_fp ~ normal(1, 1);
-   for (i in 1:n_classes) {
-     lambda_fp[:,i] ~ gamma(alphas_fp[i], alphas_fp[i]/mus_fp[i]);
-
-     for (j in 1:n_classes) {
-       mus[i,j] ~ exponential(2);
-       alphas[i,j] ~ normal(1,1);
-       for (c in 1:n_groups) {
-         lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/mus[i,j]);
-         coocc_rate[c,i,j] ~ beta_proportion(mus_coocc[i,j], etas_coocc[i,j]);
-       }
-       delta[:,i,j] ~ normal(0,sigma);
-       mus_coocc[i,j] ~ uniform(0, 1);
-       etas_coocc[i,j] ~ pareto(1, 1.5);
-     }
-   }
-   sigma ~ normal(0, 0.1);
-   // Disabled priors for the corpus-level bias terms (corpus_bias and
-   // corpus_sigma are commented out in the parameters block).
-   // for (i in 1:n_classes) {
-   // for (j in 1:n_classes) {
-   // for (c in 1:n_corpora) {
-   // corpus_bias[c,j,i] ~ normal(0, corpus_sigma[j,i]);
-   // }
-   // corpus_sigma[j,i] ~ normal(0, 1);
-   // }
-   // }
-   // speech rates
-   for (i in 1:n_classes) {
-     speech_rate_alpha[i,:] ~ normal(1, 1);
-     speech_rate_mu[i,:] ~ exponential(2);
-   }
-   // Each speech-rate observation g has its own latent rate per class, drawn
-   // from its corpus-level gamma, and Poisson counts over durations[g].
-   for (g in 1:n_rates) {
-     for (i in 1:n_classes) {
-       speech_rate[i,g] ~ gamma(
-         speech_rate_alpha[i,group_corpus[g]],
-         (speech_rate_alpha[i,group_corpus[g]]/speech_rate_mu[i,group_corpus[g]])/1000
-       );
-       speech_rates[g,i] ~ poisson(speech_rate[i,g]*durations[g]);
-     }
-   }
- }
|