123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
- functions {
-   // Partial-sum log-likelihood of the speaker-confusion model, written for reduce_sum.
-   //
-   // For every clip k in start:end and every detected class i, the detected count
-   // algo[k, i] is modelled as a sum of independent contributions, one per true speaker s
-   // with truth[k, s] > 0. Each contribution is negative-binomial with
-   //   alpha = truth[k, s] * lambda[g, s, i] / (omega[s, i] - 1),  beta = 1 / (omega[s, i] - 1),
-   // i.e. mean truth * lambda and variance/mean ratio omega. Speakers with truth[k, s] == 0
-   // contribute exactly zero detections.
-   //
-   // Arguments:
-   //   group          slice of the group index supplied by reduce_sum; element 1 belongs to clip `start`
-   //   start, end     inclusive range of clips (indices into algo / truth) covered by this slice
-   //   n_classes      number of classes; used for both the detected classes and the true speakers
-   //                  (the earlier version hard-coded 4 true speakers, identical when n_classes == 4)
-   //   algo           detected counts, indexed [clip, detected class]
-   //   truth          annotated counts, indexed [clip, true speaker]
-   //   clip_duration  currently unused; only the disabled false-positive term referred to it
-   //   lambda         per-group rate matrices, indexed [group, true speaker, detected class]
-   //   omega          over-dispersion, indexed [true speaker, detected class]; must be > 1
-   //
-   // Returns the summed log-probability over the slice. A (clip, class) cell for which no
-   // true speaker is present adds nothing, even when algo[k, i] > 0: no false-positive term
-   // is modelled (a Poisson term on the unexplained count, with rate
-   // lambda_fp * clip_duration, was sketched earlier and is disabled).
-   real confusion_model_lpmf(array[] int group,
-                             int start, int end,
-                             int n_classes,
-                             array[,] int algo,
-                             array[,] int truth,
-                             array[] real clip_duration,
-                             array[] matrix lambda,
-                             matrix omega) {
-     real ll = 0;
-     for (k in start:end) {
-       // group is the slice, so it is indexed relative to start; algo and truth are global
-       int g = group[k - start + 1];
-       for (i in 1:n_classes) {
-         int total = algo[k, i];
-         int n_active = 0;
-         // log_p[t + 1] = log P(contributions of the speakers handled so far sum to t)
-         vector[total + 1] log_p;
-         for (s in 1:n_classes) {
-           if (truth[k, s] > 0) {
-             real nb_alpha = truth[k, s] * lambda[g, s, i] / (omega[s, i] - 1);
-             real nb_beta = 1 / (omega[s, i] - 1);
-             // A contribution larger than the detected total can never be part of a valid
-             // split, so the table only needs the counts 0..total.
-             vector[total + 1] log_q;
-             for (c in 0:total) {
-               log_q[c + 1] = neg_binomial_lpmf(c | nb_alpha, nb_beta);
-             }
-             if (n_active == 0) {
-               log_p = log_q;
-             } else {
-               // Log-space convolution: equivalent to enumerating every split of t between
-               // the previous speakers and this one, without a fixed-size scratch buffer.
-               vector[total + 1] prev = log_p;
-               for (t in 0:total) {
-                 vector[t + 1] terms;
-                 for (c in 0:t) {
-                   terms[c + 1] = prev[t - c + 1] + log_q[c + 1];
-                 }
-                 log_p[t + 1] = log_sum_exp(terms);
-               }
-             }
-             n_active += 1;
-           }
-         }
-         // With no speaker present the cell is skipped (total == 0 would add log(1) = 0 anyway).
-         if (n_active > 0) {
-           ll += log_p[total + 1];
-         }
-       }
-     }
-     return ll;
-   }
- }
- // TODO
- // use speech rates to set priors on truth_vocs
- data {
-   int<lower=1> n_classes; // number of classes
-   // analysis data block
-   // NOTE(review): none of the declarations in this sub-block are referenced by the
-   // transformed data, parameters or model blocks visible in this file.
-   int<lower=1> n_recs;
-   int<lower=1> n_children;
-   array[n_recs] int<lower=1> children;
-   array[n_recs] real<lower=1> age;
-   array[n_recs] int<lower=-1> siblings; // presumably -1 marks an unknown value - TODO confirm
-   array[n_recs, n_classes] int<lower=0> vocs;
-   array[n_children] int<lower=1> corpus;
-   real<lower=0> recs_duration;
-   // speaker confusion data block
-   int<lower=1> n_clips; // number of clips
-   int<lower=1> n_groups; // number of groups
-   int<lower=1> n_corpora;
-   // group[c] indexes lambda, recording_age and recording_rural, so it has to lie in
-   // 1..n_groups; the bounds reject bad input at load time instead of failing on an index later.
-   array[n_clips] int<lower=1, upper=n_groups> group;
-   array[n_clips] int conf_corpus;
-   array[n_clips, n_classes] int<lower=0> algo_total; // algo vocs attributed to specific speakers
-   array[n_clips, n_classes] int<lower=0> truth_total; // second index is the true speaker in confusion_model_lpmf
-   array[n_clips] real<lower=0> clip_duration;
-   array[n_clips] real<lower=0> clip_age;
-   array[n_clips] int<lower=0> clip_rural; // > 0 selects the rural hyperparameters in the model block
-   int<lower=0> n_validation;
-   // parallel processing
-   int<lower=1> threads; // used to derive the reduce_sum grainsize
- }
- transformed data {
-   // Per-group covariates copied from the clips of each group. When a group has
-   // several clips, the value of the last clip in data order is the one kept.
-   vector<lower=0>[n_groups] recording_age;
-   array[n_groups] int<lower=0> recording_rural;
-   for (clip in 1:n_clips) {
-     int g = group[clip];
-     recording_age[g] = clip_age[clip];
-     recording_rural[g] = clip_rural[clip];
-   }
- }
- parameters {
- // confusion matrix: hyperparameters (first index 1 = urban, 2 = rural, as labelled on the priors in the model block) and per-group rates
- array[2] matrix<lower=0>[n_classes,n_classes] alphas; // shape of the gamma prior placed on lambda in the model block
- array[2] matrix<lower=0>[n_classes,n_classes] mus; // mean of that gamma prior (its rate is alphas / mus)
- array [n_groups] matrix<lower=0>[n_classes,n_classes] lambda; // per-group rates; confusion_model_lpmf reads lambda[group, true speaker, detected class]
- //matrix<lower=0>[n_classes,n_classes] conf_sd;
- matrix<lower=1>[n_classes,n_classes] omega; // over-dispersion: variance/mean ratio of the negative binomial in confusion_model_lpmf; omega - 1 is used as a divisor there
- }
- transformed parameters {
- //matrix<lower=1>[n_classes,n_classes] omega = exp(conf_sd/10); // disabled alternative: omega derived from conf_sd; omega is currently declared directly in the parameters block
- }
- model {
-   // Aim for about four slices per thread, but never let the grainsize drop to 0:
-   // the integer division yields 0 whenever n_clips < 4 * threads.
-   int grainsize = max(1, n_clips %/% (threads * 4));
-
-   // Likelihood of the detected counts given the annotated counts, summed over clips in parallel.
-   target += reduce_sum(
-     confusion_model_lpmf, group, grainsize,
-     n_classes,
-     algo_total, truth_total, clip_duration,
-     lambda, omega//, lambda_fp
-   );
-
-   // Disabled false-positive model:
-   //mus_fp ~ exponential(1);
-   //alphas_fp ~ gamma(2, 1);
-   for (i in 1:n_classes) {
-     //conf_sd[i,:] ~ normal(0, 1);
-     omega[i, :] ~ pareto(1, 2);
-     //lambda_fp[:,i] ~ gamma(alphas_fp[i], alphas_fp[i]/mus_fp[i]);
-     for (j in 1:n_classes) {
-       mus[1, i, j] ~ exponential(1); // urban
-       mus[2, i, j] ~ exponential(1); // rural
-       alphas[1, i, j] ~ lognormal(0, 1); // urban
-       alphas[2, i, j] ~ lognormal(0, 1); // rural
-       for (c in 1:n_groups) {
-         // Hyperparameter set of this group: 2 (rural) when flagged, otherwise 1 (urban).
-         int setting = recording_rural[c] > 0 ? 2 : 1;
-         // Gamma with shape alphas and rate alphas / mus, i.e. prior mean mus.
-         lambda[c, i, j] ~ gamma(alphas[setting, i, j], alphas[setting, i, j] / mus[setting, i, j]);
-       }
-     }
-   }
- }
|