- // NOTE: a garbled line-number column left over from text extraction was removed here.
- functions {
- // User-defined log-density functions pulled in from shared blocks.
- // The model block below calls confusion_model_lpdf and inverse_model_lpdf
- // through reduce_sum; their definitions live in the included files
- // (presumably binomial-hurdle confusion models -- confirm in blocks/).
- #include "blocks/confusion_model_binomial_hurdle_fast.stan"
- #include "blocks/confusion_inverse_model_binomial_hurdle.stan"
- #include "blocks/behavior_model_truth.stan"
- }
- // TODO
- // use speech rates to set priors on truth_vocs
- data {
- int<lower=1> n_classes; // number of speaker classes (rows/cols of the confusion matrices)
- // analysis data block
- int<lower=1> n_recs; // number of full recordings
- int<lower=1> n_children; // number of distinct children in the recordings
- array[n_recs] int<lower=1> children; // child index for each recording
- array[n_recs] real<lower=0> age; // child age at each recording (units not shown here -- TODO confirm)
- array[n_recs] int<lower=-1> siblings; // sibling count per recording; -1 presumably codes "missing" -- verify
- array[n_recs, n_classes] int<lower=0> vocs_algo1; // per-class vocalization counts from algorithm 1
- array[n_recs, n_classes] int<lower=0> vocs_algo2; // per-class vocalization counts from algorithm 2
- array[n_children] int<lower=1> corpus; // corpus membership of each child
- real<lower=0> recs_duration; // recording duration, shared by all recordings
- // speaker confusion data block
- int<lower=1> n_clips; // number of annotated clips
- int<lower=1> n_groups; // number of groups (clips map onto groups via `group`)
- int<lower=1> n_corpora; // number of corpora
- array [n_clips] int group; // group index of each clip
- array [n_clips] int conf_corpus; // corpus index of each clip
- array [n_clips,n_classes] int<lower=0> algo1_total; // algo 1 vocs attributed to specific speakers
- array [n_clips,n_classes] int<lower=0> algo2_total; // algo 2 vocs attributed to specific speakers
- array [n_clips,n_classes] int<lower=0> truth_total; // human-annotated vocs per speaker class
- array [n_clips] real<lower=0> clip_duration; // duration of each clip
- array [n_clips] real<lower=0> clip_age; // child age at each clip
- int<lower=0> n_validation; // NOTE(review): not referenced in this file -- confirm it is needed downstream
- // actual speech rates
- int<lower=1> n_rates; // number of speech-rate observations
- int<lower=1> n_speech_rate_children; // number of children in the speech-rate data
- array [n_rates,n_classes] int<lower=0> speech_rates; // per-class vocalization counts per observation
- array [n_rates] int group_corpus; // corpus index for each speech-rate observation
- array [n_rates] real<lower=0> durations; // duration of each speech-rate observation
- array [n_rates] real<lower=0> speech_rate_age; // child age per speech-rate observation
- array [n_rates] int<lower=-1> speech_rate_siblings; // sibling count; -1 presumably codes "missing" -- verify
- array [n_rates] int<lower=1,upper=n_speech_rate_children> speech_rate_child; // child index per observation
- // parallel processing
- int<lower=1> threads; // NOTE(review): not referenced in this file (reduce_sum grainsize is hard-coded to 1)
- }
- transformed data {
- // Per-group child age, copied from the clips belonging to each group
- // (if several clips share a group, the last clip visited wins).
- vector<lower=0>[n_groups] recording_age;
- // Per-child lookup tables built from the observation-level arrays.
- array[n_speech_rate_children] int<lower=1> speech_rate_child_corpus;
- array[n_children] int<lower=-1> child_siblings;
- array[n_speech_rate_children] int<lower=-1> speech_rate_child_siblings;
- int no_siblings = 0; // children observed with zero siblings
- int has_siblings = 0; // children observed with one or more siblings
- real p_outlier = 0.025; // mixture weight for outlier tolerance (used in the model block)
- for (c in 1:n_clips) {
- recording_age[group[c]] = clip_age[c];
- }
- // Single pass over the speech-rate observations fills both per-child
- // tables at once (the original code ran two identical loops over 1:n_rates).
- for (k in 1:n_rates) {
- speech_rate_child_corpus[speech_rate_child[k]] = group_corpus[k];
- speech_rate_child_siblings[speech_rate_child[k]] = speech_rate_siblings[k];
- }
- for (k in 1:n_recs) {
- child_siblings[children[k]] = siblings[k];
- }
- // Count children with known sibling status; -1 (missing) falls in neither bin.
- for (c in 1:n_children) {
- if (child_siblings[c] == 0) {
- no_siblings += 1;
- }
- else if (child_siblings[c] > 0) {
- has_siblings += 1;
- }
- }
- }
- parameters {
- // Child-level mean rates; n_classes-1 columns, so one class is presumably
- // handled separately (TODO confirm which class is excluded and why).
- matrix<lower=0>[n_children,n_classes-1] mu_child_level;
- vector [n_children] child_dev_age; // per-child age deviation (scale/units not shown here -- verify)
- matrix<lower=0> [n_recs, n_classes] truth_vocs; // latent true vocalization quantities per recording and class
- // nuisance parameters: per-recording realized confusion matrices for each algorithm
- array [n_recs] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion_baseline_algo1;
- array [n_recs] matrix<lower=0,upper=1>[n_classes,n_classes] actual_confusion_baseline_algo2;
- // confusion parameters: for each algorithm, a population-level
- // beta_proportion prior (mean mus_*, precision etas_*) over group-level
- // confusion rates lambda_*, plus hurdle probabilities p_* (see model block)
- matrix<lower=1>[n_classes,n_classes] etas_algo1;
- matrix<lower=0,upper=1>[n_classes,n_classes] mus_algo1;
- array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] lambda_algo1;
- matrix<lower=0,upper=1>[n_classes,n_classes] p_algo1;
- matrix<lower=1>[n_classes,n_classes] etas_algo2;
- matrix<lower=0,upper=1>[n_classes,n_classes] mus_algo2;
- array [n_groups] matrix<lower=0,upper=1>[n_classes,n_classes] lambda_algo2;
- matrix<lower=0,upper=1>[n_classes,n_classes] p_algo2;
- // behavior model parameters
- #include "blocks/behavior_model_parameters.stan"
- // parameters specific to human annotations
- #include "blocks/human_annotations_parameters.stan"
- }
- model {
- //actual model
- // inverse confusion model: likelihood of each algorithm's counts on the
- // full recordings given latent truth_vocs and the confusion parameters;
- // parallelized with reduce_sum (grainsize 1).
- target += reduce_sum(
- inverse_model_lpdf, actual_confusion_baseline_algo1, 1,
- n_recs, n_classes, recs_duration,
- vocs_algo1, age,
- truth_vocs, mus_algo1, etas_algo1, p_algo1
- );
- target += reduce_sum(
- inverse_model_lpdf, actual_confusion_baseline_algo2, 1,
- n_recs, n_classes, recs_duration,
- vocs_algo2, age,
- truth_vocs, mus_algo2, etas_algo2, p_algo2
- );
- // contribution of full recordings to the model of behavior
- #include "blocks/behavior_observations_model.stan"
- // forward confusion model on the annotated clips, one call per algorithm
- target += reduce_sum(
- confusion_model_lpdf, lambda_algo1, 1,
- n_classes, n_clips,
- algo1_total, truth_total, group, clip_duration, clip_age,
- p_algo1
- );
- target += reduce_sum(
- confusion_model_lpdf, lambda_algo2, 1,
- n_classes, n_clips,
- algo2_total, truth_total, group, clip_duration, clip_age,
- p_algo2 // BUG FIX: was p_algo1 (copy-paste) -- the algo2 likelihood must use algo2's hurdle parameters
- );
- // priors on the nuisance parameters of the confusion model
- for (i in 1:n_classes) {
- p_algo1[i] ~ beta(3,1);
- mus_algo1[i,:] ~ uniform(0, 1);
- etas_algo1[i,:] ~ pareto(1, 1.5);
- p_algo2[i] ~ beta(3,1);
- mus_algo2[i,:] ~ uniform(0, 1);
- etas_algo2[i,:] ~ pareto(1, 1.5);
-
- }
- // group-level confusion rates lambda_* get beta_proportion priors;
- // mixing with a flat (log-density 0) component of weight p_outlier makes
- // the model tolerant to outlier groups.
- for (c in 1:n_groups) {
- real ll_normal = 0;
- for (i in 1:n_classes) {
- ll_normal += beta_proportion_lpdf(lambda_algo1[c,i,:] | mus_algo1[i,:], etas_algo1[i,:]) + beta_proportion_lpdf(lambda_algo2[c,i,:] | mus_algo2[i,:], etas_algo2[i,:]);
- }
- // tolerance to outliers
- target += log_mix(p_outlier, 0, ll_normal);
- }
- // priors on the hierarchical model of speech behavior
- #include "blocks/behavior_model_priors.stan"
- // human annotations contribution
- #include "blocks/human_annotations.stan"
- }
|