před 2 měsíci · 581fa72db5
--- a/code/models/blocks/confusion_inverse_model_hurdle.stan
+++ b/code/models/blocks/confusion_inverse_model_hurdle.stan
@@ -0,0 +1,33 @@
 
				+real inverse_model_lpmf(array[] int children,
			
 
				+    int start, int end,
			
 
				+    int n_recs,
			
 
				+    int n_classes,
			
 
				+    real duration,
			
 
				+    array [,] int vocs,
			
 
				+    array [] real age,
			
 
				+    matrix truth_vocs,
			
 
				+    vector p,
			
 
				+    array [] matrix actual_confusion,
			
 
				+    //array [] vector actual_fp_rate,
			
 
				+    matrix mus,
			
 
				+    matrix alphas//,
			
 
				+    //vector mus_fp,
			
 
				+    //vector alphas_fp
			
 
				+    ) {
			
 
				+        real ll = 0;
			
 
				+
			
 
				+        vector [4] expect;
			
 
				+
			
 
				+        for (k in start:end) {
			
 
				+            expect = rep_vector(0, 4);
			
 
				+
			
 
				+            for (i in 1:n_classes) {
			
 
				+                ll += gamma_lpdf(actual_confusion[k,i] | alphas[i,:], alphas[i,:]./mus[i,:]);                
			
 
				+                expect[i] = dot_product(truth_vocs[k,:], (actual_confusion[k,:,i]./(1-exp(-actual_confusion[k,:,i]))).*p'); //'
			
 
				+            }
			
 
				+            
			
 
				+            ll += normal_lpdf(vocs[k,:] | expect, sqrt(expect));
			
 
				+        }
			
 
				+
			
 
				+        return ll;
			
 
				+    }
			
--- a/code/models/blocks/confusion_model.stan
+++ b/code/models/blocks/confusion_model.stan
@@ -18,16 +18,16 @@ real confusion_model_lpmf(array[] int group,
 
				             log_contrib_comb[:n] = rep_vector(0, n);
			
 
				             n = 1;
			
 
				 
			
 
				-            for (chi in 0:(truth[k,1]>0?max(0, algo[k,i]):0)) {
			
 
				+            for (chi in 0:(truth[k,1]>0?max(truth[k,1], algo[k,i]):0)) {
			
 
				                 bp[1] = truth[k,1]==0?0:poisson_lpmf(chi | truth[k,1]*lambda[group[k-start+1],1,i]);
			
 
				 
			
 
				-                for (och in 0:(truth[k,2]>0?max(0, algo[k,i]-chi):0)) {
			
 
				+                for (och in 0:(truth[k,2]>0?max(truth[k,2], algo[k,i]-chi):0)) {
			
 
				                     bp[2] = truth[k,2]==0?0:poisson_lpmf(och | truth[k,2]*lambda[group[k-start+1],2,i]);
			
 
				 
			
 
				-                    for (fem in 0:(truth[k,3]>0?max(0, algo[k,i]-chi-och):0)) {
			
 
				+                    for (fem in 0:(truth[k,3]>0?max(truth[k,3], algo[k,i]-chi-och):0)) {
			
 
				                         bp[3] = truth[k,3]==0?0:poisson_lpmf(fem | truth[k,3]*lambda[group[k-start+1],3,i]);
			
 
				 
			
 
				-                        for (mal in 0:(truth[k,4]>0?max(0, algo[k,i]-chi-och-fem):0)) {
			
 
				+                        for (mal in 0:(truth[k,4]>0?max(truth[k,4], algo[k,i]-chi-och-fem):0)) {
			
 
				                             bp[4] = truth[k,4]==0?0:poisson_lpmf(mal | truth[k,4]*lambda[group[k-start+1],4,i]);
			
 
				 
			
 
				                             int delta = algo[k,i] - (mal+fem+och+chi);
			
--- a/code/models/blocks/confusion_model_hurdle.stan
+++ b/code/models/blocks/confusion_model_hurdle.stan
@@ -0,0 +1,64 @@
 
				+real confusion_model_lpmf(array[] int group,
			
 
				+    int start, int end,
			
 
				+    int n_classes,
			
 
				+    array[,] int algo,
			
 
				+    array[,] int truth,
			
 
				+    array[] real age,
			
 
				+    array[] real clip_duration,
			
 
				+    vector p,
			
 
				+    array[] matrix lambda
			
 
				+) {
			
 
				+    real ll = 0;
			
 
				+    vector [4] dp;
			
 
				+    vector [4] bp;
			
 
				+
			
 
				+    vector[8192] log_contrib_detect;
			
 
				+    int n = size(log_contrib_detect);
			
 
				+
			
 
				+    vector[8192] log_contrib_comb;
			
 
				+    int n = size(log_contrib_comb);
			
 
				+
			
 
				+    for (k in start:end) {
			
 
				+        log_contrib_detect[:n] = rep_vector(0, n);
			
 
				+        for (chi_d in 0:truth[k,1]) {
			
 
				+            dp[1] = binomial_lpmf(chi_d | truth[k,1], p[1]);
			
 
				+            for (och_d in 0:truth[k,2]) {
			
 
				+                dp[2] = binomial_lpmf(och_d | truth[k,2], p[2]);
			
 
				+                for (fem_d in 0:truth[k,3]) {
			
 
				+                    dp[3] = binomial_lpmf(fem_d | truth[k,3], p[3]);
			
 
				+                        for (mal_d in 0:truth[k,4]) {
			
 
				+                            dp[4] = binomial_lpmf(mal_d | truth[k,4], p[4]);
			
 
				+                            for (i in 1:n_classes) {
			
 
				+                                log_contrib_comb[:n] = rep_vector(sum(dp), n);
			
 
				+                                n = 1;
			
 
				+                                for (chi in 0:(chi_d>0?max(0, algo[k,i]):0)) {
			
 
				+                                    bp[1] = chi_d==0?0:poisson_lpmf(chi_d | chi_d*lambda[group[k-start+1],1,i])-log1m_exp(-chi_d*lambda[group[k-start+1],1,i]);
			
 
				+                                        for (och in 0:(och_d>0?max(0, algo[k,i]-chi):0)) {
			
 
				+                                        bp[2] = och_d==0?0:poisson_lpmf(och | och_d*lambda[group[k-start+1],2,i])-log1m_exp(-och_d*lambda[group[k-start+1],2,i]);
			
 
				+                                            for (fem in 0:(fem_d>0?max(0, algo[k,i]-chi-och):0)) {
			
 
				+                                                bp[3] = fem_d==0?0:poisson_lpmf(fem_d | fem_d*lambda[group[k-start+1],3,i])-log1m_exp(-fem_d*lambda[group[k-start+1],3,i]);
			
 
				+                                                for (mal in 0:(mal_d>0?max(0, algo[k,i]-chi-och-fem):0)) {
			
 
				+                                                    bp[4] = mal_d==0?0:poisson_lpmf(mal | mal_d*lambda[group[k-start+1],4,i])-log1m_exp(-mal_d*lambda[group[k-start+1],4,i]);
			
 
				+
			
 
				+                                                    int delta = algo[k,i] - (mal+fem+och+chi);
			
 
				+                                                    if (delta==0) {
			
 
				+                                                        log_contrib_comb[n] += sum(bp);
			
 
				+                                                        n = n+1;
			
 
				+                                                    }
			
 
				+                                                }
			
 
				+                                            }
			
 
				+                                        }
			
 
				+                                    }
			
 
				+                                }
			
 
				+                                if (n>1) {
			
 
				+                                    ll += log_sum_exp(log_contrib_comb[1:n-1]);
			
 
				+                                }
			
 
				+                            }
			
 
				+                        }
			
 
				+                    }
			
 
				+                }   
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    return ll;
			
 
				+}
			
--- a/code/models/blocks/confusion_model_parameters_hurdle.stan
+++ b/code/models/blocks/confusion_model_parameters_hurdle.stan
@@ -0,0 +1,4 @@
 
				+matrix<lower=0>[n_classes,n_classes] alphas;
			
 
				+matrix<lower=0>[n_classes,n_classes] mus;
			
 
				+array [n_groups] matrix<lower=0>[n_classes,n_classes] lambda;
			
 
				+vector<lower=0,upper=1>[n_classes] p;
			
--- a/code/models/blocks/confusion_model_priors.stan
+++ b/code/models/blocks/confusion_model_priors.stan
@@ -2,7 +2,7 @@ for (i in 1:n_classes) {
 
				     for (j in 1:n_classes) {
			
 
				         if (i==j) {
			
 
				             mus[i,j] ~ exponential(2);
			
 
				-            (alphas[i,j]/10.0) ~ gamma(2, 1);
			
 
				+            alphas[i,j] ~ gamma(2, 1);
			
 
				         }
			
 
				         else {
			
 
				             mus[i,j] ~ exponential(8);
			
@@ -12,4 +12,4 @@ for (i in 1:n_classes) {
 
				             lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/mus[i,j]);
			
 
				         }
			
 
				     }
			
 
				-}
			
 
				+}
			
--- a/code/models/blocks/confusion_model_priors_hurdle.stan
+++ b/code/models/blocks/confusion_model_priors_hurdle.stan
@@ -0,0 +1,12 @@
 
				+p ~ uniform(0, 1);
			
 
				+for (i in 1:n_classes) {
			
 
				+    for (j in 1:n_classes) {
			
 
				+        if (i==j) {
			
 
				+            mus[i,j] ~ exponential(1);
			
 
				+            alphas[i,j] ~ gamma(2, 1);
			
 
				+        }
			
 
				+        for (c in 1:n_groups) {
			
 
				+            lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/mus[i,j]);
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/code/models/dev_siblings_hurdle.stan
+++ b/code/models/dev_siblings_hurdle.stan
@@ -0,0 +1,130 @@
 
				+functions {
			
 
				+    #include "blocks/confusion_model_hurdle.stan"
			
 
				+    #include "blocks/confusion_inverse_model_hurdle.stan"
			
 
				+    #include "blocks/behavior_model_truth.stan"
			
 
				+}
			
 
				+
			
 
				+// TODO
			
 
				+// use speech rates to set priors on truth_vocs
			
 
				+data {
			
 
				+    int<lower=1> n_classes; // number of classes
			
 
				+
			
 
				+    // analysis data block
			
 
				+    int<lower=1> n_recs;
			
 
				+    int<lower=1> n_children;
			
 
				+
			
 
				+    array[n_recs] int<lower=1> children;
			
 
				+    array[n_recs] real<lower=1> age;
			
 
				+    array[n_recs] int<lower=-1> siblings;
			
 
				+    array[n_recs, n_classes] int<lower=0> vocs;
			
 
				+    array[n_children] int<lower=1> corpus;
			
 
				+
			
 
				+    real<lower=0> recs_duration;
			
 
				+
			
 
				+    // speaker confusion data block
			
 
				+    int<lower=1> n_clips;   // number of clips
			
 
				+    int<lower=1> n_groups; // number of groups
			
 
				+    int<lower=1> n_corpora;
			
 
				+    array [n_clips] int group;
			
 
				+    array [n_clips] int conf_corpus;
			
 
				+    array [n_clips,n_classes] int<lower=0> algo_total; // algo vocs attributed to specific speakers
			
 
				+    array [n_clips,n_classes] int<lower=0> truth_total;
			
 
				+    array [n_clips] real<lower=0> clip_duration;
			
 
				+    array [n_clips] real<lower=0> clip_age;
			
 
				+
			
 
				+    int<lower=0> n_validation;
			
 
				+
			
 
				+    // actual speech rates
			
 
				+    int<lower=1> n_rates;
			
 
				+    int<lower=1> n_speech_rate_children;
			
 
				+
			
 
				+    array [n_rates,n_classes] int<lower=0> speech_rates;
			
 
				+    array [n_rates] int group_corpus;
			
 
				+    array [n_rates] real<lower=0> durations;
			
 
				+    array [n_rates] real<lower=0> speech_rate_age;
			
 
				+    array [n_rates] int<lower=-1> speech_rate_siblings;
			
 
				+    array [n_rates] int<lower=1,upper=n_speech_rate_children> speech_rate_child;
			
 
				+
			
 
				+    // parallel processing
			
 
				+    int<lower=1> threads;
			
 
				+}
			
 
				+
			
 
				+transformed data {
			
 
				+    vector<lower=0>[n_groups] recording_age;
			
 
				+    array[n_speech_rate_children] int<lower=1> speech_rate_child_corpus;
			
 
				+
			
 
				+    array[n_children] int<lower=-1> child_siblings;
			
 
				+    array[n_speech_rate_children] int<lower=-1> speech_rate_child_siblings;
			
 
				+    int no_siblings = 0;
			
 
				+    int has_siblings = 0;
			
 
				+
			
 
				+    for (c in 1:n_clips) {
			
 
				+        recording_age[group[c]] = clip_age[c];
			
 
				+    }
			
 
				+
			
 
				+    for (k in 1:n_rates) {
			
 
				+        speech_rate_child_corpus[speech_rate_child[k]] = group_corpus[k];
			
 
				+    }
			
 
				+
			
 
				+    for (k in 1:n_recs) {
			
 
				+        child_siblings[children[k]] = siblings[k];
			
 
				+    }
			
 
				+
			
 
				+    for (c in 1:n_children) {
			
 
				+        if (child_siblings[c] == 0) {
			
 
				+            no_siblings += 1;
			
 
				+        }
			
 
				+        else if (child_siblings[c] > 0) {
			
 
				+            has_siblings += 1;
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    for (k in 1:n_rates) {
			
 
				+        speech_rate_child_siblings[speech_rate_child[k]] = speech_rate_siblings[k];
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+parameters {
			
 
				+    matrix<lower=0>[n_children,n_classes-1] mu_child_level;
			
 
				+    vector [n_children] child_dev_age;
			
 
				+    matrix<lower=0> [n_recs, n_classes] truth_vocs;
			
 
				+    array [n_recs] matrix<lower=0>[n_classes,n_classes] actual_confusion_baseline;
			
 
				+
			
 
				+    // confusion parameters
			
 
				+    #include "blocks/confusion_model_parameters_hurdle.stan"
			
 
				+
			
 
				+    // behavior model parameters
			
 
				+    #include "blocks/behavior_model_parameters.stan"
			
 
				+
			
 
				+    // parameters specific to human annotations
			
 
				+    #include "blocks/human_annotations_parameters.stan"
			
 
				+}
			
 
				+
			
 
				+model {
			
 
				+    // inverse confusion model
			
 
				+    target += reduce_sum(
			
 
				+       inverse_model_lpmf, children, 1,
			
 
				+       n_recs, n_classes, recs_duration,
			
 
				+       vocs, age,
			
 
				+       truth_vocs, actual_confusion_baseline, p, mus, alphas
			
 
				+    );    
			
 
				+
			
 
				+    // contribution of full recordings to the model of behavior
			
 
				+    #include "blocks/behavior_observations_model.stan"
			
 
				+
			
 
				+    target += reduce_sum(
			
 
				+        confusion_model_lpmf, group, n_clips%/%(threads*4),
			
 
				+        n_classes,
			
 
				+        algo_total, truth_total, clip_duration, clip_age,
			
 
				+        p, lambda//, lambda_fp
			
 
				+    );
			
 
				+
			
 
				+    // priors on the nuisance parameters of the confusion model
			
 
				+    #include "blocks/confusion_model_priors_hurdle.stan"
			
 
				+
			
 
				+    // priors on the hierarchical model of speech behavior
			
 
				+    #include "blocks/behavior_model_priors.stan"
			
 
				+
			
 
				+    // human annotations contribution
			
 
				+    #include "blocks/human_annotations.stan"
			
 
				+}