2 Commits 2c537b2171 ... 5f6ea6858f

Author SHA1 Message Date
  Lucas Gautheron 5f6ea6858f Merge branch 'analysis-example' of gin.g-node.org:/LAAC-LSCP/speaker-confusion-model into analysis-example 4 weeks ago
  Lucas Gautheron 50de765320 [DATALAD] Recorded changes 4 weeks ago

+ 2 - 1
code/models/blocks/behavior_model_parameters.stan

@@ -16,4 +16,5 @@ real alpha_dev;
 real<lower=0> sigma_dev;
 
 // effect of excess ADU input
-real beta_dev;
+real beta_dev; // age-scaled part: multiplied by age/12.0/10.0 in the exponent of chi_mu
+real beta_direct; // age-independent part: enters the exponent of chi_mu as beta_direct/10.0

+ 18 - 0
code/models/blocks/behavior_model_parameters_nonlinear.stan

@@ -0,0 +1,18 @@
+vector<lower=0>[n_classes] alpha_child_level; // spread across recordings for a given child (one entry per class); NOTE(review): used as a gamma shape in behavior_model_truth.stan, so larger = less spread
+array[2] vector<lower=0>[n_classes-1] alpha_corpus_level; // spread among children; NOTE(review): the two array slots presumably correspond to sibling status - confirm
+matrix<lower=0>[n_classes-1,n_corpora] mu_corpus_level; // NOTE(review): originally labelled "child-level average", but this has one column per corpus and its prior is centred on mu_pop_level[i+1] - looks like corpus-level averages
+vector<lower=0>[n_classes-1] alpha_pop_level; // spread among corpora (gamma shape of the mu_corpus_level prior)
+vector<lower=0>[n_classes] mu_pop_level; // population level averages
+//vector<lower=0>[n_classes-1] alpha_pop; // mean child-level variance
+
+// siblings
+real beta_sib_och; // effect of having siblings on OCH speech
+real beta_sib_adu; // effect of having siblings on ADU speech
+real<lower=0,upper=1> p_sib; // prob of having siblings
+
+
+// average effect of age
+vector[n_age_bins] d_beta_age; // one increment per age bin
+
+// effect of excess ADU input
+real beta_dev;

+ 2 - 1
code/models/blocks/behavior_model_priors.stan

@@ -17,4 +17,5 @@ beta_sib_adu ~ normal(0, 1);
 
 alpha_dev ~ normal(0, 1);
 sigma_dev ~ exponential(1);
-beta_dev ~ normal(0, 1);
+beta_dev ~ normal(0, 1);
+beta_direct ~ normal(0, 1);

+ 19 - 0
code/models/blocks/behavior_model_priors_nonlinear.stan

@@ -0,0 +1,19 @@
+alpha_child_level ~ gamma(4,1);
+mu_pop_level ~ exponential(4); // prior mean 1/4, i.e. 250 vocs/hour assuming rates are expressed in 1000 vocs/hour - TODO confirm units
+alpha_pop_level ~ gamma(8, 4); // prior mean 8/4 = 2, sd = 0.35 x mean
+//alpha_pop ~ gamma(10, 10);
+for (i in 1:n_classes-1) { // upper bound is the expression n_classes-1
+    //alpha_corpus_level[1,i] ~ gamma(4, 4/alpha_pop[i]);
+    //alpha_corpus_level[2,i] ~ gamma(4, 4/alpha_pop[i]);
+    alpha_corpus_level[1,i] ~ gamma(4, 2);
+    alpha_corpus_level[2,i] ~ gamma(4, 2);
+    mu_corpus_level[i,:] ~ gamma(alpha_pop_level[i],alpha_pop_level[i]/mu_pop_level[i+1]); // shape a, rate a/m => mean m = mu_pop_level[i+1]
+}
+
+has_siblings ~ binomial(has_siblings+no_siblings, p_sib); // has_siblings successes out of has_siblings+no_siblings trials
+p_sib ~ uniform(0, 1);
+beta_sib_och ~ normal(0, 1);
+beta_sib_adu ~ normal(0, 1);
+
+d_beta_age ~ normal(0, 0.05); // 5% variation per month
+beta_dev ~ normal(0, 1);

+ 3 - 2
code/models/blocks/behavior_model_truth.stan

@@ -9,13 +9,14 @@ real recs_priors_lpmf(array[] int children,
     matrix mu_child_level,
     vector alpha_child_level,
     vector child_dev_age,
-    real beta_dev
+    real beta_dev,
+    real beta_direct
     ) {
         real ll = 0;
         
         for (k in start:end) {
             real chi_mu = mu_pop_level[1]*exp(
-                (child_dev_age[children[k-start+1]])*age[k]/12.0/10.0+beta_dev*(mu_child_level[children[k-start+1],2]+mu_child_level[children[k-start+1],3]-mu_pop_level[3]-mu_pop_level[4])*age[k]/12.0/10.0
+                (child_dev_age[children[k-start+1]])*age[k]/12.0/10.0+(mu_child_level[children[k-start+1],2]+mu_child_level[children[k-start+1],3]-mu_pop_level[3]-mu_pop_level[4])*(beta_dev*age[k]/12.0/10.0+beta_direct/10.0)
             );
             ll += gamma_lpdf(
                 truth_vocs[k,1]/1000/recs_duration | alpha_child_level[1], alpha_child_level[1]/chi_mu

+ 10 - 7
code/models/blocks/confusion_model_priors_binomial_hurdle.stan

@@ -1,11 +1,14 @@
 for (i in 1:n_classes) {
     p[i] ~ beta(3,1);
-    for (j in 1:n_classes) {
-        mus[i,j] ~ uniform(0, 1);
-        etas[i,j] ~ pareto(1, 1.5);
-        // etas[i,j] ~ exponential(0.1);
-        for (c in 1:n_groups) {
-            lambda[c,i,j] ~ beta_proportion(mus[i,j], etas[i,j]);
-        }
+    mus[i] ~ uniform(0, 1); // all of row i at once; same prior as the removed per-(i,j) statement
+    etas[i] ~ pareto(1, 1.5); // all of row i at once; same prior as the removed per-(i,j) statement
+}
+
+for (c in 1:n_groups) {
+    real ll_normal = 0; // summed beta_proportion log-density of lambda[c] under the shared (mus, etas)
+    for (i in 1:n_classes) {
+        ll_normal += beta_proportion_lpdf(lambda[c,i,:] | mus[i,:], etas[i,:]);
     }
+    // tolerance to outliers: with weight 0.025 group c contributes a constant log-density of 0 instead of ll_normal
+    target += log_mix(0.025, 0, ll_normal);
 }

+ 1 - 1
code/models/blocks/human_annotations.stan

@@ -1,6 +1,6 @@
 for (g in 1:n_rates) {
     real chi_mu = mu_pop_level[1]*exp(
-        (alpha_dev+sigma_dev*child_dev_speech_age[speech_rate_child[g]])*speech_rate_age[g]/12.0/10.0 + beta_dev*(speech_rate_child_level[speech_rate_child[g],2]+speech_rate_child_level[speech_rate_child[g],3]-mu_pop_level[3]-mu_pop_level[4])*speech_rate_age[g]/12.0/10.0
+        (alpha_dev+sigma_dev*child_dev_speech_age[speech_rate_child[g]])*speech_rate_age[g]/12.0/10.0 + (speech_rate_child_level[speech_rate_child[g],2]+speech_rate_child_level[speech_rate_child[g],3]-mu_pop_level[3]-mu_pop_level[4])*(beta_dev*speech_rate_age[g]/12.0/10.0+beta_direct/10.0)
     );
     speech_rate[1,g] ~ gamma(
         alpha_child_level[1],

+ 1 - 4
code/models/blocks/human_annotations_adu.stan

@@ -29,10 +29,7 @@ speech_rates[:,3] ~ poisson(speech_rate_fem.*durations*1000);
 speech_rates[:,4] ~ poisson(speech_rate_mal.*durations*1000);
 
 for (c in 1:n_speech_rate_children) {
-    speech_rate_child_fem_share[speech_rate_child_corpus[c]] ~ beta(
-        corpus_fem_share[1],
-        corpus_fem_share[2]
-    );
+    speech_rate_child_fem_share[speech_rate_child_corpus[c]] ~ uniform(0, 1);
 
     int distrib = child_siblings[c]==0?2:1;