Переглянути джерело

[DATALAD] Recorded changes

Lucas Gautheron 2 місяців тому
батько
коміт
cece699c3d

+ 28 - 0
code/models/blocks/behavior_model_parameters_adu.stan

@@ -0,0 +1,28 @@
+// Behaviour-model parameters (variant with a pooled adult rate).
+// The alpha_* parameters are used as gamma shape parameters in the priors and
+// likelihood; the matching rate is always alpha/mean.
+real<lower=0> alpha_pop_och; // gamma shape of the corpus-level OCH means (dispersion among corpora)
+real<lower=0> alpha_pop_adu; // gamma shape of the corpus-level ADU means
+
+// Gamma shapes of the child-level rates within a corpus (dispersion among children).
+// The model selects entry 2 when the child's sibling count equals 0 and entry 1 otherwise.
+vector<lower=0>[2] alpha_corpus_och;
+vector<lower=0>[2] alpha_corpus_adu;
+
+vector<lower=0>[n_classes] alpha_child; // gamma shape per speaker class: dispersion across recordings for a given child
+
+// Corpus-level mean rates
+vector<lower=0>[n_corpora] mu_corpus_och;
+vector<lower=0>[n_corpora] mu_corpus_adu;
+
+matrix<lower=0>[n_corpora,2] corpus_fem_share; // per corpus: the two beta parameters (columns 1 and 2) for the expected share of female speech
+
+// Population-level averages. The priors on mu_corpus_och / mu_corpus_adu index this
+// vector as [1]=CHI, [2]=OCH, [3]=ADU.
+vector<lower=0>[n_classes-1] mu_pop;
+//vector<lower=0>[n_classes-1] alpha_pop; // mean child-level variance
+
+// siblings
+// NOTE(review): the model applies beta_sib_* when the sibling count equals 0,
+// i.e. for children recorded as having no siblings -- confirm the intended direction.
+real beta_sib_och; // sibling effect on OCH speech (log scale, enters through exp())
+real beta_sib_adu; // sibling effect on ADU speech (log scale, enters through exp())
+real<lower=0,upper=1> p_sib; // prob of having siblings
+
+
+// average effect of age: the per-child age slope is alpha_dev + sigma_dev * (standard-normal child effect)
+real alpha_dev;
+real<lower=0> sigma_dev;
+
+// effect of excess ADU input (child's adult rate minus a population mean) on the age slope
+real beta_dev;

+ 26 - 0
code/models/blocks/behavior_model_priors_adu.stan

@@ -0,0 +1,26 @@
+
+// --- Hyperpriors on gamma shape parameters (larger shape = less dispersion) ---
+// dispersion of corpus means around the population mean
+alpha_pop_och ~ gamma(8, 4);
+alpha_pop_adu ~ gamma(8, 4);
+
+// dispersion of children around their corpus mean
+alpha_corpus_och ~ gamma(4, 2);
+alpha_corpus_adu ~ gamma(4, 2);
+
+// dispersion of recordings around their child's mean
+alpha_child ~ gamma(4,1);
+
+// --- Development (age) effects ---
+alpha_dev ~ normal(0, 1);
+sigma_dev ~ exponential(1);
+beta_dev ~ normal(0, 1);
+
+// --- Sibling effects and sibling prevalence ---
+beta_sib_och ~ normal(0, 1);
+beta_sib_adu ~ normal(0, 1);
+p_sib ~ uniform(0, 1);
+// observed number of children with siblings among those with a known status
+has_siblings ~ binomial(has_siblings+no_siblings, p_sib);
+
+// --- Population and corpus means ---
+mu_pop ~ exponential(4); // prior mean 1/4 (250 vocs/hour according to the original note)
+
+// gamma(shape, shape/mean): corpus means are centred on mu_pop[2] (OCH) and mu_pop[3] (ADU)
+mu_corpus_och ~ gamma(alpha_pop_och,alpha_pop_och/mu_pop[2]);
+mu_corpus_adu ~ gamma(alpha_pop_adu,alpha_pop_adu/mu_pop[3]);
+
+// both columns of corpus_fem_share (the two beta parameters of every corpus)
+// receive the same exponential(1) prior
+to_vector(corpus_fem_share) ~ exponential(1);

+ 40 - 0
code/models/blocks/behavior_model_truth_adu.stan

@@ -0,0 +1,40 @@
+/**
+ * Log density of the per-recording vocalization rates given the child-level
+ * expectations, summed over recordings start..end.
+ *
+ * The signature (sliced integer array first, then start/end) matches the
+ * partial-sum convention -- presumably this is called through reduce_sum; confirm
+ * against the caller.
+ *
+ * @param children      slice of the per-recording child index; element k-start+1
+ *                      belongs to recording k
+ * @param start, end    first and last recording (inclusive) covered by the slice
+ * @param n_recs        unused here, kept for interface compatibility
+ * @param n_classes     unused here, kept for interface compatibility
+ * @param recs_duration duration used to turn counts into rates
+ * @param age           age of the child at each recording
+ * @param truth_vocs    per-recording counts, columns 1..4 (CHI, OCH, FEM, MAL)
+ * @param mu_pop        population means, [1]=CHI, [3]=ADU as in the priors
+ * @param mu_child_och, mu_child_fem, mu_child_mal  child-level expected rates
+ * @param alpha_child   gamma shape per class
+ * @param child_dev_age per-child age slope
+ * @param beta_dev      effect of excess adult input on the age slope
+ * @return accumulated gamma log density
+ */
+real recs_priors_lpmf(array[] int children,
+    int start, int end,
+    int n_recs,
+    int n_classes,
+    real recs_duration,
+    array [] real age,
+    matrix truth_vocs,
+    vector mu_pop,
+    vector mu_child_och,
+    vector mu_child_fem,
+    vector mu_child_mal,
+    vector alpha_child,
+    vector child_dev_age,
+    real beta_dev
+    ) {
+        real ll = 0;
+
+        for (k in start:end) {
+            int child = children[k-start+1];
+            real age_scaled = age[k]/12.0/10.0;
+
+            // Excess adult input of this child relative to the population.
+            // The previous expression read an undeclared matrix `mu_child` and
+            // mu_pop[4] (mu_pop only has n_classes-1 entries); with a pooled
+            // adult class the adult rate is female + male and its population
+            // mean is mu_pop[3].
+            real adu_excess = mu_child_fem[child]+mu_child_mal[child]-mu_pop[3];
+
+            real chi_mu = mu_pop[1]*exp(
+                (child_dev_age[child]+beta_dev*adu_excess)*age_scaled
+            );
+
+            // each class: gamma(shape, shape/mean) on the rate count/1000/duration
+            ll += gamma_lpdf(
+                truth_vocs[k,1]/1000/recs_duration | alpha_child[1], alpha_child[1]/chi_mu
+            );
+
+            ll += gamma_lpdf(
+                truth_vocs[k,2]/1000/recs_duration | alpha_child[2], alpha_child[2]/mu_child_och[child]
+            );
+
+            ll += gamma_lpdf(
+                truth_vocs[k,3]/1000/recs_duration | alpha_child[3], alpha_child[3]/mu_child_fem[child]
+            );
+
+            ll += gamma_lpdf(
+                truth_vocs[k,4]/1000/recs_duration | alpha_child[4], alpha_child[4]/mu_child_mal[child]
+            );
+        }
+
+        return ll;
+    }

+ 54 - 0
code/models/blocks/human_annotations_adu.stan

@@ -0,0 +1,54 @@
+// Likelihood of the human annotations (pooled-adult variant).
+//
+// Level 1: latent clip-level rates are gamma-distributed around the child-level
+// expectation, always parameterised as gamma(shape, shape/mean).
+for (g in 1:n_rates) {
+    int child = speech_rate_child[g];
+    real age_scaled = speech_rate_age[g]/12.0/10.0;
+    real adu_rate = speech_rate_child_adu[child];
+    real fem_share = speech_rate_child_fem_share[child];
+
+    // CHI expectation grows exponentially with age; the slope is the child's
+    // own deviation plus the effect of excess adult input. The adult rate is
+    // centred on mu_pop[3], the same population mean the prior on
+    // mu_corpus_adu uses (it was previously centred on mu_pop[2], the OCH mean).
+    real chi_mu = mu_pop[1]*exp(
+        (alpha_dev+sigma_dev*child_dev_speech_age[child])*age_scaled
+        + beta_dev*(adu_rate-mu_pop[3])*age_scaled
+    );
+
+    speech_rate_chi[g] ~ gamma(
+        alpha_child[1],
+        alpha_child[1]/chi_mu
+    );
+
+    speech_rate_och[g] ~ gamma(
+        alpha_child[2],
+        alpha_child[2]/speech_rate_child_och[child]
+    );
+
+    // the adult rate is split into female / male by the child's female share
+    speech_rate_fem[g] ~ gamma(
+        alpha_child[3],
+        alpha_child[3]/(adu_rate*fem_share)
+    );
+
+    speech_rate_mal[g] ~ gamma(
+        alpha_child[4],
+        alpha_child[4]/(adu_rate*(1-fem_share))
+    );
+}
+
+// Level 0: observed counts are Poisson with mean rate * duration * 1000
+speech_rates[:,1] ~ poisson(speech_rate_chi.*durations*1000);
+speech_rates[:,2] ~ poisson(speech_rate_och.*durations*1000);
+speech_rates[:,3] ~ poisson(speech_rate_fem.*durations*1000);
+speech_rates[:,4] ~ poisson(speech_rate_mal.*durations*1000);
+
+// Level 2: child-level expectations given their corpus
+for (c in 1:n_speech_rate_children) {
+    int corpus_id = speech_rate_child_corpus[c];
+    // Sibling status of the ANNOTATED child c. speech_rate_child_siblings is
+    // indexed like this loop; child_siblings is indexed by the children of the
+    // recordings data set and must not be used here.
+    int no_sib = speech_rate_child_siblings[c]==0;
+    int distrib = no_sib?2:1;
+
+    // The prior applies to child c's own share (the vector is indexed by
+    // child, not by corpus); its beta parameters come from the child's corpus.
+    speech_rate_child_fem_share[c] ~ beta(
+        corpus_fem_share[corpus_id,1],
+        corpus_fem_share[corpus_id,2]
+    );
+
+    speech_rate_child_och[c] ~ gamma(
+        alpha_corpus_och[distrib],
+        (alpha_corpus_och[distrib]/(mu_corpus_och[corpus_id]*exp(
+            no_sib?beta_sib_och:0.0
+        )))
+    );
+
+    // NOTE(review): beta_sib_adu is scaled by 1/10 while beta_sib_och is not;
+    // kept as in the original -- confirm this asymmetry is intended.
+    speech_rate_child_adu[c] ~ gamma(
+        alpha_corpus_adu[distrib],
+        (alpha_corpus_adu[distrib]/(mu_corpus_adu[corpus_id]*exp(
+            no_sib?beta_sib_adu/10.0:0.0
+        )))
+    );
+}
+
+// standardised per-child deviation of the age slope (scaled by sigma_dev above)
+child_dev_speech_age ~ normal(0, 1);

+ 8 - 0
code/models/blocks/human_annotations_parameters_adu.stan

@@ -0,0 +1,8 @@
+// Latent quantities of the human-annotation model.
+// Clip-level rates: one entry per annotated unit g in 1..n_rates; multiplied by
+// durations*1000 they give the Poisson mean of the observed counts.
+vector<lower=0>[n_rates] speech_rate_chi; // truth speech rates observed in annotated clips
+vector<lower=0>[n_rates] speech_rate_och;
+vector<lower=0>[n_rates] speech_rate_fem;
+vector<lower=0>[n_rates] speech_rate_mal;
+// Child-level expectations: one entry per annotated child
+vector<lower=0>[n_speech_rate_children] speech_rate_child_och; // expected OCH rate
+vector<lower=0>[n_speech_rate_children] speech_rate_child_adu; // expected pooled adult rate
+vector<lower=0,upper=1>[n_speech_rate_children] speech_rate_child_fem_share; // fraction of the adult rate attributed to female speech
+vector [n_speech_rate_children] child_dev_speech_age; // standard-normal child effect on the age slope (scaled by sigma_dev)

+ 78 - 0
code/models/posteriors_adu.stan

@@ -0,0 +1,78 @@
+// TODO
+// use speech rates to set priors on truth_vocs
+// Inputs. Several entries (n_recs-level data, n_clips, n_groups, n_validation,
+// threads) are not referenced by the blocks included in this model; they are
+// presumably kept so the same data dictionary can feed sibling models -- confirm.
+data {
+    int<lower=1> n_classes; // number of classes
+
+    // analysis data block
+    int<lower=1> n_recs; // number of recordings
+    int<lower=1> n_children; // number of children in the recordings data
+
+    array[n_recs] int<lower=1> children; // child index of each recording
+    array[n_recs] real<lower=1> age; // age of the child at each recording
+    array[n_recs] int<lower=-1> siblings; // sibling count per recording; 0 = none, >0 = has siblings, -1 presumably unknown
+    array[n_children] int<lower=1> corpus; // corpus index of each child
+
+    real<lower=0> recs_duration;
+
+    // speaker confusion data block
+    int<lower=1> n_clips;   // number of clips
+    int<lower=1> n_groups; // number of groups
+    int<lower=1> n_corpora; // number of corpora (sizes the corpus-level parameters)
+
+    int<lower=0> n_validation;
+
+    // actual speech rates
+    int<lower=1> n_rates; // number of annotated units
+    int<lower=1> n_speech_rate_children; // number of children with annotations
+
+    array [n_rates,n_classes] int<lower=0> speech_rates; // annotated counts; columns 1..4 are modelled as CHI, OCH, FEM, MAL
+    array [n_rates] int group_corpus; // corpus index of each annotated unit
+    vector<lower=0>[n_rates] durations; // duration of each annotated unit (count mean = rate * duration * 1000)
+    array [n_rates] real<lower=0> speech_rate_age; // age of the child for each annotated unit
+    array [n_rates] int<lower=-1> speech_rate_siblings; // sibling count for each annotated unit, same coding as `siblings`
+    array [n_rates] int<lower=1,upper=n_speech_rate_children> speech_rate_child; // annotated-child index of each unit
+
+    // parallel processing
+    int<lower=1> threads;
+}
+
+transformed data {
+    // Per-child lookups derived from the per-unit / per-recording inputs.
+    // When a child appears several times, the last occurrence wins.
+    array[n_speech_rate_children] int<lower=1> speech_rate_child_corpus;
+    array[n_speech_rate_children] int<lower=-1> speech_rate_child_siblings;
+    array[n_children] int<lower=-1> child_siblings;
+
+    // Number of children (recordings data) with a sibling count of 0 / above 0;
+    // negative counts are left out of both tallies.
+    int no_siblings = 0;
+    int has_siblings = 0;
+
+    // annotated children: corpus and sibling count
+    for (r in 1:n_rates) {
+        int annotated_child = speech_rate_child[r];
+        speech_rate_child_corpus[annotated_child] = group_corpus[r];
+        speech_rate_child_siblings[annotated_child] = speech_rate_siblings[r];
+    }
+
+    // recorded children: sibling count
+    for (r in 1:n_recs) {
+        child_siblings[children[r]] = siblings[r];
+    }
+
+    // comparisons evaluate to 0 or 1, so they can be added to the tallies directly
+    for (c in 1:n_children) {
+        no_siblings += (child_siblings[c] == 0);
+        has_siblings += (child_siblings[c] > 0);
+    }
+}
+
+// Parameters are declared in two included fragments: the behaviour-model
+// hyperparameters first, then the latent rates of the human annotations.
+parameters {
+    #include "blocks/behavior_model_parameters_adu.stan"
+    #include "blocks/human_annotations_parameters_adu.stan"
+}
+
+// The model is assembled from two included fragments: priors/hyperpriors of
+// the behaviour model, then the likelihood of the human annotations.
+model {
+    //actual model
+    #include "blocks/behavior_model_priors_adu.stan"
+    #include "blocks/human_annotations_adu.stan"
+}