Procházet zdrojové kódy

[DATALAD] Recorded changes

Lucas Gautheron před 2 měsíci
rodič
revize
581fa72db5

+ 33 - 0
code/models/blocks/confusion_inverse_model_hurdle.stan

@@ -0,0 +1,33 @@
+real inverse_model_lpmf(array[] int children,
+    int start, int end,
+    int n_recs,
+    int n_classes,
+    real duration,
+    array [,] int vocs,
+    array [] real age,
+    matrix truth_vocs,
+    vector p,
+    array [] matrix actual_confusion,
+    //array [] vector actual_fp_rate,
+    matrix mus,
+    matrix alphas//,
+    //vector mus_fp,
+    //vector alphas_fp
+    ) {
+        real ll = 0;
+
+        vector [4] expect;
+
+        for (k in start:end) {
+            expect = rep_vector(0, 4);
+
+            for (i in 1:n_classes) {
+                ll += gamma_lpdf(actual_confusion[k,i] | alphas[i,:], alphas[i,:]./mus[i,:]);                
+                expect[i] = dot_product(truth_vocs[k,:], (actual_confusion[k,:,i]./(1-exp(-actual_confusion[k,:,i]))).*p'); //'
+            }
+            
+            ll += normal_lpdf(vocs[k,:] | expect, sqrt(expect));
+        }
+
+        return ll;
+    }

+ 4 - 4
code/models/blocks/confusion_model.stan

@@ -18,16 +18,16 @@ real confusion_model_lpmf(array[] int group,
             log_contrib_comb[:n] = rep_vector(0, n);
             n = 1;
 
-            for (chi in 0:(truth[k,1]>0?max(0, algo[k,i]):0)) {
+            for (chi in 0:(truth[k,1]>0?max(truth[k,1], algo[k,i]):0)) {
                 bp[1] = truth[k,1]==0?0:poisson_lpmf(chi | truth[k,1]*lambda[group[k-start+1],1,i]);
 
-                for (och in 0:(truth[k,2]>0?max(0, algo[k,i]-chi):0)) {
+                for (och in 0:(truth[k,2]>0?max(truth[k,2], algo[k,i]-chi):0)) {
                     bp[2] = truth[k,2]==0?0:poisson_lpmf(och | truth[k,2]*lambda[group[k-start+1],2,i]);
 
-                    for (fem in 0:(truth[k,3]>0?max(0, algo[k,i]-chi-och):0)) {
+                    for (fem in 0:(truth[k,3]>0?max(truth[k,3], algo[k,i]-chi-och):0)) {
                         bp[3] = truth[k,3]==0?0:poisson_lpmf(fem | truth[k,3]*lambda[group[k-start+1],3,i]);
 
-                        for (mal in 0:(truth[k,4]>0?max(0, algo[k,i]-chi-och-fem):0)) {
+                        for (mal in 0:(truth[k,4]>0?max(truth[k,4], algo[k,i]-chi-och-fem):0)) {
                             bp[4] = truth[k,4]==0?0:poisson_lpmf(mal | truth[k,4]*lambda[group[k-start+1],4,i]);
 
                             int delta = algo[k,i] - (mal+fem+och+chi);

+ 64 - 0
code/models/blocks/confusion_model_hurdle.stan

@@ -0,0 +1,64 @@
+real confusion_model_lpmf(array[] int group,
+    int start, int end,
+    int n_classes,
+    array[,] int algo,
+    array[,] int truth,
+    array[] real age,
+    array[] real clip_duration,
+    vector p,
+    array[] matrix lambda
+) {
+    real ll = 0;
+    vector [4] dp;
+    vector [4] bp;
+
+    vector[8192] log_contrib_detect;
+    int n = size(log_contrib_detect);
+
+    vector[8192] log_contrib_comb;
+    int n = size(log_contrib_comb);
+
+    for (k in start:end) {
+        log_contrib_detect[:n] = rep_vector(0, n);
+        for (chi_d in 0:truth[k,1]) {
+            dp[1] = binomial_lpmf(chi_d | truth[k,1], p[1]);
+            for (och_d in 0:truth[k,2]) {
+                dp[2] = binomial_lpmf(och_d | truth[k,2], p[2]);
+                for (fem_d in 0:truth[k,3]) {
+                    dp[3] = binomial_lpmf(fem_d | truth[k,3], p[3]);
+                        for (mal_d in 0:truth[k,4]) {
+                            dp[4] = binomial_lpmf(mal_d | truth[k,4], p[4]);
+                            for (i in 1:n_classes) {
+                                log_contrib_comb[:n] = rep_vector(sum(dp), n);
+                                n = 1;
+                                for (chi in 0:(chi_d>0?max(0, algo[k,i]):0)) {
+                                    bp[1] = chi_d==0?0:poisson_lpmf(chi_d | chi_d*lambda[group[k-start+1],1,i])-log1m_exp(-chi_d*lambda[group[k-start+1],1,i]);
+                                        for (och in 0:(och_d>0?max(0, algo[k,i]-chi):0)) {
+                                        bp[2] = och_d==0?0:poisson_lpmf(och | och_d*lambda[group[k-start+1],2,i])-log1m_exp(-och_d*lambda[group[k-start+1],2,i]);
+                                            for (fem in 0:(fem_d>0?max(0, algo[k,i]-chi-och):0)) {
+                                                bp[3] = fem_d==0?0:poisson_lpmf(fem_d | fem_d*lambda[group[k-start+1],3,i])-log1m_exp(-fem_d*lambda[group[k-start+1],3,i]);
+                                                for (mal in 0:(mal_d>0?max(0, algo[k,i]-chi-och-fem):0)) {
+                                                    bp[4] = mal_d==0?0:poisson_lpmf(mal | mal_d*lambda[group[k-start+1],4,i])-log1m_exp(-mal_d*lambda[group[k-start+1],4,i]);
+
+                                                    int delta = algo[k,i] - (mal+fem+och+chi);
+                                                    if (delta==0) {
+                                                        log_contrib_comb[n] += sum(bp);
+                                                        n = n+1;
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                if (n>1) {
+                                    ll += log_sum_exp(log_contrib_comb[1:n-1]);
+                                }
+                            }
+                        }
+                    }
+                }   
+            }
+        }
+    }
+    return ll;
+}

+ 4 - 0
code/models/blocks/confusion_model_parameters_hurdle.stan

@@ -0,0 +1,4 @@
+matrix<lower=0>[n_classes,n_classes] alphas;
+matrix<lower=0>[n_classes,n_classes] mus;
+array [n_groups] matrix<lower=0>[n_classes,n_classes] lambda;
+vector<lower=0,upper=1>[n_classes] p;

+ 2 - 2
code/models/blocks/confusion_model_priors.stan

@@ -2,7 +2,7 @@ for (i in 1:n_classes) {
     for (j in 1:n_classes) {
         if (i==j) {
             mus[i,j] ~ exponential(2);
-            (alphas[i,j]/10.0) ~ gamma(2, 1);
+            alphas[i,j] ~ gamma(2, 1);
         }
         else {
             mus[i,j] ~ exponential(8);
@@ -12,4 +12,4 @@ for (i in 1:n_classes) {
             lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/mus[i,j]);
         }
     }
-}
+}

+ 12 - 0
code/models/blocks/confusion_model_priors_hurdle.stan

@@ -0,0 +1,12 @@
+p ~ uniform(0, 1);
+for (i in 1:n_classes) {
+    for (j in 1:n_classes) {
+        if (i==j) {
+            mus[i,j] ~ exponential(1);
+            alphas[i,j] ~ gamma(2, 1);
+        }
+        for (c in 1:n_groups) {
+            lambda[c,i,j] ~ gamma(alphas[i,j], alphas[i,j]/mus[i,j]);
+        }
+    }
+}

+ 130 - 0
code/models/dev_siblings_hurdle.stan

@@ -0,0 +1,130 @@
+functions {
+    #include "blocks/confusion_model_hurdle.stan"
+    #include "blocks/confusion_inverse_model_hurdle.stan"
+    #include "blocks/behavior_model_truth.stan"
+}
+
+// TODO
+// use speech rates to set priors on truth_vocs
+data {
+    int<lower=1> n_classes; // number of classes
+
+    // analysis data block
+    int<lower=1> n_recs;
+    int<lower=1> n_children;
+
+    array[n_recs] int<lower=1> children;
+    array[n_recs] real<lower=1> age;
+    array[n_recs] int<lower=-1> siblings;
+    array[n_recs, n_classes] int<lower=0> vocs;
+    array[n_children] int<lower=1> corpus;
+
+    real<lower=0> recs_duration;
+
+    // speaker confusion data block
+    int<lower=1> n_clips;   // number of clips
+    int<lower=1> n_groups; // number of groups
+    int<lower=1> n_corpora;
+    array [n_clips] int group;
+    array [n_clips] int conf_corpus;
+    array [n_clips,n_classes] int<lower=0> algo_total; // algo vocs attributed to specific speakers
+    array [n_clips,n_classes] int<lower=0> truth_total;
+    array [n_clips] real<lower=0> clip_duration;
+    array [n_clips] real<lower=0> clip_age;
+
+    int<lower=0> n_validation;
+
+    // actual speech rates
+    int<lower=1> n_rates;
+    int<lower=1> n_speech_rate_children;
+
+    array [n_rates,n_classes] int<lower=0> speech_rates;
+    array [n_rates] int group_corpus;
+    array [n_rates] real<lower=0> durations;
+    array [n_rates] real<lower=0> speech_rate_age;
+    array [n_rates] int<lower=-1> speech_rate_siblings;
+    array [n_rates] int<lower=1,upper=n_speech_rate_children> speech_rate_child;
+
+    // parallel processing
+    int<lower=1> threads;
+}
+
+transformed data {
+    vector<lower=0>[n_groups] recording_age;
+    array[n_speech_rate_children] int<lower=1> speech_rate_child_corpus;
+
+    array[n_children] int<lower=-1> child_siblings;
+    array[n_speech_rate_children] int<lower=-1> speech_rate_child_siblings;
+    int no_siblings = 0;
+    int has_siblings = 0;
+
+    for (c in 1:n_clips) {
+        recording_age[group[c]] = clip_age[c];
+    }
+
+    for (k in 1:n_rates) {
+        speech_rate_child_corpus[speech_rate_child[k]] = group_corpus[k];
+    }
+
+    for (k in 1:n_recs) {
+        child_siblings[children[k]] = siblings[k];
+    }
+
+    for (c in 1:n_children) {
+        if (child_siblings[c] == 0) {
+            no_siblings += 1;
+        }
+        else if (child_siblings[c] > 0) {
+            has_siblings += 1;
+        }
+    }
+
+    for (k in 1:n_rates) {
+        speech_rate_child_siblings[speech_rate_child[k]] = speech_rate_siblings[k];
+    }
+}
+
+parameters {
+    matrix<lower=0>[n_children,n_classes-1] mu_child_level;
+    vector [n_children] child_dev_age;
+    matrix<lower=0> [n_recs, n_classes] truth_vocs;
+    array [n_recs] matrix<lower=0>[n_classes,n_classes] actual_confusion_baseline;
+
+    // confusion parameters
+    #include "blocks/confusion_model_parameters_hurdle.stan"
+
+    // behavior model parameters
+    #include "blocks/behavior_model_parameters.stan"
+
+    // parameters specific to human annotations
+    #include "blocks/human_annotations_parameters.stan"
+}
+
+model {
+    // inverse confusion model
+    target += reduce_sum(
+       inverse_model_lpmf, children, 1,
+       n_recs, n_classes, recs_duration,
+       vocs, age,
+       truth_vocs, actual_confusion_baseline, p, mus, alphas
+    );    
+
+    // contribution of full recordings to the model of behavior
+    #include "blocks/behavior_observations_model.stan"
+
+    target += reduce_sum(
+        confusion_model_lpmf, group, n_clips%/%(threads*4),
+        n_classes,
+        algo_total, truth_total, clip_duration, clip_age,
+        p, lambda//, lambda_fp
+    );
+
+    // priors on the nuisance parameters of the confusion model
+    #include "blocks/confusion_model_priors_hurdle.stan"
+
+    // priors on the hierarchical model of speech behavior
+    #include "blocks/behavior_model_priors.stan"
+
+    // human annotations contribution
+    #include "blocks/human_annotations.stan"
+}