algo_siblings_effect.stan 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. functions {
  2. real recs_priors_lpmf(array[] int children,
  3. int start, int end,
  4. int n_recs,
  5. int n_classes,
  6. real recs_duration,
  7. array [] real age,
  8. matrix truth_vocs,
  9. vector mu_pop_level,
  10. matrix mu_child_level,
  11. vector alpha_child_level//,
  12. //vector child_dev_age,
  13. //real beta_dev
  14. ) {
  15. real ll = 0;
  16. for (k in start:end) {
  17. // real chi_mu = mu_pop_level[1]*exp(
  18. // child_dev_age[children[k-start+1]]*age[k]/12.0/10.0+beta_dev*(mu_child_level[children[k-start+1],2]+mu_child_level[children[k-start+1],3]-mu_pop_level[3]-mu_pop_level[4])*age[k]/12.0/10.0
  19. // );
  20. real chi_mu = mu_pop_level[1];
  21. ll += gamma_lpdf(
  22. truth_vocs[k,1]/1000/recs_duration | alpha_child_level[1], alpha_child_level[1]/chi_mu
  23. );
  24. ll += gamma_lpdf(
  25. truth_vocs[k,2:]/1000/recs_duration | alpha_child_level[2:], alpha_child_level[2:]./mu_child_level[children[k-start+1],:]' //'
  26. );
  27. }
  28. return ll;
  29. }
  30. }
  31. // TODO
  32. // use speech rates to set priors on truth_vocs
  33. data {
  34. int<lower=1> n_classes; // number of classes
  35. // analysis data block
  36. int<lower=1> n_recs;
  37. int<lower=1> n_children;
  38. array[n_recs] int<lower=1> children;
  39. array[n_recs] real<lower=1> age;
  40. array[n_recs] int<lower=-1> siblings;
  41. array[n_recs, n_classes] int<lower=0> vocs;
  42. array[n_children] int<lower=1> corpus;
  43. real<lower=0> recs_duration;
  44. int<lower=1> n_corpora;
  45. // actual speech rates
  46. int<lower=1> n_rates;
  47. int<lower=1> n_speech_rate_children;
  48. array [n_rates,n_classes] int<lower=0> speech_rates;
  49. array [n_rates] int group_corpus;
  50. array [n_rates] real<lower=0> durations;
  51. array [n_rates] real<lower=0> speech_rate_age;
  52. array [n_rates] int<lower=-1> speech_rate_siblings;
  53. array [n_rates] int<lower=1,upper=n_speech_rate_children> speech_rate_child;
  54. // parallel processing
  55. int<lower=1> threads;
  56. }
  57. transformed data {
  58. matrix[n_recs, n_classes] vocs_matrix = to_matrix(vocs);
  59. array[n_speech_rate_children] int<lower=1> speech_rate_child_corpus;
  60. array[n_children] int<lower=-1> child_siblings;
  61. array[n_speech_rate_children] int<lower=-1> speech_rate_child_siblings;
  62. int no_siblings = 0;
  63. int has_siblings = 0;
  64. for (k in 1:n_rates) {
  65. speech_rate_child_corpus[speech_rate_child[k]] = group_corpus[k];
  66. }
  67. for (k in 1:n_recs) {
  68. child_siblings[children[k]] = siblings[k];
  69. }
  70. for (c in 1:n_children) {
  71. if (child_siblings[c] == 0) {
  72. no_siblings += 1;
  73. }
  74. else if (child_siblings[c] > 0) {
  75. has_siblings += 1;
  76. }
  77. }
  78. for (k in 1:n_rates) {
  79. speech_rate_child_siblings[speech_rate_child[k]] = speech_rate_siblings[k];
  80. }
  81. }
  82. parameters {
  83. matrix<lower=0>[n_children,n_classes-1] mu_child_level;
  84. //vector [n_children] child_dev_age;
  85. // speech rates
  86. vector<lower=0>[n_classes] alpha_child_level; // variance across recordings for a given child
  87. matrix<lower=0>[n_classes-1,n_corpora] alpha_corpus_level; // variance among children
  88. matrix<lower=0>[n_classes-1,n_corpora] mu_corpus_level; // child-level average
  89. vector<lower=0>[n_classes-1] alpha_pop_level; // variance among corpora
  90. vector<lower=0>[n_classes] mu_pop_level; // population level averages
  91. vector<lower=0>[n_classes-1] alpha_pop;
  92. matrix<lower=0>[n_classes,n_rates] speech_rate; // truth speech rates observed in annotated clips
  93. matrix<lower=0>[n_speech_rate_children,n_classes-1] speech_rate_child_level; // expected speech rate at the child-level
  94. // vector<lower=0>[n_corpora] alpha_corpus_level_adu;
  95. // vector<lower=0>[n_corpora] mu_corpus_level_adu;
  96. // siblings
  97. real beta_sib_och; // effect of n of siblings on OCH speech
  98. // real beta_sib_adu; // effect of n of siblings on ADU speech
  99. real<lower=0,upper=1> p_sib; // prob of having siblings
  100. //vector [n_speech_rate_children] child_dev_speech_age;
  101. // average effect of age
  102. // real alpha_dev;
  103. // real<lower=0> sigma_dev;
  104. // effect of excess ADU input
  105. //real beta_dev;
  106. }
  107. model {
  108. // priors on actual speech
  109. target += reduce_sum(
  110. recs_priors_lpmf, children, 1,
  111. n_recs, n_classes, recs_duration, age,
  112. vocs_matrix,
  113. mu_pop_level, mu_child_level, alpha_child_level//,
  114. // child_dev_age, beta_dev
  115. );
  116. vector [2] ll;
  117. for (c in 1:n_children) {
  118. mu_child_level[c,2:] ~ gamma(
  119. alpha_corpus_level[2:,corpus[c]],
  120. (alpha_corpus_level[2:,corpus[c]]./mu_corpus_level[2:,corpus[c]])
  121. );
  122. // if there is sibling data
  123. if (child_siblings[c]>=0) {
  124. mu_child_level[c,1] ~ gamma(
  125. alpha_corpus_level[1,corpus[c]],
  126. (alpha_corpus_level[1,corpus[c]]/(mu_corpus_level[1,corpus[c]]*exp(
  127. child_siblings[c]>0?beta_sib_och:0
  128. )))
  129. );
  130. //(mu_child_level[c,2]+mu_child_level[c,3]) ~ gamma(
  131. // alpha_corpus_level_adu[corpus[c]],
  132. // (alpha_corpus_level_adu[corpus[c]]/(mu_corpus_level_adu[corpus[c]]*exp(
  133. // child_siblings[c]>0?beta_sib_adu:0
  134. // )))
  135. //);
  136. }
  137. // otherwise
  138. else {
  139. ll[1] = log(p_sib)+gamma_lpdf(
  140. mu_child_level[c,1] | alpha_corpus_level[1,corpus[c]], alpha_corpus_level[1,corpus[c]]/(mu_corpus_level[1,corpus[c]]*exp(beta_sib_och))
  141. );
  142. //ll[1] += gamma_lpdf(
  143. // mu_child_level[c,2]+mu_child_level[c,3] | alpha_corpus_level_adu[corpus[c]], (alpha_corpus_level_adu[corpus[c]]/(mu_corpus_level_adu[corpus[c]]*exp(beta_sib_adu)))
  144. //);
  145. ll[2] = log(1-p_sib)+gamma_lpdf(
  146. mu_child_level[c,1] | alpha_corpus_level[1,corpus[c]], alpha_corpus_level[1,corpus[c]]/(mu_corpus_level[1,corpus[c]])
  147. );
  148. // ll[2] += gamma_lpdf(
  149. // mu_child_level[c,2]+mu_child_level[c,3] | alpha_corpus_level_adu[corpus[c]], (alpha_corpus_level_adu[corpus[c]]/(mu_corpus_level_adu[corpus[c]]))
  150. // );
  151. target += log_sum_exp(ll);
  152. }
  153. }
  154. alpha_child_level ~ gamma(2,1);
  155. // speech rates
  156. mu_pop_level ~ exponential(4); // 250 vocs/hour
  157. alpha_pop_level ~ gamma(8, 4); // sd = 0.35 x \mu
  158. alpha_pop ~ gamma(10, 10);
  159. for (i in 1:n_classes-1) {
  160. alpha_corpus_level[i,:] ~ gamma(4, 4/alpha_pop[i]);
  161. mu_corpus_level[i,:] ~ gamma(alpha_pop_level[i],alpha_pop_level[i]/mu_pop_level[i+1]);
  162. }
  163. // mu_corpus_level_adu ~ exponential(4);
  164. // alpha_corpus_level_adu ~ gamma(4,4);
  165. for (g in 1:n_rates) {
  166. // real chi_mu = mu_pop_level[1]*exp(
  167. // child_dev_speech_age[speech_rate_child[g]]*speech_rate_age[g]/12.0/10.0 + beta_dev*(speech_rate_child_level[speech_rate_child[g],2]+speech_rate_child_level[speech_rate_child[g],3]-mu_pop_level[3]-mu_pop_level[4])*speech_rate_age[g]/12.0/10.0
  168. // );
  169. real chi_mu = mu_pop_level[1];
  170. speech_rate[1,g] ~ gamma(
  171. alpha_child_level[1],
  172. alpha_child_level[1]/chi_mu
  173. );
  174. speech_rate[2:,g] ~ gamma(
  175. alpha_child_level[2:],
  176. (alpha_child_level[2:]./(speech_rate_child_level[speech_rate_child[g],:]')) //'
  177. );
  178. speech_rates[g,:] ~ poisson(speech_rate[:,g]*durations[g]*1000);
  179. }
  180. for (c in 1:n_speech_rate_children) {
  181. speech_rate_child_level[c,1] ~ gamma(
  182. alpha_corpus_level[1,speech_rate_child_corpus[c]],
  183. (alpha_corpus_level[1,speech_rate_child_corpus[c]]/(mu_corpus_level[1,speech_rate_child_corpus[c]]*exp(
  184. speech_rate_child_siblings[c]>0?beta_sib_och:0
  185. )))
  186. );
  187. speech_rate_child_level[c,2:] ~ gamma(
  188. alpha_corpus_level[2:,speech_rate_child_corpus[c]],
  189. (alpha_corpus_level[2:,speech_rate_child_corpus[c]]./(mu_corpus_level[2:,speech_rate_child_corpus[c]]))
  190. );
  191. }
  192. //child_dev_age ~ normal(0, 1);
  193. //child_dev_speech_age ~ normal(0, 1);
  194. //beta_dev ~ normal(0, 1);
  195. has_siblings ~ binomial(has_siblings+no_siblings, p_sib);
  196. p_sib ~ uniform(0, 1);
  197. beta_sib_och ~ normal(0, 1);
  198. // beta_sib_adu ~ normal(0, 1);
  199. // alpha_dev ~ normal(0, 1);
  200. // sigma_dev ~ exponential(1);
  201. }