Browse Source

more bug fixes but still need to double check all numbers

alecristia 4 months ago
parent
commit
4ce629128c
13 changed files with 614 additions and 592 deletions
  1. BIN
      CODE/Rplot.pdf
  2. 55 31
      CODE/SM.Rmd
  3. 541 542
      CODE/SM.log
  4. BIN
      CODE/SM.pdf
  5. BIN
      CODE/fig2.png
  6. BIN
      CODE/fig4.png
  7. BIN
      CODE/fig5.png
  8. BIN
      CODE/fig6.png
  9. BIN
      CODE/fig7.png
  10. BIN
      CODE/fig8.png
  11. BIN
      CODE/fig9.png
  12. 17 18
      CODE/sessionInfo.txt
  13. 1 1
      OUTPUT/corpus_description.csv

BIN
CODE/Rplot.pdf


+ 55 - 31
CODE/SM.Rmd

@@ -1,13 +1,13 @@
 ---
 title: Supplementary Materials to Establishing the reliability and validity of measures extracted from long-form recordings
 output:
+  pdf_document:
+    toc: yes
+    toc_depth: 3
   html_document:
     toc: yes
     toc_depth: '3'
     df_print: paged
-  pdf_document:
-    toc: yes
-    toc_depth: 3
 ---
 
 ```{r setup, include=FALSE, eval=TRUE}
@@ -157,30 +157,30 @@ Third, and perhaps most relevant, we looked for references that evaluated the ps
 
 
 ```{r tab2}
-chi_per_corpus= aggregate(data = mydat_aclew, child_id ~ experiment, function(child_id) length(unique(child_id)))[,2]
+chiXcor= aggregate(data = mydat_aclew, child_id ~ experiment, function(child_id) length(unique(child_id)))[,2]
 
-rec_per_corpus = aggregate(data = mydat_aclew, session_id ~ experiment, function(session_id) length(unique(session_id)))[,2]
+recXcor = aggregate(data = mydat_aclew, session_id ~ experiment, function(session_id) length(unique(session_id)))[,2]
 
 rec_per_child = setNames(aggregate(data = mydat_aclew, session_id ~ experiment*child_id, function(session_id) length(unique(session_id))), c('experiment', 'Chi', 'No_rec'))
 
 min_rec_per_child = aggregate(data = rec_per_child, No_rec ~ experiment, min)[,2]
 max_rec_per_child = aggregate(data = rec_per_child, No_rec ~ experiment, max)[,2]
-rec_r_per_child = paste(min_rec_per_child,max_rec_per_child,sep="-")
+recRXchi = paste(min_rec_per_child,max_rec_per_child,sep="-")
 
-dur_per_corpus = aggregate(data = mydat_aclew, duration_vtc ~ experiment, function(duration_vtc) round(mean(duration_vtc)/3.6e+6,1))[,2]
+durXcor = aggregate(data = mydat_aclew, duration_vtc ~ experiment, function(duration_vtc) round(mean(duration_vtc)/3.6e+6,1))[,2]
 
-age_mean_per_corpus = aggregate(data = mydat_aclew, age ~ experiment, function(age) round(mean(age),1))[,2]
+ageXcor = aggregate(data = mydat_aclew, age ~ experiment, function(age) round(mean(age),1))[,2]
 
 age_min_per_corpus = aggregate(data = mydat_aclew, age ~ experiment, function(age) min(age))[,2]
 
 age_max_per_corpus = aggregate(data = mydat_aclew, age ~ experiment, function(age) max(age))[,2]
 
-age_r_per_corpus = paste(age_min_per_corpus,age_max_per_corpus,sep="-")
+ageRXcor = paste(age_min_per_corpus,age_max_per_corpus,sep="-")
 
 corpus=c("bergelson", "cougar", "fausey-trio", "lucid","lyon", "quechua",  "warlaumont", "winnipeg")
 location=c("Northeast US", "Northwest US", "Western US", "Northwest England", "Central France", "Highlands Bolivia", "Western US", "Western Canada")
 
-corpus_description=cbind(corpus,location,chi_per_corpus, rec_r_per_child, rec_per_corpus, dur_per_corpus, age_mean_per_corpus,age_r_per_corpus)
+corpus_description=cbind(corpus,location,chiXcor, recRXchi, recXcor, durXcor, ageXcor,ageRXcor)
 
 write.table(corpus_description, "../output/corpus_description.csv", sep='\t')
 
@@ -195,7 +195,7 @@ nrecs=length(levels(mydat_aclew$session_id))
 
 ## SM D: Code to reproduce Fig. 2
 
-```{r icc-examples-fig2, fig.width=4, fig.height=3,fig.cap="Figure 2 (reproduced). Scatterplots for two selected variables. The left one has relatively low ICCs; the right one has relatively higher ICCs."}
+```{r icc-examples-fig2,  fig.width=6, fig.height=4.5,fig.cap="Figure 2 (reproduced). Scatterplots for two selected variables. The left one has relatively low ICCs; the right one has relatively higher ICCs."}
 # figure of bad ICC: lena     used to be: avg_voc_dur_chi, now is: peak_wc_adu_ph; good ICC: lena used to be: voc_och_ph, now is: voc_dur_och_ph
 
 # remove missing data points altogether
@@ -258,11 +258,12 @@ panel.background = element_blank(), axis.line = element_line(colour = "black"))
   geom_abline(intercept = 0, slope = 1)
 
 
-ggarrange(bad, good,
+fig2 = ggarrange(bad, good,
           ncol = 2, nrow = 1, common.legend = TRUE, vjust = 1.5, hjust=0,
           font.label = list(size = 20))  + labs(color= "Corpus")  +  theme(text = element_text(size = 20))
+fig2 
 
-
+ggsave("fig2.png", plot = fig2, width = 6, height = 4.5, units = "in")
 ```
 
 ## SM E: Code to reproduce text at the beginning of the "Setting the stage" section
@@ -318,7 +319,7 @@ cor_t=t.test(rval_tab$m ~ rval_tab$data_set)
 
 ```
 
-> To see whether correlations in this analysis differed by talker types and pipelines, we fit a linear model with the formula $lm(cor ~ type * pipeline)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children; and pipeline LENA or ACLEW. The model was overall significant (F(`round(reg_sum_cor$fstatistic["dendf"],2)`) = `round(reg_sum_cor$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum_cor$adj.r.squared*100)`%, suggesting this model did not explain a great deal of variance in correlation coefficients. A Type 3 ANOVA on this model revealed a significant effect of pipeline (F = `r round(reg_anova_cor["data_set","F value"],2)`, p = `r round(reg_anova_cor["data_set","Pr(>F)"],2)`), due to higher correlations for ACLEW (`r r_msds["aclew","x"]`) than for LENA metrics (m = `r r_msds["lena","x"]`). 
+> To see whether correlations in this analysis differed by talker types and pipelines, we fit a linear model with the formula $lm(cor \sim type * pipeline)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children; and pipeline LENA or ACLEW. The model was overall significant (F(`r round(reg_sum_cor$fstatistic["dendf"],2)`) = `r round(reg_sum_cor$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum_cor$adj.r.squared*100)`%, suggesting this model did not explain a great deal of variance in correlation coefficients. A Type 3 ANOVA on this model revealed a significant effect of pipeline (F = `r round(reg_anova_cor["data_set","F value"],2)`, p = `r round(reg_anova_cor["data_set","Pr(>F)"],2)`), due to higher correlations for ACLEW (m = `r r_msds["aclew","x"]`) than for LENA metrics (m = `r r_msds["lena","x"]`). 
 
 See table below for results of the Type 3 ANOVA.
 
@@ -333,11 +334,16 @@ kable(round(reg_anova_cor,2),caption="Type 3 ANOVA on model attempting to explai
 ```{r r-fig4, echo=F,fig.width=4, fig.height=3,fig.cap="Figure 4 (reproduced). Violin plot reflecting the distribution of correlations."}
 
 
-ggplot(rval_tab, aes(y = m, x = toupper(data_set))) +
+fig4 <- ggplot(rval_tab, aes(y = m, x = toupper(data_set))) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom(aes(colour = Type,shape = Type)) +  
-  theme() +labs( y = "r",x="Pipeline")
+  theme() +labs( y = "r",x="Pipeline") + 
+  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
+panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
 
+fig4
+
+ggsave("fig4.png", plot = fig4, width = 4, height = 3, units = "in")
 
 ```
 
@@ -446,13 +452,17 @@ panel.background = element_blank(), legend.key=element_blank(), axis.line = elem
 ```{r icc-allexp-fig5, echo=F,fig.width=4, fig.height=3,fig.cap="Figure 5 (reproduced). Violin plot reflecting the distribution of Child ICC."}
 
 
-ggplot(df.icc.mixed, aes(y = icc_child_id, x = toupper(data_set))) +
+fig5 <- ggplot(df.icc.mixed, aes(y = icc_child_id, x = toupper(data_set))) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom(aes(colour = Type,shape = Type)) +  
   labs( y = "Child ICC",x="Pipeline") +  theme(text = element_text(size = 20)) + 
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
 
+fig5
+
+ggsave("fig5.png", plot = fig5, width = 4, height = 3, units = "in")
+
 ```
 
 
@@ -478,7 +488,7 @@ rownames(msds_p)<-msds_p$Group.1
 ```
 
 
-> Next, we explored how similar Child ICCs were across different talker types and pipelines. We fit a linear model with the formula $lm(icc\_child\_id ~ type * pipeline)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children; and pipeline LENA or ACLEW. The model was overall significant (F(`round(reg_sum$fstatistic["dendf"],2)`) = `round(reg_sum$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum$adj.r.squared*100)`%, suggesting much of the variance across Child ICCs was explained by these factors. A Type 3 ANOVA on this model revealed type was a signficant predictor (F(`r reg_anova["Type","Df"]`) = `r round(reg_anova["Type","F value"],1)`, p<.001), as was pipeline (F(`r reg_anova["data_set","Df"]`) = `r round(reg_anova["data_set","F value"],1)`, p = `r round(reg_anova["data_set","Pr(>F)"],3)`); the interaction between type and pipeline was not significant. The main effect of type emerged because output metrics tended to have higher Child ICC (`r msds["Output","x"]`)  than those associated to adults in general (`r msds["Adults","x"]`), females (`r msds["Female","x"]`), and males (`r msds["Male","x"]`); whereas those associated with other children had even higher Child ICCs (`r msds["Other children","x"]`). The main effect of pipeline arose because of slightly higher Child ICCs for the ACLEW metrics (`r msds_p["aclew","x"]`) than for LENA metrics (`r msds_p["lena","x"]`). 
+> Next, we explored how similar Child ICCs were across different talker types and pipelines. We fit a linear model with the formula $lm(icc\_child\_id \sim type * pipeline)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children; and pipeline LENA or ACLEW. The model was overall significant (F(`r round(reg_sum$fstatistic["dendf"],2)`) = `r round(reg_sum$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum$adj.r.squared*100)`%, suggesting much of the variance across Child ICCs was explained by these factors. A Type 3 ANOVA on this model revealed type was a significant predictor (F(`r reg_anova["Type","Df"]`) = `r round(reg_anova["Type","F value"],1)`, p<.001), as was pipeline (F(`r reg_anova["data_set","Df"]`) = `r round(reg_anova["data_set","F value"],1)`, p = `r round(reg_anova["data_set","Pr(>F)"],3)`); the interaction between type and pipeline was not significant. The main effect of type emerged because output metrics tended to have higher Child ICC (`r msds["Output","x"]`)  than those associated to adults in general (`r msds["Adults","x"]`), females (`r msds["Female","x"]`), and males (`r msds["Male","x"]`); whereas those associated with other children had even higher Child ICCs (`r msds["Other children","x"]`). The main effect of pipeline arose because of slightly higher Child ICCs for the ACLEW metrics (`r msds_p["aclew","x"]`) than for LENA metrics (`r msds_p["lena","x"]`). 
 
 
 ## SM O: Code to reproduce Table 4
@@ -528,7 +538,7 @@ f_labels<-data.frame(age_bin=levels(df.icc.age$age_bin),facet_labels_chi=facet_l
 
 f_labels$age_bin<-factor(f_labels$age_bin,levels=age_levels)
 
-ggplot(df.icc.age, aes(y = icc_child_id, x = toupper(data_set))) +
+fig6 <- ggplot(df.icc.age, aes(y = icc_child_id, x = toupper(data_set))) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom(aes(colour = Type,shape = Type)) +  
   theme(legend.position="none") +labs( y = "r",x="Pipeline") + facet_wrap(~age_bin, ncol = 3) +
@@ -537,6 +547,9 @@ ggplot(df.icc.age, aes(y = icc_child_id, x = toupper(data_set))) +
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
 
+fig6
+
+ggsave("fig6.png", plot = fig6, width = 6, height = 10, units = "in")
 
 
 ```
@@ -556,7 +569,7 @@ reg_anova_age_icc=Anova(age_icc)
 
 ```
 
-> To interrogate these results statistically, and assess whether Child ICCs tended to be higher or lower in certain age bins, we fit a linear model with the formula $lm(Child_ICC ~ type * pipeline * age_bin)$. The model was overall significant (F(`round(reg_sum_age_icc$fstatistic["dendf"],2)`) = `round(reg_sum_age_icc$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum_age_icc$adj.r.squared*100)`%, suggesting this model explained about a third of the variance in Child ICC.  A Type 3 ANOVA on this model revealed type was a signficant predictor (F(`r reg_anova["Type","Df"]`) = `r round(reg_anova["Type","F value"],1)`, p<.001), whereas as was pipeline (F(`r reg_anova["data_set","Df"]`) = `r round(reg_anova["data_set","F value"],1)`, p = `r round(reg_anova["data_set","Pr(>F)"],3)`); the interaction between type and pipeline was not significant. 
+> To interrogate these results statistically, and assess whether Child ICCs tended to be higher or lower in certain age bins, we fit a linear model with the formula $lm(Child\_ICC \sim type * pipeline * age\_bin)$. The model was overall significant (F(`r round(reg_sum_age_icc$fstatistic["dendf"],2)`) = `r round(reg_sum_age_icc$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum_age_icc$adj.r.squared*100)`%, suggesting this model explained about a third of the variance in Child ICC.  A Type 3 ANOVA on this model revealed type was a significant predictor (F(`r reg_anova["Type","Df"]`) = `r round(reg_anova["Type","F value"],1)`, p<.001), as was pipeline (F(`r reg_anova["data_set","Df"]`) = `r round(reg_anova["data_set","F value"],1)`, p = `r round(reg_anova["data_set","Pr(>F)"],3)`); the interaction between type and pipeline was not significant. 
 
 See table below for results of the Type 3 ANOVA.
 
@@ -591,12 +604,16 @@ r_X_age$ageA=factor(r_X_age$ageA,levels=age_levels)
 
 #summary(r_X_age$cor) #mean correlation across corpora is zero!
 
-ggplot(r_X_age, aes(y = cor, x = ageA)) +
+fig7 <- ggplot(r_X_age, aes(y = cor, x = ageA)) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom() +
   theme() +labs( y = "Correlation coefficient r",x="Age") + 
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
+
+fig7
+
+ggsave("fig7.png", plot = fig7, width = 4, height = 4, units = "in")
 ```
 
 
@@ -610,16 +627,16 @@ panel.background = element_blank(), legend.key=element_blank(), axis.line = elem
 ## SM U: Code to reproduce Figure 8
 
 
-```{r icc-bycor-fig8, echo=F,fig.width=4, fig.height=10,fig.cap="Figure 8 (reproduced). Child ICC by metric type and pipeline, when considering each corpus separately."}
+```{r icc-bycor-fig8, echo=F,fig.width=4, fig.height=4,fig.cap="Figure 8 (reproduced). Child ICC by metric type and pipeline, when considering each corpus separately."}
 
-facet_labels_chi = paste0("N chi=",chi_per_corpus)
+facet_labels_chi = paste0("N chi=",chiXcor)
 
 #and then we structure it so that it goes on the plot
 f_labels<-data.frame(levels(factor(df.icc.corpus$corpus)),facet_labels_chi=facet_labels_chi)
 
 colnames(f_labels)<-c("corpus","nchi")
 
-ggplot(df.icc.corpus, aes(y = icc_child_id, x = toupper(data_set))) +
+fig8 <- ggplot(df.icc.corpus, aes(y = icc_child_id, x = toupper(data_set))) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom(aes(colour = Type,shape = Type)) +  
   theme(legend.position = "top", axis.title.y=element_blank() ,axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +labs( y = "Child ICC",x="Pipeline") +   
@@ -628,6 +645,9 @@ ggplot(df.icc.corpus, aes(y = icc_child_id, x = toupper(data_set))) +
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
 
+fig8
+
+ggsave("fig8.png", plot = fig8, width = 4, height = 4, units = "in")
 
 ```
 
@@ -646,7 +666,7 @@ reg_anova_cor_icc=Anova(cor_icc)
 
 ```
 
-> The fact that we cannot infer reliability from one corpus based on another one was confirmed statistically: We checked whether Child ICC differed by talker types and pipelines across corpora by fitting a linear model with the formula $lm(Child_ICC ~ type * pipeline * corpus)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children;  pipeline LENA or ACLEW; and corpus the corpus ID. The model was overall significant (F(`round(reg_sum_cor_icc$fstatistic["dendf"],2)`) = `round(reg_sum_cor_icc$fstatistic["value"],2)`, p < .001).  We found an adjusted R-squared of `r round(reg_sum_cor_icc$adj.r.squared*100)`%, suggesting this model explained nearly half of the variance in Child ICC. A Type 3 ANOVA on this model revealed several significant effects and interactions, including a three-way interaction of type, pipeline, and corpus  (F(`r reg_anova_cor_icc["Type:data_set:corpus","Df"]`) = `r round(reg_anova_cor_icc["Type:data_set:corpus","F value"],1)`, p<.001); a two-way interaction of type and corpus  (F(`r reg_anova_cor_icc["data_set:corpus","Df"]`) = `r round(reg_anova_cor_icc["data_set:corpus","F value"],1)`, p<.001); and a main effect of corpus (F(`r reg_anova_cor_icc["corpus","Df"]`) = `r round(reg_anova_cor_icc["corpus","F value"],1)`, p<.001). 
+> The fact that we cannot infer reliability from one corpus based on another one was confirmed statistically: We checked whether Child ICC differed by talker types and pipelines across corpora by fitting a linear model with the formula $lm(Child\_ICC \sim type * pipeline * corpus)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children;  pipeline LENA or ACLEW; and corpus the corpus ID. The model was overall significant (F(`r round(reg_sum_cor_icc$fstatistic["dendf"],2)`) = `r round(reg_sum_cor_icc$fstatistic["value"],2)`, p < .001).  We found an adjusted R-squared of `r round(reg_sum_cor_icc$adj.r.squared*100)`%, suggesting this model explained nearly half of the variance in Child ICC. A Type 3 ANOVA on this model revealed several significant effects and interactions, including a three-way interaction of type, pipeline, and corpus  (F(`r reg_anova_cor_icc["Type:data_set:corpus","Df"]`) = `r round(reg_anova_cor_icc["Type:data_set:corpus","F value"],1)`, p<.001); a two-way interaction of type and corpus  (F(`r reg_anova_cor_icc["data_set:corpus","Df"]`) = `r round(reg_anova_cor_icc["data_set:corpus","F value"],1)`, p<.001); and a main effect of corpus (F(`r reg_anova_cor_icc["corpus","Df"]`) = `r round(reg_anova_cor_icc["corpus","F value"],1)`, p<.001). 
 
 See Table below for results of the Type 3 ANOVA.
 
@@ -658,7 +678,7 @@ kable(round(reg_anova_cor_icc,2),caption="Type 3 ANOVA on model attempting to ex
 
 ## SM W: Code to reproduce Figure 9
 
-```{r icc-bycor-fig9, echo=F,fig.width=4, fig.height=10,fig.cap="Figure 9 (reproduced). Correlations in Child ICC across corpora."}
+```{r icc-bycor-fig9, echo=F,fig.width=4, fig.height=4,fig.cap="Figure 9 (reproduced). Correlations in Child ICC across corpora."}
 
 
 
@@ -678,12 +698,16 @@ r_X_corpus$cor=as.numeric(as.character(r_X_corpus$cor))
 
 #summary(r_X_corpus$cor) #mean correlation across corpora is zero!
 
-ggplot(r_X_corpus, aes(y = cor, x = corpusA)) +
+fig9 <- ggplot(r_X_corpus, aes(y = cor, x = corpusA)) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom() +  
   theme() +labs( y = "Correlation coefficient r",x="Corpus") + 
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
+
+fig9
+
+ggsave("fig9.png", plot = fig9, width = 4, height = 4, units = "in")
 ```
 
 ## SM X: Code to reproduce text in the Discussion section
@@ -703,14 +727,14 @@ northam[grep("Bolivia",location)]<-F
 northam[grep("France",location)]<-F
 northam[grep("England",location)]<-F
 
-bias_tab<-data.frame(cbind(chi_per_corpus, rec_per_corpus))
-bias_tab$chi_per_corpus<-bias_tab$chi_per_corpus/sum(bias_tab$chi_per_corpus)
-bias_tab$rec_per_corpus<-bias_tab$rec_per_corpus/sum(bias_tab$rec_per_corpus)
+bias_tab<-data.frame(cbind(chiXcor, recXcor))
+bias_tab$chiXcor<-bias_tab$chiXcor/sum(bias_tab$chiXcor)
+bias_tab$recXcor<-bias_tab$recXcor/sum(bias_tab$recXcor)
 
 
 ```
 
-> Our data draws mainly from urban (`r round(sum(bias_tab$rec_per_corpus[urban])*100)`% of recordings, `r round(sum(bias_tab$chi_per_corpus[urban])*100)`% of the children, `r round(sum(urban)/length(urban)*100)`% of the corpora), English-speaking settings (`r round(sum(bias_tab$rec_per_corpus[english])*100)`% of recordings, `r round(sum(bias_tab$chi_per_corpus[english])*100)`% of the children, `r round(sum(english)/length(english)*100)`% of the corpora), and almost exclusively from North America (`r round(sum(bias_tab$rec_per_corpus[northam])*100)`% of recordings, `r round(sum(bias_tab$chi_per_corpus[northam])*100)`% of the children, `r round(sum(northam)/length(northam)*100)`% of the corpora). 
+> Our data draws mainly from urban (`r round(sum(bias_tab$recXcor[urban])*100)`% of recordings, `r round(sum(bias_tab$chiXcor[urban])*100)`% of the children, `r round(sum(urban)/length(urban)*100)`% of the corpora), English-speaking settings (`r round(sum(bias_tab$recXcor[english])*100)`% of recordings, `r round(sum(bias_tab$chiXcor[english])*100)`% of the children, `r round(sum(english)/length(english)*100)`% of the corpora), and almost exclusively from North America (`r round(sum(bias_tab$recXcor[northam])*100)`% of recordings, `r round(sum(bias_tab$chiXcor[northam])*100)`% of the children, `r round(sum(northam)/length(northam)*100)`% of the corpora). 
 
 ## SM Y: Variability as a function of hardware
 

File diff suppressed because it is too large
+ 541 - 542
CODE/SM.log


BIN
CODE/SM.pdf


BIN
CODE/fig2.png


BIN
CODE/fig4.png


BIN
CODE/fig5.png


BIN
CODE/fig6.png


BIN
CODE/fig7.png


BIN
CODE/fig8.png


BIN
CODE/fig9.png


+ 17 - 18
CODE/sessionInfo.txt

@@ -23,21 +23,20 @@ other attached packages:
 [13] lme4_1.1-33           Matrix_1.5-4.1       
 
 loaded via a namespace (and not attached):
- [1] beeswarm_0.4.0    gtable_0.3.3      xfun_0.39         bslib_0.5.0      
- [5] insight_0.19.2    rstatix_0.7.2     lattice_0.21-8    vctrs_0.6.3      
- [9] tools_4.3.0       generics_0.1.3    parallel_4.3.0    tibble_3.2.1     
-[13] fansi_1.0.4       highr_0.10        pkgconfig_2.0.3   webshot_0.5.5    
-[17] lifecycle_1.0.3   farver_2.1.1      compiler_4.3.0    mnormt_2.1.1     
-[21] munsell_0.5.0     vipor_0.4.5       htmltools_0.5.5   sass_0.4.7       
-[25] yaml_2.3.7        pillar_1.9.0      nloptr_2.0.3      jquerylib_0.1.4  
-[29] MASS_7.3-60       cachem_1.0.8      boot_1.3-28.1     abind_1.4-5      
-[33] nlme_3.1-162      tidyselect_1.2.0  rvest_1.0.3       digest_0.6.33    
-[37] stringi_1.7.12    purrr_1.0.1       labeling_0.4.2    splines_4.3.0    
-[41] cowplot_1.1.1     fastmap_1.1.1     grid_4.3.0        colorspace_2.1-0 
-[45] cli_3.6.1         magrittr_2.0.3    utf8_1.2.3        broom_1.0.5      
-[49] withr_2.5.0       scales_1.2.1      backports_1.4.1   rmarkdown_2.23   
-[53] httr_1.4.6        gridExtra_2.3     ggsignif_0.6.4    evaluate_0.21    
-[57] knitr_1.43        viridisLite_0.4.2 mgcv_1.8-42       rlang_1.1.1      
-[61] Rcpp_1.0.10       glue_1.6.2        xml2_1.3.5        svglite_2.1.1    
-[65] rstudioapi_0.15.0 minqa_1.2.5       jsonlite_1.8.7    R6_2.5.1         
-[69] systemfonts_1.0.4
+ [1] beeswarm_0.4.0    gtable_0.3.3      xfun_0.39         insight_0.19.2   
+ [5] rstatix_0.7.2     lattice_0.21-8    vctrs_0.6.3       tools_4.3.0      
+ [9] generics_0.1.3    parallel_4.3.0    tibble_3.2.1      fansi_1.0.4      
+[13] highr_0.10        pkgconfig_2.0.3   webshot_0.5.5     lifecycle_1.0.3  
+[17] farver_2.1.1      compiler_4.3.0    textshaping_0.3.6 munsell_0.5.0    
+[21] mnormt_2.1.1      vipor_0.4.5       htmltools_0.5.5   yaml_2.3.7       
+[25] pillar_1.9.0      nloptr_2.0.3      MASS_7.3-60       boot_1.3-28.1    
+[29] abind_1.4-5       nlme_3.1-162      tidyselect_1.2.0  rvest_1.0.3      
+[33] digest_0.6.33     stringi_1.7.12    purrr_1.0.1       labeling_0.4.2   
+[37] splines_4.3.0     cowplot_1.1.1     fastmap_1.1.1     grid_4.3.0       
+[41] colorspace_2.1-0  cli_3.6.1         magrittr_2.0.3    utf8_1.2.3       
+[45] broom_1.0.5       withr_2.5.0       scales_1.2.1      backports_1.4.1  
+[49] rmarkdown_2.23    httr_1.4.6        gridExtra_2.3     ggsignif_0.6.4   
+[53] ragg_1.2.5        evaluate_0.21     knitr_1.43        viridisLite_0.4.2
+[57] mgcv_1.8-42       rlang_1.1.1       Rcpp_1.0.10       glue_1.6.2       
+[61] xml2_1.3.5        svglite_2.1.1     rstudioapi_0.15.0 minqa_1.2.5      
+[65] R6_2.5.1          systemfonts_1.0.4

+ 1 - 1
OUTPUT/corpus_description.csv

@@ -1,4 +1,4 @@
-"corpus"	"location"	"chi_per_corpus"	"rec_r_per_child"	"rec_per_corpus"	"dur_per_corpus"	"age_mean_per_corpus"	"age_r_per_corpus"
+"corpus"	"location"	"chiXcor"	"recRXchi"	"recXcor"	"durXcor"	"ageXcor"	"ageRXcor"
 "1"	"bergelson"	"Northeast US"	"44"	"10-12"	"522"	"14"	"11.2"	"6-17"
 "2"	"cougar"	"Northwest US"	"26"	"3-45"	"239"	"11.1"	"26.6"	"0-59"
 "3"	"fausey-trio"	"Western US"	"28"	"3-3"	"84"	"13.7"	"8.9"	"6-12"