Browse Source

more bug fixes but still need to double check all numbers

alecristia 6 months ago
13 changed files with 614 additions and 592 deletions
  1. BIN
  2. 55 31
  3. 541 542
  4. BIN
  5. BIN
  6. BIN
  7. BIN
  8. BIN
  9. BIN
  10. BIN
  11. BIN
  12. 17 18
  13. 1 1


+ 55 - 31

@@ -1,13 +1,13 @@
 title: Supplementary Materials to Establishing the reliability and validity of measures extracted from long-form recordings
+  pdf_document:
+    toc: yes
+    toc_depth: 3
     toc: yes
     toc_depth: '3'
     df_print: paged
-  pdf_document:
-    toc: yes
-    toc_depth: 3
 ```{r setup, include=FALSE, eval=TRUE}
@@ -157,30 +157,30 @@ Third, and perhaps most relevant, we looked for references that evaluated the ps
 ```{r tab2}
-chi_per_corpus= aggregate(data = mydat_aclew, child_id ~ experiment, function(child_id) length(unique(child_id)))[,2]
+chiXcor= aggregate(data = mydat_aclew, child_id ~ experiment, function(child_id) length(unique(child_id)))[,2]
-rec_per_corpus = aggregate(data = mydat_aclew, session_id ~ experiment, function(session_id) length(unique(session_id)))[,2]
+recXcor = aggregate(data = mydat_aclew, session_id ~ experiment, function(session_id) length(unique(session_id)))[,2]
 rec_per_child = setNames(aggregate(data = mydat_aclew, session_id ~ experiment*child_id, function(session_id) length(unique(session_id))), c('experiment', 'Chi', 'No_rec'))
 min_rec_per_child = aggregate(data = rec_per_child, No_rec ~ experiment, min)[,2]
 max_rec_per_child = aggregate(data = rec_per_child, No_rec ~ experiment, max)[,2]
-rec_r_per_child = paste(min_rec_per_child,max_rec_per_child,sep="-")
+recRXchi = paste(min_rec_per_child,max_rec_per_child,sep="-")
-dur_per_corpus = aggregate(data = mydat_aclew, duration_vtc ~ experiment, function(duration_vtc) round(mean(duration_vtc)/3.6e+6,1))[,2]
+durXcor = aggregate(data = mydat_aclew, duration_vtc ~ experiment, function(duration_vtc) round(mean(duration_vtc)/3.6e+6,1))[,2]
-age_mean_per_corpus = aggregate(data = mydat_aclew, age ~ experiment, function(age) round(mean(age),1))[,2]
+ageXcor = aggregate(data = mydat_aclew, age ~ experiment, function(age) round(mean(age),1))[,2]
 age_min_per_corpus = aggregate(data = mydat_aclew, age ~ experiment, function(age) min(age))[,2]
 age_max_per_corpus = aggregate(data = mydat_aclew, age ~ experiment, function(age) max(age))[,2]
-age_r_per_corpus = paste(age_min_per_corpus,age_max_per_corpus,sep="-")
+ageRXcor = paste(age_min_per_corpus,age_max_per_corpus,sep="-")
 corpus=c("bergelson", "cougar", "fausey-trio", "lucid","lyon", "quechua",  "warlaumont", "winnipeg")
 location=c("Northeast US", "Northwest US", "Western US", "Northwest England", "Central France", "Highlands Bolivia", "Western US", "Western Canada")
-corpus_description=cbind(corpus,location,chi_per_corpus, rec_r_per_child, rec_per_corpus, dur_per_corpus, age_mean_per_corpus,age_r_per_corpus)
+corpus_description=cbind(corpus,location,chiXcor, recRXchi, recXcor, durXcor, ageXcor,ageRXcor)
 write.table(corpus_description, "../output/corpus_description.csv", sep='\t')
@@ -195,7 +195,7 @@ nrecs=length(levels(mydat_aclew$session_id))
 ## SM D: Code to reproduce Fig. 2
-```{r icc-examples-fig2, fig.width=4, fig.height=3,fig.cap="Figure 2 (reproduced). Scatterplots for two selected variables. The left one has relatively low ICCs; the right one has relatively higher ICCs."}
+```{r icc-examples-fig2,  fig.width=6, fig.height=4.5,fig.cap="Figure 2 (reproduced). Scatterplots for two selected variables. The left one has relatively low ICCs; the right one has relatively higher ICCs."}
 # figure of bad ICC: lena     used to be: avg_voc_dur_chi, now is: peak_wc_adu_ph; good ICC: lena used to be: voc_och_ph, now is: voc_dur_och_ph
 # remove missing data points altogether
@@ -258,11 +258,12 @@ panel.background = element_blank(), axis.line = element_line(colour = "black"))
   geom_abline(intercept = 0, slope = 1)
-ggarrange(bad, good,
+fig2 = ggarrange(bad, good,
           ncol = 2, nrow = 1, common.legend = TRUE, vjust = 1.5, hjust=0,
           font.label = list(size = 20))  + labs(color= "Corpus")  +  theme(text = element_text(size = 20))
+ggsave("fig2.png", plot = fig2, width = 6, height = 4.5, units = "in")
 ## SM E: Code to reproduce text at the beginning of the "Setting the stage" section
@@ -318,7 +319,7 @@ cor_t=t.test(rval_tab$m ~ rval_tab$data_set)
-> To see whether correlations in this analysis differed by talker types and pipelines, we fit a linear model with the formula $lm(cor ~ type * pipeline)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children; and pipeline LENA or ACLEW. The model was overall significant (F(`round(reg_sum_cor$fstatistic["dendf"],2)`) = `round(reg_sum_cor$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum_cor$adj.r.squared*100)`%, suggesting this model did not explain a great deal of variance in correlation coefficients. A Type 3 ANOVA on this model revealed a significant effect of pipeline (F = `r round(reg_anova_cor["data_set","F value"],2)`, p = `r round(reg_anova_cor["data_set","Pr(>F)"],2)`), due to higher correlations for ACLEW (`r r_msds["aclew","x"]`) than for LENA metrics (m = `r r_msds["lena","x"]`). 
+> To see whether correlations in this analysis differed by talker types and pipelines, we fit a linear model with the formula $lm(cor \sim type * pipeline)$, where type indicates whether the measure pertained to the key child, (female/male) adults, or other children; and pipeline indicates LENA or ACLEW. The model was overall significant (F(`r round(reg_sum_cor$fstatistic["dendf"],2)`) = `r round(reg_sum_cor$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum_cor$adj.r.squared*100)`%, suggesting this model did not explain a great deal of variance in correlation coefficients. A Type 3 ANOVA on this model revealed a significant effect of pipeline (F = `r round(reg_anova_cor["data_set","F value"],2)`, p = `r round(reg_anova_cor["data_set","Pr(>F)"],2)`), due to higher correlations for ACLEW (m = `r r_msds["aclew","x"]`) than for LENA metrics (m = `r r_msds["lena","x"]`). 
 See table below for results of the Type 3 ANOVA.
@@ -333,11 +334,16 @@ kable(round(reg_anova_cor,2),caption="Type 3 ANOVA on model attempting to explai
 ```{r r-fig4, echo=F,fig.width=4, fig.height=3,fig.cap="Figure 4 (reproduced). Violin plot reflecting the distribution of correlations."}
-ggplot(rval_tab, aes(y = m, x = toupper(data_set))) +
+fig4 <- ggplot(rval_tab, aes(y = m, x = toupper(data_set))) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom(aes(colour = Type,shape = Type)) +  
-  theme() +labs( y = "r",x="Pipeline")
+  theme() +labs( y = "r",x="Pipeline") + 
+  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
+panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
+ggsave("fig4.png", plot = fig4, width = 4, height = 3, units = "in")
@@ -446,13 +452,17 @@ panel.background = element_blank(), legend.key=element_blank(), axis.line = elem
 ```{r icc-allexp-fig5, echo=F,fig.width=4, fig.height=3,fig.cap="Figure 5 (reproduced). Violin plot reflecting the distribution of Child ICC."}
-ggplot(df.icc.mixed, aes(y = icc_child_id, x = toupper(data_set))) +
+fig5 <- ggplot(df.icc.mixed, aes(y = icc_child_id, x = toupper(data_set))) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom(aes(colour = Type,shape = Type)) +  
   labs( y = "Child ICC",x="Pipeline") +  theme(text = element_text(size = 20)) + 
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
+ggsave("fig5.png", plot = fig5, width = 4, height = 3, units = "in")
@@ -478,7 +488,7 @@ rownames(msds_p)<-msds_p$Group.1
-> Next, we explored how similar Child ICCs were across different talker types and pipelines. We fit a linear model with the formula $lm(icc\_child\_id ~ type * pipeline)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children; and pipeline LENA or ACLEW. The model was overall significant (F(`round(reg_sum$fstatistic["dendf"],2)`) = `round(reg_sum$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum$adj.r.squared*100)`%, suggesting much of the variance across Child ICCs was explained by these factors. A Type 3 ANOVA on this model revealed type was a signficant predictor (F(`r reg_anova["Type","Df"]`) = `r round(reg_anova["Type","F value"],1)`, p<.001), as was pipeline (F(`r reg_anova["data_set","Df"]`) = `r round(reg_anova["data_set","F value"],1)`, p = `r round(reg_anova["data_set","Pr(>F)"],3)`); the interaction between type and pipeline was not significant. The main effect of type emerged because output metrics tended to have higher Child ICC (`r msds["Output","x"]`)  than those associated to adults in general (`r msds["Adults","x"]`), females (`r msds["Female","x"]`), and males (`r msds["Male","x"]`); whereas those associated with other children had even higher Child ICCs (`r msds["Other children","x"]`). The main effect of pipeline arose because of slightly higher Child ICCs for the ACLEW metrics (`r msds_p["aclew","x"]`) than for LENA metrics (`r msds_p["lena","x"]`). 
+> Next, we explored how similar Child ICCs were across different talker types and pipelines. We fit a linear model with the formula $lm(icc\_child\_id \sim type * pipeline)$, where type indicates whether the measure pertained to the key child, (female/male) adults, or other children; and pipeline indicates LENA or ACLEW. The model was overall significant (F(`r round(reg_sum$fstatistic["dendf"],2)`) = `r round(reg_sum$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum$adj.r.squared*100)`%, suggesting much of the variance across Child ICCs was explained by these factors. A Type 3 ANOVA on this model revealed type was a significant predictor (F(`r reg_anova["Type","Df"]`) = `r round(reg_anova["Type","F value"],1)`, p<.001), as was pipeline (F(`r reg_anova["data_set","Df"]`) = `r round(reg_anova["data_set","F value"],1)`, p = `r round(reg_anova["data_set","Pr(>F)"],3)`); the interaction between type and pipeline was not significant. The main effect of type emerged because output metrics tended to have higher Child ICC (`r msds["Output","x"]`)  than those associated with adults in general (`r msds["Adults","x"]`), females (`r msds["Female","x"]`), and males (`r msds["Male","x"]`); whereas those associated with other children had even higher Child ICCs (`r msds["Other children","x"]`). The main effect of pipeline arose because of slightly higher Child ICCs for the ACLEW metrics (`r msds_p["aclew","x"]`) than for LENA metrics (`r msds_p["lena","x"]`). 
 ## SM O: Code to reproduce Table 4
@@ -528,7 +538,7 @@ f_labels<-data.frame(age_bin=levels(df.icc.age$age_bin),facet_labels_chi=facet_l
-ggplot(df.icc.age, aes(y = icc_child_id, x = toupper(data_set))) +
+fig6 <- ggplot(df.icc.age, aes(y = icc_child_id, x = toupper(data_set))) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom(aes(colour = Type,shape = Type)) +  
   theme(legend.position="none") +labs( y = "r",x="Pipeline") + facet_wrap(~age_bin, ncol = 3) +
@@ -537,6 +547,9 @@ ggplot(df.icc.age, aes(y = icc_child_id, x = toupper(data_set))) +
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
+ggsave("fig6.png", plot = fig6, width = 6, height = 10, units = "in")
@@ -556,7 +569,7 @@ reg_anova_age_icc=Anova(age_icc)
-> To interrogate these results statistically, and assess whether Child ICCs tended to be higher or lower in certain age bins, we fit a linear model with the formula $lm(Child_ICC ~ type * pipeline * age_bin)$. The model was overall significant (F(`round(reg_sum_age_icc$fstatistic["dendf"],2)`) = `round(reg_sum_age_icc$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum_age_icc$adj.r.squared*100)`%, suggesting this model explained about a third of the variance in Child ICC.  A Type 3 ANOVA on this model revealed type was a signficant predictor (F(`r reg_anova["Type","Df"]`) = `r round(reg_anova["Type","F value"],1)`, p<.001), whereas as was pipeline (F(`r reg_anova["data_set","Df"]`) = `r round(reg_anova["data_set","F value"],1)`, p = `r round(reg_anova["data_set","Pr(>F)"],3)`); the interaction between type and pipeline was not significant. 
+> To interrogate these results statistically, and assess whether Child ICCs tended to be higher or lower in certain age bins, we fit a linear model with the formula $lm(Child\_ICC \sim type * pipeline * age\_bin)$. The model was overall significant (F(`r round(reg_sum_age_icc$fstatistic["dendf"],2)`) = `r round(reg_sum_age_icc$fstatistic["value"],2)`, p < .001). We found an adjusted R-squared of `r round(reg_sum_age_icc$adj.r.squared*100)`%, suggesting this model explained about a third of the variance in Child ICC.  A Type 3 ANOVA on this model revealed type was a significant predictor (F(`r reg_anova["Type","Df"]`) = `r round(reg_anova["Type","F value"],1)`, p<.001), as was pipeline (F(`r reg_anova["data_set","Df"]`) = `r round(reg_anova["data_set","F value"],1)`, p = `r round(reg_anova["data_set","Pr(>F)"],3)`); the interaction between type and pipeline was not significant. 
 See table below for results of the Type 3 ANOVA.
@@ -591,12 +604,16 @@ r_X_age$ageA=factor(r_X_age$ageA,levels=age_levels)
 #summary(r_X_age$cor) #mean correlation across corpora is zero!
-ggplot(r_X_age, aes(y = cor, x = ageA)) +
+fig7 <- ggplot(r_X_age, aes(y = cor, x = ageA)) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom() +
   theme() +labs( y = "Correlation coefficient r",x="Age") + 
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
+ggsave("fig7.png", plot = fig7, width = 4, height = 4, units = "in")
@@ -610,16 +627,16 @@ panel.background = element_blank(), legend.key=element_blank(), axis.line = elem
 ## SM U: Code to reproduce Figure 8
-```{r icc-bycor-fig8, echo=F,fig.width=4, fig.height=10,fig.cap="Figure 8 (reproduced). Child ICC by metric type and pipeline, when considering each corpus separately."}
+```{r icc-bycor-fig8, echo=F,fig.width=4, fig.height=4,fig.cap="Figure 8 (reproduced). Child ICC by metric type and pipeline, when considering each corpus separately."}
-facet_labels_chi = paste0("N chi=",chi_per_corpus)
+facet_labels_chi = paste0("N chi=",chiXcor)
 #and then we structure it so that it goes on the plot
-ggplot(df.icc.corpus, aes(y = icc_child_id, x = toupper(data_set))) +
+fig8 <- ggplot(df.icc.corpus, aes(y = icc_child_id, x = toupper(data_set))) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom(aes(colour = Type,shape = Type)) +  
   theme(legend.position = "top", axis.title.y=element_blank() ,axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +labs( y = "Child ICC",x="Pipeline") +   
@@ -628,6 +645,9 @@ ggplot(df.icc.corpus, aes(y = icc_child_id, x = toupper(data_set))) +
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
+ggsave("fig8.png", plot = fig8, width = 4, height = 4, units = "in")
@@ -646,7 +666,7 @@ reg_anova_cor_icc=Anova(cor_icc)
-> The fact that we cannot infer reliability from one corpus based on another one was confirmed statistically: We checked whether Child ICC differed by talker types and pipelines across corpora by fitting a linear model with the formula $lm(Child_ICC ~ type * pipeline * corpus)$, where type indicates whether the measure pertained to the key child, (female/male) adults, other children;  pipeline LENA or ACLEW; and corpus the corpus ID. The model was overall significant (F(`round(reg_sum_cor_icc$fstatistic["dendf"],2)`) = `round(reg_sum_cor_icc$fstatistic["value"],2)`, p < .001).  We found an adjusted R-squared of `r round(reg_sum_cor_icc$adj.r.squared*100)`%, suggesting this model explained nearly half of the variance in Child ICC. A Type 3 ANOVA on this model revealed several significant effects and interactions, including a three-way interaction of type, pipeline, and corpus  (F(`r reg_anova_cor_icc["Type:data_set:corpus","Df"]`) = `r round(reg_anova_cor_icc["Type:data_set:corpus","F value"],1)`, p<.001); a two-way interaction of type and corpus  (F(`r reg_anova_cor_icc["data_set:corpus","Df"]`) = `r round(reg_anova_cor_icc["data_set:corpus","F value"],1)`, p<.001); and a main effect of corpus (F(`r reg_anova_cor_icc["corpus","Df"]`) = `r round(reg_anova_cor_icc["corpus","F value"],1)`, p<.001). 
+> The fact that we cannot infer reliability from one corpus based on another one was confirmed statistically: We checked whether Child ICC differed by talker types and pipelines across corpora by fitting a linear model with the formula $lm(Child\_ICC \sim type * pipeline * corpus)$, where type indicates whether the measure pertained to the key child, (female/male) adults, or other children;  pipeline indicates LENA or ACLEW; and corpus indicates the corpus ID. The model was overall significant (F(`r round(reg_sum_cor_icc$fstatistic["dendf"],2)`) = `r round(reg_sum_cor_icc$fstatistic["value"],2)`, p < .001).  We found an adjusted R-squared of `r round(reg_sum_cor_icc$adj.r.squared*100)`%, suggesting this model explained nearly half of the variance in Child ICC. A Type 3 ANOVA on this model revealed several significant effects and interactions, including a three-way interaction of type, pipeline, and corpus  (F(`r reg_anova_cor_icc["Type:data_set:corpus","Df"]`) = `r round(reg_anova_cor_icc["Type:data_set:corpus","F value"],1)`, p<.001); a two-way interaction of pipeline and corpus  (F(`r reg_anova_cor_icc["data_set:corpus","Df"]`) = `r round(reg_anova_cor_icc["data_set:corpus","F value"],1)`, p<.001); and a main effect of corpus (F(`r reg_anova_cor_icc["corpus","Df"]`) = `r round(reg_anova_cor_icc["corpus","F value"],1)`, p<.001). 
 See Table below for results of the Type 3 ANOVA.
@@ -658,7 +678,7 @@ kable(round(reg_anova_cor_icc,2),caption="Type 3 ANOVA on model attempting to ex
 ## SM W: Code to reproduce Figure 9
-```{r icc-bycor-fig9, echo=F,fig.width=4, fig.height=10,fig.cap="Figure 9 (reproduced). Correlations in Child ICC across corpora."}
+```{r icc-bycor-fig9, echo=F,fig.width=4, fig.height=4,fig.cap="Figure 9 (reproduced). Correlations in Child ICC across corpora."}
@@ -678,12 +698,16 @@ r_X_corpus$cor=as.numeric(as.character(r_X_corpus$cor))
 #summary(r_X_corpus$cor) #mean correlation across corpora is zero!
-ggplot(r_X_corpus, aes(y = cor, x = corpusA)) +
+fig9 <- ggplot(r_X_corpus, aes(y = cor, x = corpusA)) +
   geom_violin(alpha = 0.5) +
   geom_quasirandom() +  
   theme() +labs( y = "Correlation coefficient r",x="Corpus") + 
   theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
 panel.background = element_blank(), legend.key=element_blank(), axis.line = element_line(colour = "black")) 
+ggsave("fig9.png", plot = fig9, width = 4, height = 4, units = "in")
 ## SM X: Code to reproduce text in the Discussion section
@@ -703,14 +727,14 @@ northam[grep("Bolivia",location)]<-F
-bias_tab<-data.frame(cbind(chi_per_corpus, rec_per_corpus))
+bias_tab<-data.frame(cbind(chiXcor, recXcor))
-> Our data draws mainly from urban (`r round(sum(bias_tab$rec_per_corpus[urban])*100)`% of recordings, `r round(sum(bias_tab$chi_per_corpus[urban])*100)`% of the children, `r round(sum(urban)/length(urban)*100)`% of the corpora), English-speaking settings (`r round(sum(bias_tab$rec_per_corpus[english])*100)`% of recordings, `r round(sum(bias_tab$chi_per_corpus[english])*100)`% of the children, `r round(sum(english)/length(english)*100)`% of the corpora), and almost exclusively from North America (`r round(sum(bias_tab$rec_per_corpus[northam])*100)`% of recordings, `r round(sum(bias_tab$chi_per_corpus[northam])*100)`% of the children, `r round(sum(northam)/length(northam)*100)`% of the corpora). 
+> Our data draws mainly from urban (`r round(sum(bias_tab$recXcor[urban])*100)`% of recordings, `r round(sum(bias_tab$chiXcor[urban])*100)`% of the children, `r round(sum(urban)/length(urban)*100)`% of the corpora), English-speaking settings (`r round(sum(bias_tab$recXcor[english])*100)`% of recordings, `r round(sum(bias_tab$chiXcor[english])*100)`% of the children, `r round(sum(english)/length(english)*100)`% of the corpora), and almost exclusively from North America (`r round(sum(bias_tab$recXcor[northam])*100)`% of recordings, `r round(sum(bias_tab$chiXcor[northam])*100)`% of the children, `r round(sum(northam)/length(northam)*100)`% of the corpora). 
 ## SM Y: Variability as a function of hardware

File diff suppressed because it is too large
+ 541 - 542









+ 17 - 18

@@ -23,21 +23,20 @@ other attached packages:
 [13] lme4_1.1-33           Matrix_1.5-4.1       
 loaded via a namespace (and not attached):
- [1] beeswarm_0.4.0    gtable_0.3.3      xfun_0.39         bslib_0.5.0      
- [5] insight_0.19.2    rstatix_0.7.2     lattice_0.21-8    vctrs_0.6.3      
- [9] tools_4.3.0       generics_0.1.3    parallel_4.3.0    tibble_3.2.1     
-[13] fansi_1.0.4       highr_0.10        pkgconfig_2.0.3   webshot_0.5.5    
-[17] lifecycle_1.0.3   farver_2.1.1      compiler_4.3.0    mnormt_2.1.1     
-[21] munsell_0.5.0     vipor_0.4.5       htmltools_0.5.5   sass_0.4.7       
-[25] yaml_2.3.7        pillar_1.9.0      nloptr_2.0.3      jquerylib_0.1.4  
-[29] MASS_7.3-60       cachem_1.0.8      boot_1.3-28.1     abind_1.4-5      
-[33] nlme_3.1-162      tidyselect_1.2.0  rvest_1.0.3       digest_0.6.33    
-[37] stringi_1.7.12    purrr_1.0.1       labeling_0.4.2    splines_4.3.0    
-[41] cowplot_1.1.1     fastmap_1.1.1     grid_4.3.0        colorspace_2.1-0 
-[45] cli_3.6.1         magrittr_2.0.3    utf8_1.2.3        broom_1.0.5      
-[49] withr_2.5.0       scales_1.2.1      backports_1.4.1   rmarkdown_2.23   
-[53] httr_1.4.6        gridExtra_2.3     ggsignif_0.6.4    evaluate_0.21    
-[57] knitr_1.43        viridisLite_0.4.2 mgcv_1.8-42       rlang_1.1.1      
-[61] Rcpp_1.0.10       glue_1.6.2        xml2_1.3.5        svglite_2.1.1    
-[65] rstudioapi_0.15.0 minqa_1.2.5       jsonlite_1.8.7    R6_2.5.1         
-[69] systemfonts_1.0.4
+ [1] beeswarm_0.4.0    gtable_0.3.3      xfun_0.39         insight_0.19.2   
+ [5] rstatix_0.7.2     lattice_0.21-8    vctrs_0.6.3       tools_4.3.0      
+ [9] generics_0.1.3    parallel_4.3.0    tibble_3.2.1      fansi_1.0.4      
+[13] highr_0.10        pkgconfig_2.0.3   webshot_0.5.5     lifecycle_1.0.3  
+[17] farver_2.1.1      compiler_4.3.0    textshaping_0.3.6 munsell_0.5.0    
+[21] mnormt_2.1.1      vipor_0.4.5       htmltools_0.5.5   yaml_2.3.7       
+[25] pillar_1.9.0      nloptr_2.0.3      MASS_7.3-60       boot_1.3-28.1    
+[29] abind_1.4-5       nlme_3.1-162      tidyselect_1.2.0  rvest_1.0.3      
+[33] digest_0.6.33     stringi_1.7.12    purrr_1.0.1       labeling_0.4.2   
+[37] splines_4.3.0     cowplot_1.1.1     fastmap_1.1.1     grid_4.3.0       
+[41] colorspace_2.1-0  cli_3.6.1         magrittr_2.0.3    utf8_1.2.3       
+[45] broom_1.0.5       withr_2.5.0       scales_1.2.1      backports_1.4.1  
+[49] rmarkdown_2.23    httr_1.4.6        gridExtra_2.3     ggsignif_0.6.4   
+[53] ragg_1.2.5        evaluate_0.21     knitr_1.43        viridisLite_0.4.2
+[57] mgcv_1.8-42       rlang_1.1.1       Rcpp_1.0.10       glue_1.6.2       
+[61] xml2_1.3.5        svglite_2.1.1     rstudioapi_0.15.0 minqa_1.2.5      
+[65] R6_2.5.1          systemfonts_1.0.4

+ 1 - 1

@@ -1,4 +1,4 @@
-"corpus"	"location"	"chi_per_corpus"	"rec_r_per_child"	"rec_per_corpus"	"dur_per_corpus"	"age_mean_per_corpus"	"age_r_per_corpus"
+"corpus"	"location"	"chiXcor"	"recRXchi"	"recXcor"	"durXcor"	"ageXcor"	"ageRXcor"
 "1"	"bergelson"	"Northeast US"	"44"	"10-12"	"522"	"14"	"11.2"	"6-17"
 "2"	"cougar"	"Northwest US"	"26"	"3-45"	"239"	"11.1"	"26.6"	"0-59"
 "3"	"fausey-trio"	"Western US"	"28"	"3-3"	"84"	"13.7"	"8.9"	"6-12"