---
title: "checking things"
author: "AC"
date: "1/16/2023"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## Checking change

Here, we compare our current results against those from [this commit](https://gin.g-node.org/LAAC-LSCP/RELIVAL/src/2297fdb2703440052c1d690569424e2400a792ca/OUTPUT)

### changes in icc & t for age

```{r eval=FALSE}
# Load the current ICC table and the archived one it is compared against.
df.icc.mixed <- read.csv("OUTPUT/df.icc.mixed.csv")
df.icc.mixed_old <- read.csv("OUTPUT/df.icc.mixed_eff251299c.csv")

# library(waldo)
# compare(df.icc.mixed$icc_child_id_corpus, df.icc.mixed_old$icc_child_id_corpus)

# Build a join key from data set + metric in both tables.
df.icc.mixed$id <- paste(df.icc.mixed[, "data_set"], df.icc.mixed[, "metric"])
df.icc.mixed_old$id <- paste(
  df.icc.mixed_old[, "data_set"],
  df.icc.mixed_old[, "metric"]
)

# Let merge() label the clashing columns directly. Renaming afterwards with
# gsub(".x", ...) / gsub(".y", ...) treats "." as a regex wildcard and rewrites
# any column name that merely contains "<char>x" or "<char>y".
merged <- merge(
  df.icc.mixed_old,
  df.icc.mixed,
  by = "id",
  suffixes = c(".old", ".new")
)

# NOTE(review): these two columns are used without an .old/.new suffix, which
# only works if each exists in just one of the two files -- confirm, and
# confirm that the "new ICC"/"old ICC" axis labels match.
# na.rm = TRUE keeps the shared axis limits finite when some values are NA.
myrange <- range(merged[, c("icc_child_id_corpus", "icc_child_id")], na.rm = TRUE)
plot(
  merged[, "icc_child_id_corpus"],
  merged[, "icc_child_id"],
  xlim = myrange,
  ylim = myrange,
  xlab = "new ICC",
  ylab = "old ICC"
)
lines(c(0, 1), c(0, 1), lty = 2) # identity line

# before, we used to have some cases with 800 obs
myrange <- range(merged[, c("nobs.old", "nobs.new")], na.rm = TRUE)
plot(
  merged[, "nobs.old"],
  merged[, "nobs.new"],
  xlim = myrange,
  ylim = myrange,
  xlab = "nobs old",
  ylab = "nobs new"
)

# the difference is not due to these
myrange <- range(merged[, c("icc_child_id_corpus", "icc_child_id")], na.rm = TRUE)
# which() drops rows where nobs.new is NA instead of selecting all-NA rows.
many_obs <- which(merged$nobs.new > 1500)
plot(
  merged[many_obs, "icc_child_id_corpus"],
  merged[many_obs, "icc_child_id"],
  xlim = myrange,
  ylim = myrange,
  xlab = "new ICC",
  ylab = "old ICC"
)
lines(c(0, 1), c(0, 1), lty = 2) # identity line

# biggest change
merged$icc_change <- merged$icc_adjusted.new - merged$icc_adjusted.old
# head() returns at most 10 rows; [1:10, ] would pad with NA rows when the
# merged table is shorter than that.
head(
  merged[
    order(merged$icc_change, decreasing = TRUE),
    c("data_set.new", "metric.new", "icc_change")
  ],
  10
)

merged[merged$metric.new == "voc_chi_ph", c("age_t.old", "age_t.new")]

myrange <- range(merged[, c("iqr.old", "iqr.new")], na.rm = TRUE)
plot(merged$iqr.new ~ merged$iqr.old, ylim = myrange, xlim = myrange)
```

### changes in metrics files

Since age changes too, it seems likely something has gone deeply wrong either with our metadata, or with our metrics

```{r eval=FALSE}
# Load the archived and current metrics tables and give each row a key built
# from experiment, session and child.
old <- read.csv("DATA/aclew_metrics_2297fdb270.csv")
old$id <- paste(old$experiment, old$session_id, old$child_id)
new <- read.csv("DATA/aclew_metrics.csv")
new$id <- paste(new$experiment, new$session_id, new$child_id)

dim(old)
dim(new)

# hmm do we remember why we have more lines rec ID?
new[!(new$id %in% old$id), "id"]
# it's a bunch of fausey trios

summary(old)
summary(new)

# NOTE(review): new[, -c(1:3)] drops the first three columns by position --
# presumably the key columns already present in `old`; confirm the column order.
# suffixes= replaces the earlier gsub(".x"/".y") renaming, whose "." wildcard
# also rewrote unrelated names (e.g. "experiment" contains "ex").
merged <- merge(old, new[, -c(1:3)], by = "id", suffixes = c(".old", ".new"))

merged$voc_chi_ph_change <- merged$voc_chi_ph.new - merged$voc_chi_ph.old
summary(merged$voc_chi_ph_change) # this is just rounding differences
plot(merged$voc_chi_ph.new ~ merged$voc_chi_ph.old)
plot(merged$age.new ~ merged$age.old) # no differences here either
```

we have a few more lines (about 60, out of 1700) in the new version of metrics, but the lines that are in common between new and old are basically identical, based on inspection of voc_chi_ph and age
so this strongly suggests we have a bug in our code, and the drop in ICC and age significance comes from our change in functions