123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990 |
- ---
- title: "checking things"
- author: "AC"
- date: "1/16/2023"
- output: html_document
- ---
- ```{r setup, include=FALSE}
- knitr::opts_chunk$set(echo = TRUE)  # default for all chunks: show the chunk source in the knitted output
- ```
- ## Checking change
- Here, we compare our current results against those from [this commit](https://gin.g-node.org/LAAC-LSCP/RELIVAL/src/2297fdb2703440052c1d690569424e2400a792ca/OUTPUT)
- ### Changes in ICC and in the t-value for age
- ```{r eval=FALSE}
- # Compare the current ICC table against the snapshot saved from an earlier
- # commit: join the two on (data_set, metric) and inspect what moved.
- df.icc.mixed <- read.csv("OUTPUT/df.icc.mixed.csv")
- df.icc.mixed_old <- read.csv("OUTPUT/df.icc.mixed_eff251299c.csv")
- # library(waldo)
- # compare(df.icc.mixed$icc_child_id_corpus, df.icc.mixed_old$icc_child_id_corpus)
- #
- # Join key: one row per data_set x metric combination.
- df.icc.mixed$id <- paste(df.icc.mixed[, "data_set"], df.icc.mixed[, "metric"])
- df.icc.mixed_old$id <- paste(
-   df.icc.mixed_old[, "data_set"],
-   df.icc.mixed_old[, "metric"]
- )
- # Let merge() label the duplicated columns itself. The previous
- # gsub(".x", ...) / gsub(".y", ...) treated "." as a regex wildcard and was
- # not anchored, so it rewrote any "<char>x" / "<char>y" inside a column name.
- merged <- merge(
-   df.icc.mixed_old, df.icc.mixed,
-   by = "id", suffixes = c(".old", ".new")
- )
- # NOTE(review): the un-suffixed names "icc_child_id_corpus" and "icc_child_id"
- # only exist in `merged` if each is present in just one of the two files;
- # if both files carry them, use the .old/.new columns instead -- confirm.
- icc_cols <- c("icc_child_id_corpus", "icc_child_id")
- # na.rm = TRUE so that a single missing value does not turn the axis limits
- # into NA and make plot() fail.
- myrange <- range(merged[, icc_cols], na.rm = TRUE)
- plot(
-   merged[, "icc_child_id_corpus"], merged[, "icc_child_id"],
-   xlim = myrange, ylim = myrange, xlab = "new ICC", ylab = "old ICC"
- )
- lines(c(0, 1), c(0, 1), lty = 2)  # identity line: points on it did not change
- # before, we used to have some cases with 800 obs
- myrange <- range(merged[, c("nobs.old", "nobs.new")], na.rm = TRUE)
- plot(
-   merged[, "nobs.old"], merged[, "nobs.new"],
-   xlim = myrange, ylim = myrange, xlab = "nobs old", ylab = "nobs new"
- )
- # the difference is not due to these: same ICC plot, restricted to rows with
- # more than 1500 observations in the new results
- many_obs <- merged$nobs.new > 1500
- myrange <- range(merged[, icc_cols], na.rm = TRUE)
- plot(
-   merged[many_obs, "icc_child_id_corpus"], merged[many_obs, "icc_child_id"],
-   xlim = myrange, ylim = myrange, xlab = "new ICC", ylab = "old ICC"
- )
- lines(c(0, 1), c(0, 1), lty = 2)
- # biggest change: the ten rows with the largest (new - old) adjusted ICC
- merged$icc_change <- merged$icc_adjusted.new - merged$icc_adjusted.old
- by_change <- order(merged$icc_change, decreasing = TRUE)
- # head() instead of [1:10, ] so that fewer than ten rows does not add NA rows
- head(merged[by_change, c("data_set.new", "metric.new", "icc_change")], 10)
- # old vs new t-value for age, for the voc_chi_ph metric only
- merged[merged$metric.new == "voc_chi_ph", c("age_t.old", "age_t.new")]
- myrange <- range(merged[, c("iqr.old", "iqr.new")], na.rm = TRUE)
- plot(merged$iqr.new ~ merged$iqr.old, ylim = myrange, xlim = myrange)
- ```
- ### changes in metrics files
- Since the t-value for age changes too, it seems likely that something has gone deeply wrong with either our metadata or our metrics.
- ```{r eval=FALSE}
- # Compare the metrics file from the earlier commit with the current one,
- # matching rows on experiment + session_id + child_id.
- old <- read.csv("DATA/aclew_metrics_2297fdb270.csv")
- old$id <- paste(old$experiment, old$session_id, old$child_id)
- new <- read.csv("DATA/aclew_metrics.csv")
- new$id <- paste(new$experiment, new$session_id, new$child_id)
- dim(old)
- dim(new)  # hmm do we remember why we have more lines rec ID?
- new[!(new$id %in% old$id), "id"]  # it's a bunch of fausey trios
- summary(old)
- summary(new)
- # The first three columns of `new` are dropped by position before joining
- # (presumably the key columns already kept from `old` -- verify the order).
- # merge() adds the .old/.new suffixes itself: the previous gsub(".x", ...)
- # treated "." as a regex wildcard, so e.g. "experiment" became ".oldperiment".
- merged <- merge(
-   old, new[, -c(1:3)],
-   by = "id", suffixes = c(".old", ".new")
- )
- merged$voc_chi_ph_change <- merged$voc_chi_ph.new - merged$voc_chi_ph.old
- summary(merged$voc_chi_ph_change)  # this is just rounding differences
- plot(merged$voc_chi_ph.new ~ merged$voc_chi_ph.old)
- plot(merged$age.new ~ merged$age.old)
- # no differences here either
- ```
- We have a few more lines (about 60, out of 1700) in the new version of the metrics file, but the lines that the new and old versions have in common are basically identical, based on inspection of `voc_chi_ph` and `age`.
- So this strongly suggests that we have a bug in our code, and that the drop in ICC and in the significance of age comes from the change in our functions.
|