checking_version_diff.Rmd 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. ---
  2. title: "checking things"
  3. author: "AC"
  4. date: "1/16/2023"
  5. output: html_document
  6. ---
  7. ```{r setup, include=FALSE}
  8. knitr::opts_chunk$set(echo = TRUE)
  9. ```
  10. ## Checking change
  11. Here, we compare our current results against those from [this commit](https://gin.g-node.org/LAAC-LSCP/RELIVAL/src/2297fdb2703440052c1d690569424e2400a792ca/OUTPUT)
  12. ### changes in icc & t for age
  13. ```{r eval=FALSE}
  # Compare the current ICC table with the archived copy of an earlier run:
  # join the two on data set + metric, then plot old vs. new values.
  df.icc.mixed <- read.csv("OUTPUT/df.icc.mixed.csv")
  df.icc.mixed_old <- read.csv("OUTPUT/df.icc.mixed_eff251299c.csv")
  # library(waldo)
  # compare(df.icc.mixed$icc_child_id_corpus, df.icc.mixed_old$icc_child_id_corpus)

  # Join key: "<data_set> <metric>".
  # NOTE(review): assumes this pair identifies a row uniquely in each file;
  # duplicated keys would make merge() fan out -- confirm.
  df.icc.mixed$id <- paste(df.icc.mixed[, "data_set"], df.icc.mixed[, "metric"])
  df.icc.mixed_old$id <- paste(
    df.icc.mixed_old[, "data_set"],
    df.icc.mixed_old[, "metric"]
  )

  # Let merge() label clashing columns itself (first table = old, second = new).
  # Renaming afterwards with gsub(".x", ...) / gsub(".y", ...) is unsafe: "." is
  # a regex wildcard and the pattern is unanchored, so any "<char>x" or
  # "<char>y" inside a column name would be rewritten as well.
  merged <- merge(
    df.icc.mixed_old, df.icc.mixed,
    by = "id", suffixes = c(".old", ".new")
  )

  # NOTE(review): these two columns are addressed without a suffix, so each is
  # presumably present in only one of the two files (the axis labels treat
  # icc_child_id_corpus as new and icc_child_id as old) -- confirm.
  icc_cols <- c("icc_child_id_corpus", "icc_child_id")
  # na.rm = TRUE: a single missing ICC must not turn the axis limits into NA.
  myrange <- range(merged[, icc_cols], na.rm = TRUE)
  plot(
    merged[, "icc_child_id_corpus"], merged[, "icc_child_id"],
    xlim = myrange, ylim = myrange, xlab = "new ICC", ylab = "old ICC"
  )
  lines(c(0, 1), c(0, 1), lty = 2) # identity line: old == new

  # before, we used to have some cases with 800 obs
  myrange <- range(merged[, c("nobs.old", "nobs.new")], na.rm = TRUE)
  plot(
    merged[, "nobs.old"], merged[, "nobs.new"],
    xlim = myrange, ylim = myrange, xlab = "nobs old", ylab = "nobs new"
  )

  # the difference is not due to these: same ICC plot, restricted to rows
  # with more than 1500 observations in the new results
  myrange <- range(merged[, icc_cols], na.rm = TRUE)
  many_obs <- which(merged$nobs.new > 1500) # which() also drops NA counts
  plot(
    merged[many_obs, "icc_child_id_corpus"], merged[many_obs, "icc_child_id"],
    xlim = myrange, ylim = myrange, xlab = "new ICC", ylab = "old ICC"
  )
  lines(c(0, 1), c(0, 1), lty = 2)

  # biggest change: the ten largest increases in adjusted ICC
  merged$icc_change <- merged$icc_adjusted.new - merged$icc_adjusted.old
  by_change <- order(merged$icc_change, decreasing = TRUE)
  # head() instead of [1:10, ] so fewer than ten rows does not yield NA rows
  head(merged[by_change, c("data_set.new", "metric.new", "icc_change")], 10)

  # t-values for age, old vs. new, for the voc_chi_ph metric only
  merged[merged$metric.new == "voc_chi_ph", c("age_t.old", "age_t.new")]

  myrange <- range(merged[, c("iqr.old", "iqr.new")], na.rm = TRUE)
  plot(iqr.new ~ iqr.old, data = merged, ylim = myrange, xlim = myrange)
  39. ```
  40. ### changes in metrics files
  41. Since the age results change too, it seems likely that something has gone deeply wrong with either our metadata or our metrics.
  42. ```{r eval=FALSE}
  # Compare the archived metrics file with the current one: join on an id
  # built from experiment, session and child, then inspect the differences.
  old <- read.csv("DATA/aclew_metrics_2297fdb270.csv")
  old$id <- paste(old$experiment, old$session_id, old$child_id)
  new <- read.csv("DATA/aclew_metrics.csv")
  new$id <- paste(new$experiment, new$session_id, new$child_id)

  dim(old)
  dim(new) # hmm do we remember why we have more lines rec ID?
  new[!(new$id %in% old$id), "id"] # it's a bunch of fausey trios
  summary(old)
  summary(new)

  # NOTE(review): new[, -c(1:3)] drops the first three columns by position --
  # presumably experiment, session_id and child_id, so they are not duplicated
  # in the join; confirm against the CSV header.
  # suffixes = labels the clashing columns directly. The earlier
  # gsub(".x", ...) / gsub(".y", ...) renaming treated "." as a regex wildcard
  # and, for example, turned "experiment" into ".oldperiment".
  merged <- merge(
    old, new[, -c(1:3)],
    by = "id", suffixes = c(".old", ".new")
  )

  merged$voc_chi_ph_change <- merged$voc_chi_ph.new - merged$voc_chi_ph.old
  summary(merged$voc_chi_ph_change) # this is just rounding differences
  plot(merged$voc_chi_ph.new ~ merged$voc_chi_ph.old)
  plot(merged$age.new ~ merged$age.old)
  # no differences here either
  60. ```
  61. We have a few more lines (about 60 out of 1700) in the new version of the metrics file, but the lines that old and new have in common are basically identical, based on inspection of `voc_chi_ph` and `age`.
  62. This strongly suggests that we have a bug in our code, and that the drop in ICC and in the significance of age comes from our change in functions.