# create-sib-subdataset.R

  # Build the sibling-analysis subset: attach each child's number of siblings
  # to the scaled ACLEW metrics and write the result to
  # ../data_output/dat_sib_ana.csv.
  #
  # Requires `corpora` (the experiment names to keep) to be defined before this
  # script runs; it is not created here.
  if (!exists("corpora")) {
    stop("`corpora` must be defined before running this script", call. = FALSE)
  }

  # stringsAsFactors = FALSE is spelled out so the string recoding below also
  # works on R < 4.0, where read.csv() returned factors by default and
  # assigning a new label such as "luc" would silently produce NA.
  mydat_aclew <- read.csv(
    "../data_output/aclew_metrics_scaled.csv",
    stringsAsFactors = FALSE
  )
  mydat_aclew <- mydat_aclew[mydat_aclew$experiment %in% corpora, ]

  # ---- ACLEW metadata: one row per child, keyed by "<lab> <child id>" ----
  x <- read.csv("../input/aclew_md.csv", stringsAsFactors = FALSE)

  # Recode two lab names. NOTE(review): presumably so that tolower(labname)
  # lines up with the first three letters of `experiment` used as `lab` below
  # -- confirm against the data.
  x$labname[x$labname %in% "ROW"] <- "luc"
  x$labname[x$labname %in% "SOD"] <- "win"

  x$ch_id <- paste(tolower(x$labname), as.character(x$child_level_id))
  # For the ACLEW corpora the sibling count is taken from `number_older_sibs`.
  x$n_of_siblings <- x$number_older_sibs

  # BER ids are passed through as.numeric(), which normalises their formatting
  # (e.g. drops leading zeros) before they are pasted into the key.
  is_ber <- x$labname %in% "BER"
  x$ch_id[is_ber] <- paste(
    tolower(x$labname[is_ber]),
    as.numeric(as.character(x$child_level_id[is_ber]))
  )

  # Keep only the first metadata row for each child key.
  x <- x[!duplicated(x$ch_id), ]

  # ---- Build the same key on the metrics side ----
  mydat_aclew$lab <- substr(mydat_aclew$experiment, 1, 3)
  # gsub(".* ", "", ...) keeps only what follows the last space in child_id.
  mydat_aclew$ch_id <- paste(
    mydat_aclew$lab,
    gsub(".* ", "", mydat_aclew$child_id)
  )

  # Corpus-specific key adjustments; %in% keeps the masks free of NA so the
  # subset-assignments cannot fail on missing experiment values.
  is_warlaumont <- mydat_aclew$experiment %in% "warlaumont"
  # Drops one zero directly after the space ("war 012" -> "war 12").
  mydat_aclew$ch_id[is_warlaumont] <- gsub(
    " 0", " ", mydat_aclew$ch_id[is_warlaumont]
  )
  is_winnipeg <- mydat_aclew$experiment %in% "winnipeg"
  # Turns an id starting with "C" into one starting with "CW".
  mydat_aclew$ch_id[is_winnipeg] <- gsub(
    " C", " CW", mydat_aclew$ch_id[is_winnipeg]
  )

  # Diagnostics for checking that the keys on both sides line up:
  # sort(factor(mydat_aclew$ch_id[mydat_aclew$experiment == "winnipeg"]))
  # sort(x$ch_id[x$labname == "win"])
  # sum(mydat_aclew$ch_id %in% x$ch_id)
  # sum(x$ch_id %in% mydat_aclew$ch_id)

  metadata <- x[, c("ch_id", "n_of_siblings")]

  # ---- Quechua metadata: already provides `n_of_siblings` ----
  x <- read.csv("../input/quechua_md.csv", stringsAsFactors = FALSE)
  x$ch_id <- paste("que", x$child_id)
  metadata <- rbind(metadata, x[, c("ch_id", "n_of_siblings")])

  # Left join: every metrics row is kept; rows without matching metadata get
  # NA in n_of_siblings.
  mydat2 <- merge(mydat_aclew, metadata, by = "ch_id", all.x = TRUE)

  # table(mydat2$n_of_siblings, mydat2$experiment)

  # NA sibling counts propagate: sib_presence is NA (not "absent") for them.
  mydat2$sib_presence <- ifelse(mydat2$n_of_siblings != 0, "present", "absent")

  write.csv(
    mydat2[, c(
      "age_s", "n_of_siblings", "sib_presence",
      "experiment", "child_id", "voc_dur_och_ph"
    )],
    "../data_output/dat_sib_ana.csv",
    row.names = FALSE
  )