|
@@ -53,7 +53,7 @@ define_contiguous<-function(mydat){
|
|
|
#the next line sorts the table by child id then age
|
|
|
arrange(child_id, age) %>%
|
|
|
#the next line groups the rows (it does not drop any), so that the mutate below is computed separately within each group
|
|
|
- group_by(experiment, child_id) %>%
|
|
|
+ group_by(child_id) %>%
|
|
|
#the next line, mutate, defines new variables in the dataset: age_dist_next_rec and next_session (n_rec is commented out because it isn't used later)
|
|
|
mutate( #n_rec = n(), #this var isn't used later
|
|
|
age_dist_next_rec = lead(age) - age,
|
|
@@ -405,20 +405,21 @@ mydat_lena = merge(mydat_lena,dist_contig_lena[,c("session_id","next_session")],
|
|
|
#given those two numbers, with 5 draws we'd cover many combinations in winni, lucid, & trio; but we'll do 10 because there are a lot of recs in cougar & bergelson. Later increased to 20 because there was still a lot of variability in the average r
|
|
|
|
|
|
mydat_aclew <- read.csv(paste0('../data_output/', "aclew",'_metrics_scaled.csv')) #1254
|
|
|
-mydat_aclew=mydat_aclew[order(mydat_aclew$experiment,mydat_aclew$child_id,mydat_aclew$age),]
|
|
|
+#mydat_aclew=mydat_aclew[order(mydat_aclew$experiment,mydat_aclew$child_id,mydat_aclew$age),]
|
|
|
#dim(mydat_aclew)
|
|
|
-dist_contig_aclew <- define_contiguous(mydat_aclew)
|
|
|
-mydat_aclew = merge(mydat_aclew,dist_contig_aclew[,c("session_id","next_session")],by="session_id", all.x=T)
|
|
|
-
|
|
|
-
|
|
|
# dim(dist_contig_aclew) #686 -- for some reason, we have 2 more eligible recs here... not sure why
|
|
|
#length(dist_contig_aclew$session_id[!(dist_contig_aclew$session_id %in% dist_contig_lena$session_id)]) # in fact, we have lots of sessions not in common!
|
|
|
-
|
|
|
#length(dist_contig_lena$session_id[!(dist_contig_lena$session_id %in% dist_contig_aclew$session_id)])
|
|
|
# they are present in aclew but not in lena
|
|
|
+# NOTE: I have "winnipeg C175 C175_20151201" "winnipeg C175 C175_20160301" for lena but not aclew, and I have "fausey-trio T066 T066/T066_000700" "quechua 1096 20190630_190025_009107" "quechua 1096 20190702_193551_008712" for aclew but not lena. It may well be a bug I introduced myself when adding the ava standard score (but if that were the case, I'd only have some things present in aclew but not lena -- the fact that I have some in lena but not aclew would remain unexplained).
|
|
|
|
|
|
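+#one thing that drove me crazy was that, probably because of the small differences in inclusion (2 recs in aclew & lena respectively), I was ending up with different lists of pairings across aclew & lena. So to simplify, I'll impose the same pairing across both, which involves losing a couple of additional recs in lena. NOTE(review): indexing xxx by mydat_lena$session_id below yields all-NA rows for any lena session that is absent from aclew -- confirm that define_contiguous handles those rows as intended.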
|
|
|
+xxx=mydat_aclew[mydat_aclew$session_id %in% mydat_lena$session_id,]
|
|
|
+rownames(xxx)<-xxx$session_id
|
|
|
+xxx=xxx[mydat_lena$session_id,]
|
|
|
|
|
|
-# NOTE: I have "winnipeg C175 C175_20151201" "winnipeg C175 C175_20160301" for lena but not aclew; and i have "fausey-trio T066 T066/T066_000700" "quechua 1096 20190630_190025_009107" "quechua 1096 20190702_193551_008712" for aclew but not lena? It may well be a bug I introduced myself when adding the ava standard score (but if that were the case, I'd only have some things present in aclew but not LENA -- the fact that I have some in lena but not aclew would remain unexplained).
|
|
|
+dist_contig_aclew <- define_contiguous(xxx)
|
|
|
+mydat_aclew = merge(mydat_aclew,dist_contig_aclew[,c("session_id","next_session")],by="session_id", all.x=T)
|
|
|
|
|
|
|
|
|
|