9 місяців тому · a37a6c19e9
--- a/CODE/SM.Rmd
+++ b/CODE/SM.Rmd
@@ -53,7 +53,7 @@ define_contiguous<-function(mydat){
 
				       #the next line sorts the table by child id then age
			
 
				  arrange(child_id, age) %>% 
			
 
				       #the next line groups the rows by child_id, so that the mutate below is computed within each child's recordings (no rows are dropped here)
			
 
				-   group_by(experiment, child_id) %>% 
			
 
				+   group_by(child_id) %>% 
			
 
				       #the next line, mutate, defines 3 new variables in the dataset, n_rec, age_dist_next_rec, and next_session
			
 
				   mutate( #n_rec = n(), #this var isn't used later
			
 
				          age_dist_next_rec = lead(age) - age, 
			
@@ -405,20 +405,21 @@ mydat_lena = merge(mydat_lena,dist_contig_lena[,c("session_id","next_session")],
 
				 #given those two numbers, with 5 draws we'd cover many combinations in winni, lucid, & trio; but we'll do 10 because there are a lot of recs in cougar & bergelson. Later increased to 20 bc there was a lot of variability still in the average r
			
 
				 
			
 
				 mydat_aclew <- read.csv(paste0('../data_output/', "aclew",'_metrics_scaled.csv')) #1254
			
 
				-mydat_aclew=mydat_aclew[order(mydat_aclew$experiment,mydat_aclew$child_id,mydat_aclew$age),]
			
 
				+#mydat_aclew=mydat_aclew[order(mydat_aclew$experiment,mydat_aclew$child_id,mydat_aclew$age),]
			
 
				 #dim(mydat_aclew)
			
 
				-dist_contig_aclew <- define_contiguous(mydat_aclew) 
			
 
				-mydat_aclew = merge(mydat_aclew,dist_contig_aclew[,c("session_id","next_session")],by="session_id", all.x=T)
			
 
				-
			
 
				-
			
 
				 # dim(dist_contig_aclew) #686 -- for some reason, we have 2 more eligible recs here... not sure why
			
 
				 #length(dist_contig_aclew$session_id[!(dist_contig_aclew$session_id %in% dist_contig_lena$session_id)])  # in fact, we have lots of sessions not in common!
			
 
				-
			
 
				 #length(dist_contig_lena$session_id[!(dist_contig_lena$session_id %in% dist_contig_aclew$session_id)])
			
 
				 # they are present in aclew but not in lena
			
 
				+# NOTE: I have "winnipeg C175 C175_20151201" "winnipeg C175 C175_20160301" for lena but not aclew; and i have "fausey-trio T066 T066/T066_000700"   "quechua 1096 20190630_190025_009107" "quechua 1096 20190702_193551_008712" for aclew but not lena? It may well be a bug I introduced myself when adding the ava standard score (but if that were the case, I'd only have some things present in aclew but not LENA -- the fact that I have some in lena but not aclew would remain unexplained).
			
 
				 
			
 
				+#one thing that drove me crazy was that, probably because of the small differences in inclusion (2 recs in aclew & lena respectively), I was ending up with different lists of pairings across aclew & lena. So to simplify, I'll impose the same pairing across both, which involves losing a couple of additional recs in lena
			
 
				+#keep only the aclew rows whose session_id also appears in lena
			
 
				+xxx=mydat_aclew[mydat_aclew$session_id %in% mydat_lena$session_id,]
			
 
				+#name the rows by session_id so they can be reordered by name below (this errors if a session_id is duplicated)
			
 
				+rownames(xxx)<-xxx$session_id
			
 
				+#put the rows in lena's session order. Ids that exist only in lena are dropped from the index first, because indexing by a row name that does not exist returns an all-NA row; as.character() guards against session_id being a factor, which would be used as integer codes instead of names
			
 
				+xxx=xxx[as.character(mydat_lena$session_id[mydat_lena$session_id %in% xxx$session_id]),]
			
 
				 
			
 
				-# NOTE: I have "winnipeg C175 C175_20151201" "winnipeg C175 C175_20160301" for lena but not aclew; and i have "fausey-trio T066 T066/T066_000700"   "quechua 1096 20190630_190025_009107" "quechua 1096 20190702_193551_008712" for aclew but not lena? It may well be a bug I introduced myself when adding the ava standard score (but if that were the case, I'd only have some things present in aclew but not LENA -- the fact that I have some in lena but not aclew would remain unexplained).
			
 
				+#pairings are defined on xxx (the aclew rows shared with lena), not on the full mydat_aclew
			
 
				+dist_contig_aclew <- define_contiguous(xxx) 
			
 
				+#add next_session to mydat_aclew; all.x=TRUE keeps every row of mydat_aclew, and sessions without a match in dist_contig_aclew get NA in next_session
			
 
				+mydat_aclew = merge(mydat_aclew,dist_contig_aclew[,c("session_id","next_session")],by="session_id", all.x=TRUE)