NeuroGroup_TUNI
/
Comparative_MEA_dataset


			
			
				
					
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
							# Laura Aarnos 190527
# NB from github updated 190430

# There are four things to modify for bin time. Use Ctrl+f to find 'neuro group mod'.

.graythresh <- function(I) { 
     ## Implement the Otsu (1979) thresholding algorithm. 
     I <- as.vector(I) 
     if (min(I) < 0 | max(I) > 1) { 
         stop("Data needs to be between 0 and 1") 
       } 
     I <- I * 256 
   
  
     num_bins <- 256 
   
  
     ## We specify a vector for the break points from 0 to num_bins. 
     ## For num_bins=256, this produces the following bins: 
     ## [0, 1], (1, 2], (2, 3], .... (255,256] 
     ## note the [ on the first bin due to include.lowest = TRUE 
     counts <- hist(I, 0:num_bins, plot = FALSE)$counts # NEW! OLD WAS: counts <- hist(I, num_bins, plot = FALSE)$counts
   
  
     # Variables names are chosen to be similar to the formulas in the Otsu paper. 
     p <- counts / sum(counts) 
     omega <- cumsum(p) 
     mu <- cumsum(p * (1:num_bins)) 
     mu_t <- mu[length(mu)] 
   
  
     sigma_b_squared <- (mu_t * omega - mu) ^ 2 / (omega * (1 - omega)) 
     sigma_b_squared[is.na(sigma_b_squared)] <- 0 
     maxval <- max(sigma_b_squared) 
     isfinite_maxval <- is.finite(maxval) 
     if (isfinite_maxval) { 
         idx <- mean(which(sigma_b_squared == maxval)) 
         # Normalize the threshold to the range [0, 1]. 
         level <- (idx - 1) / (num_bins - 1) 
       } else { 
           level <- 0.0 
         } 
     level 
   } 
 

 

 # convert the spike list to a matrix, probably not necessary anymore 
 # but just to be consistent with matlab for now 
 .nb_prepare <- function(s) { 
     spikes <- s$spikes 
     colnames <- names(spikes) 
   
  
     slength <- sapply(spikes, length) 
     max_elements <- max(slength) 
   
  
     data <- matrix(0, length(colnames), max_elements) 
     for (i in  1:length(colnames)) { 
         data[i, 1:slength[[i]]] <- spikes[[i]] 
       } 
     rownames(data) <- colnames 
     data <- t(data) 
     data 
   } 
 

 # generate binned data for the well 
 .nb_select_and_bin <- function(data, well, sbegin, send, bin) { 
     well_data <- data[, grep(well, colnames(data))] 
     well_data[well_data < sbegin | well_data > send] <- 0 
     if (is.null(dim(well_data))) { 
         dim(well_data) <- c(length(well_data), 1) 
       } 
     if (length(well_data) > 0) { 
         well_data_postive <- well_data[well_data > 0] 
         if (all(is.na(well_data_postive))) { 
             to_add_back <- 0 
           } else { 
               to_add_back <- min(well_data[well_data > 0]) 
             } 
       } else { 
           to_add_back <- 0 
         } 
   
  
   
  
     well_data <- well_data[rowSums(well_data) > 0, ] 
     well_data <- round(well_data * 12500) # Neuro Group MOD MCS: change 12500 to your sampling freq (original: round(well_data * 12500))
     temp <- well_data[well_data > 0] 
     if (length(temp) > 0) { 
         t_min <- min(temp) 
         t_max <- max(temp) 
         well_data <- well_data - t_min + 1 
         range <- (t_max - t_min + 1) 
         bins <- floor(range / bin) 
         if (range %% bin > 0) { 
             bins <- bins + 1 
           } 
         x <- (0:bins) * bin + 1 
         y <- c(- Inf, x, max(x) + bin, + Inf) 
     
    
         if (is.null(dim(well_data))) { 
             dim(well_data) <- c(length(well_data), 1) 
           } 
         well_data_new <- matrix(0, bins + 1, dim(well_data)[2]) 
         for (i in 1: dim(well_data)[2]) { 
             temp <- hist(well_data[, i], breaks = y, plot = FALSE)$counts 
             temp <- temp[2:(length(temp) - 1)] 
             well_data_new[, i] <- temp 
           } 
         well_data_new[well_data_new > 1] <- 1 
       } else { 
           well_data_new <- temp 
         } 
   
  
     list(well_data_new, to_add_back) 
   } 
 

 .nb_gaussian_filter <- function(sigma, well_data, min_electrodes) { 
     sigma_input <- sigma 
     if (length(well_data) == 0) { 
         f2 <- numeric() 
       } else { 
           # a very flat filter. Consider a much shapper one later 
           if (sigma %% 2 == 0) { 
               sigma <- sigma + 1 
             } 
           filter_size <- sigma 
           half_size <- (filter_size - 1) / 2 
           x <- seq(- half_size, half_size, length = filter_size) 
           gauss_filter <- exp(- x ^ 2 / (2 * sigma ^ 2)) 
           gauss_filter <- gauss_filter / sum(gauss_filter) 
           f <- well_data 
           if (length(which(apply(well_data, 2, max) > 0)) >= min_electrodes) { 
               for (i in 1:dim(f)[2]) { 
                   if (max(well_data[, i]) > 0) { 
                       f[, i] <- filter(well_data[, i], gauss_filter, circular = TRUE) 
                       f[, i] <- f[, i] / max(f[, i]) 
                     } 
                 } 
               f1 <- rowSums(f) 
               f1 <- f1 / max(f1) 
               f2 <- filter(f1, gauss_filter, circular = TRUE) 
               f2 <- f2 / max(f2) 
               dim(f2) <- c(length(f2), 1) 
               colnames(f2) <- sigma_input 
             } else { 
                 f2 <- numeric() 
               } 
         } 
     f2 
   } 
 

 .nb_get_spike_stats <- function(well_data, timespan) { 
     stat <- matrix(0, 1, 3) 
     if (length(well_data) > 0) { 
         # number of active electrodes 
         stat[1] <- length(which(apply(well_data, 2, max) > 0)) 
         # well level mfr 
         stat[2] <- sum(well_data) / timespan 
         # mfr by active electrodes 
         if (stat[1] > 0) { 
             stat[3] <- stat[2] / stat[1] 
           } 
       } 
     colnames(stat) <- c("n_active_electrodes",
                         "mfr_per_well", "mfr_per_active_electode") 
     stat 
   } 
 .nb_get_burst_stats_intensities_mod <- 
     function(well_data, f, timespan, bin_time, min_electrodes, 
                    sbegin, send, offset2=0, bin_size=0.00064) { 
       # bin_size is set to 0.002 based on sample rate of 12500/s
       # Neuro Group MOD: bin_size is set to 0.00064 s based on sample rate of 12500/s - three occasions within this function
       n <- length(f) 
       stat <- matrix(NA, 11, n) 
        
       #rownames(stat) <- c("mean.NB.time.per.sec", 
       #                    "total.spikes.in.all.NBs", 
       #                    "percent.of.spikes.in.NBs", 
       #                    "spike.intensity", 
       #                    "spike.intensity.by.aEs", 
       #                    "total.spikes.in.all.NBs,nNB", 
       #                    "[total.spikes.in.all.NBs,nNB],nae", 
       #                    "mean[spikes.in.NB,nae]", 
       #                    "mean[spikes.in.NB,nae,NB.duration]", 
       #                    "mean[spikes.in.NB]", 
       #                    "total.number.of.NBs") 
        
       rownames(stat) <- c("mean_NB_time_per_sec", 
                                   "total_spikes_in_all_NBs", 
                                   "percent_of_spikes_in_NBs", 
                                   "spike_intensity", 
                                   "spike_intensity_by_aEs", 
                                   "total_spikes_in_all_NBs_d_nNB", 
                                   "total_spikes_in_all_NBs_d_nNB_d_nae", 
                                   "mean_spikes_in_NB_d_nae", 
                                   "mean_spikes_in_NB_d_nae_d_NB_duration", 
                                   "mean_spikes_in_NB", 
                                   "total_number_of_NBs") 
       colnames(stat) <- rep("NA", n) 
       for (current in 1:n) { 
           # not all have names 
           if (length(f[[current]] > 0)) { 
               colnames(stat)[current] <- colnames(f[[current]]) # not all have names 
             } 
         } 
       nb_times <- list() 
       length(nb_times) <- length(f) 
       names(nb_times) <- names(f); 
     
    
       for (current  in 1:n) { 
           temp <- f[[current]] 
           if (length(temp) > 0) { 
               n_active_electrodes <- length(which(apply(well_data, 2, max) > 0)) 
               temp[temp < 0] <- 0 
               level <- .graythresh(temp) 
               # get timestamps that are identified as part of network bursts 
               indicator0 <- temp >= level 
         
        
               if (min_electrodes > 0) { 
                   indicator <- c(0, indicator0, 0) # prepare for finding intervals 
                   diff_indicator <- diff(indicator) 
                   burst_start <- which(diff_indicator == 1) 
                   burst_end <- which(diff_indicator == - 1) 
                   if (length(burst_start) != length(burst_end)) { 
                       stop("Burst region no match") 
                     } 
           
          
                   ## filtered regions based on minimum number of active electrodes 
                   for (region in 1: length(burst_start)) { 
                       current_region_nae <- 
                           length(which(colSums(well_data[ 
                               (burst_start[region]):(burst_end[region] - 1), ]) > 0)) 
                       if (current_region_nae < min_electrodes) { 
                           indicator0[burst_start[region]:(burst_end[region] - 1)] <- 0 
                         } 
                     } 
                 } 
         
        
               indicator <- c(0, indicator0, 0) # prepare for finding intervals 
               diff_indicator <- diff(indicator) 
               burst_start <- which(diff_indicator == 1) 
               burst_end <- which(diff_indicator == - 1) 
               if (length(burst_start) != length(burst_end)) { 
                   stop("Burst region no match") 
                 } 
         
        
               nb_times_temp <- cbind.data.frame(burst_start, burst_end) 
               nb_times_temp$start_t <- burst_start * 0.00064 + offset2 # Neuro Group MOD: 0.002 changed to 0.00064 (original: burst_start * 0.002 + offset2)
               nb_times_temp$end_t <- burst_end * 0.00064 + offset2 # Neuro Group MOD: 0.002 changed to 0.00064 (original: burst_end * 0.002 + offset2)
               nb_times[[current]] <- nb_times_temp 
         
        
               # mean network burst time per second 
               stat[1, current] <- sum(indicator0) * bin_time / timespan 
               totalspikes <- rowSums(well_data) 
               spikes_in_network_bursts <- sum(totalspikes * indicator0) 
               stat[2, current] <- spikes_in_network_bursts 
               # percentage of spikes in network bursts 
               stat[3, current] <- stat[2, current] / sum(totalspikes) 
         
        
               total_burst_regions <- sum(burst_end - burst_start) 
               stat[4, current] <- spikes_in_network_bursts / 
                                     total_burst_regions # overall Spike Intensity 
               stat[5, current] <- stat[4, current] / n_active_electrodes 
               stat[6, current] <- spikes_in_network_bursts / length(burst_start) 
               stat[7, current] <- stat[6, current] / n_active_electrodes 
         
        
               burst_region_length <- burst_end - burst_start 
               total_spikes_per_active_electrode <- totalspikes / n_active_electrodes 
               spikes_in_region_per_active_electrode <- matrix(0, length(burst_start), 2) 
               for (region in 1: length(burst_start)) { 
                   spikes_in_region_per_active_electrode[region, 1] <- 
                       sum(total_spikes_per_active_electrode[ 
                            burst_start[region]:(burst_end[region] - 1)]) 
                   # normalized to HZ per active electrode within burst region 
                   spikes_in_region_per_active_electrode[region, 2] <- 
                       spikes_in_region_per_active_electrode[region, 1] / 
                           (burst_region_length[region] * bin_time) 
                 } 
               stat[8, current] <- mean(spikes_in_region_per_active_electrode[, 1]) 
               stat[9, current] <- mean(spikes_in_region_per_active_electrode[, 2]) 
         
        
               stat[10, current] <- stat[8, current] * n_active_electrodes 
               stat[11, current] <- length(burst_start) 
             } 
         } 
       list(stat, nb_times) 
     } 
 # do not change bin and sf for MEA recordings, skip is in seconds (except with Neuro Group data)
 # Neuro Group MOD: bin changed from 25 to 8 - bin = bin_time(s)*sf
 # Neuro Group MOD MCS: additionally change sf (sampling freq) according to data
 .nb_extract_features_mod <- function(s, sigmas, 
                                  min_electrodes, 
                                  local_region_min_nae, 
                                  duration = 0, 
                                  bin = 8, 
                                  sf = 12500, 
                                  skip = 10) { 
     df.spikes <- .nb_prepare(s) 
     wells <- unique(substr(colnames(df.spikes), 1, 2)) 
     output <- list() 
   
  
     sbegin <- floor(min(df.spikes[df.spikes > 0] / skip)) * skip + skip 
     send <- floor(max(df.spikes[df.spikes > 0] / skip)) * skip 
     timespan <- send - sbegin 
     # bin: 25; #2ms, this is a parameter based on our recording
     # Neuro Group: bin: 8; 0.64ms based on our spike detection
     bin_time <- bin / sf 
     nb_times <- list() 
     length(nb_times) <- length(wells) 
     names(nb_times) <- wells 
     cat(paste0("calculating network bursts for recording ", 
                                 basename(s$file), "\n")) 
     for (well.index in 1: length(wells)) { 
         well <- wells[well.index] 
         temp_return <- .nb_select_and_bin(df.spikes, well, sbegin, send, bin) 
         offset2 <- temp_return[[2]] 
         well_data <- temp_return[[1]] 
         f <- list() 
         for (i  in 1: length(sigmas)) { 
             f[[i]] <- .nb_gaussian_filter(sigmas[i], well_data, min_electrodes) 
       
      
           } 
         names(f) <- sigmas 
     
    
         stat0 <- .nb_get_spike_stats(well_data, timespan) 
         res1 <- .nb_get_burst_stats_intensities_mod(well_data, 
                                                                 f, timespan, bin_time, local_region_min_nae, 
                                                           sbegin, send, offset2) 
         stat1 <- res1[[1]] 
         nb_times[[well.index]] <- res1[[2]] 
     
    
         output[[well.index]] <- list() 
         output[[well.index]]$stat0 <- stat0 
         output[[well.index]]$stat1 <- stat1 
         output[[well.index]]$well <- well 
     
    
     
    
       } 
   
  
     list(data = output, 
                  div = unlist(strsplit(unlist( 
                      strsplit(basename(s$file), "_"))[4], "[.]"))[1], 
                  nb_times = nb_times) 
   
  
   } 
 

 .nb_merge_result <- function(s, result, sigmas) { 
     wells <- character() 
     divs <- character() 
     phenotypes <- character() 
     data_all <- numeric() 
   
  
     w_fun <- function(x) { 
         x$well 
       } 
     for (i in 1: length(result)) { 
         current <- result[[i]] 
         current_wells <- sapply(current$data, w_fun) 
         divs <- c(divs, rep(current$div, length(current_wells))) 
         wells <- c(wells, current_wells) 
         phenotypes <- c(phenotypes, s[[i]]$treatment[current_wells]) 
     
    
         data <- matrix(0, length(current_wells), 
                                               length(c(as.vector(current$data[[1]]$stat1), 
                                                                                        as.vector(current$data[[1]]$stat0)))) 
         for (j in 1:length(current_wells)) { 
             data[j, ] <- c(as.vector(current$data[[j]]$stat1), 
                                                     as.vector(current$data[[j]]$stat0)) 
           } 
         data_all <- rbind(data_all, data) 
       } 
   
  
   
  
     feature_names <- character() 
     for (i in 1:length(sigmas)) { 
         feature_names <- c(feature_names, 
                                        paste(rownames(current$data[[1]]$stat1), sigmas[i], sep = "_")) 
       } 
     feature_names <- c(feature_names, colnames(current$data[[1]]$stat0)) 
   
  
     df <- data.frame(divs, wells, phenotypes, data_all) 
     names(df)[4:dim(df)[2]] <- feature_names 
   
  
     divs <- sapply(df["divs"], as.character) 
     df <- df[mixedorder(divs), ] 
   
  
     result <- list() # return the result as matrix and un-altered feature names 
     result$df <- df 
     result$feature_names <- feature_names 
   
  
     result 
   } 
 

 nb_matrix_to_feature_dfs <- function(matrix_and_feature_names) { 
     data <- matrix_and_feature_names$df 
     feature_names <- matrix_and_feature_names$feature_names 
     data <- data.frame(lapply(data, as.character), stringsAsFactors = FALSE) 
     n_features <- dim(data)[2] - 3 # escape the first columns 
     dfs <- list() 
     well_stat <- table(data[, "wells"]) 
   
  
     ref_matrix <- matrix(NA, length(well_stat), length(unique(data[, 1]))) 
     wells <- unique(data[, c("wells", "phenotypes")]) 
     rownames(ref_matrix) <- wells[, "wells"] 
     colnames(ref_matrix) <- unique(data[, 1]) 
   
  
     # now change columnnames to match Ryan's code 
     colnames(wells) <- c("well", "treatment") 
     n <- dim(data)[1] 
     for (index in 1:n_features) { 
         data.matrix <- ref_matrix 
         for (i in 1:n) { 
             data.matrix[data[i, "wells"], data[i, "divs"]] <- data[i, index + 3] 
           } 
         dfs[[index]] <- .sort_df(cbind(wells, data.matrix)) 
         # [] keep it as a data.frame 
         dfs[[index]][] <- lapply(dfs[[index]], as.character) 
       } 
     names(dfs) <- feature_names 
     dfs 
   } 
 

 .wilcox_test_perm <- function(data, np, g1, g2, feature_index) { 
     # now figure out the p from data 
     d1 <- data[data[, "phenotypes"] == g1, feature_index] 
     d1 <- d1[d1 >= 0] 
     d2 <- data[data[, "phenotypes"] == g2, feature_index] 
     d2 <- d2[d2 >= 0] 
     suppressWarnings(data_p <- wilcox.test(d1, d2)$p.value) 
   
  
     # subsetting data to genotypes and feature, 
     #and also reformat into matrix for easy permutation 
     d <- data[(data[, "phenotypes"] == g1) | 
                                   (data[, "phenotypes"] == g2), c(2, feature_index)] 
     d[, "wells"] <- factor(d[, "wells"]) # drop unused levels 
     well_stat <- table(d[, "wells"]) 
     data.matrix <- matrix(NA, length(well_stat), max(well_stat)) 
     rownames(data.matrix) <- names(well_stat) 
     n_cases <- length(unique(data[data[, "phenotypes"] == g1, "wells"])) 
     n <- dim(data.matrix)[1] 
     for (i in 1:n) { 
         temp <- d[d[, "wells"] == rownames(data.matrix)[i], 2] 
         data.matrix[i, 1:length(temp)] <- temp 
       } 
   
  
     outp <- matrix(0, np, 1) 
     for (i in 1:np) { 
         cases <- sample(n, n_cases) 
         d1 <- as.vector(data.matrix[cases, ]) 
         d1 <- d1[d1 >= 0] 
         d2 <- as.vector(data.matrix[- cases, ]) 
         d2 <- d2[d2 >= 0] 
         suppressWarnings(outp[i] <- wilcox.test(d1, d2)$p.value) 
       } 
     outp <- sort(outp) 
   
  
     perm_p <- length(which(outp < data_p)) / np 
     list(perm_p = perm_p, outp = outp) 
   
  
   } 
 

 

 calculate_network_bursts_mod <- 
     function(s, sigmas, min_electrodes, local_region_min_nae) { 
       # extract features and merge features 
       # from different recordings into one data frame 
       nb_structure <- list() 
       nb_structure$summary <- list() 
       nb_structure$nb_all <- list() 
       nb_structure$nb_features <- list() 
       if (length(s) > 0) { 
           features_extracted_all_div <- list() 
           for (i in 1:length(s)) { 
               features_extracted_all_div[[i]] <- 
                   .nb_extract_features_mod(s[[i]], 
                                        sigmas, min_electrodes, local_region_min_nae) 
               features_extracted_one_div <- list() 
               features_extracted_one_div[[1]] <- features_extracted_all_div[[i]] 
               nb_structure$nb_all[[i]] <- features_extracted_all_div[[i]]$nb_times 
               nb_structure$result[[i]] <- features_extracted_all_div[[i]] 
               nb_structure$nb_features[[i]] <- 
                   .nb_merge_result(s, features_extracted_one_div, sigmas) 
         
        
             } 
           nb_structure$nb_features_merged <- 
               .nb_merge_result(s, nb_structure$result, sigmas) 
         } 
       nb_structure 
     } 
 
 .sort_df <- function(df) {
   # natural order sorting
   df_divs <- df[3:ncol(df)]
   df_sorted <- df_divs[, mixedorder(names(df_divs)), drop = FALSE]
   df_sorted <- cbind(treatment = df$treatment, df_sorted)
   df_sorted <- cbind(well = df$well, df_sorted)
   return(df_sorted)
 }