- # Laura Aarnos 190527
- # NB from github updated 190430
- # There are four places to modify for the bin time. Use Ctrl+F to find 'neuro group mod'.
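- # Worked example (for orientation only): the bin width in seconds is
- #   bin_time = bin / sf
- # so the Neuro Group settings bin = 8 samples at sf = 12500 Hz give
- #   8 / 12500 = 0.00064 s (0.64 ms),
- # while the original defaults bin = 25 and sf = 12500 give 25 / 12500 = 0.002 s (2 ms).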
- .graythresh <- function(I) {
- ## Implement the Otsu (1979) thresholding algorithm.
- I <- as.vector(I)
- if (min(I) < 0 || max(I) > 1) {
- stop("Data needs to be between 0 and 1")
- }
- I <- I * 256
-
-
- num_bins <- 256
-
-
- ## We specify a vector for the break points from 0 to num_bins.
- ## For num_bins=256, this produces the following bins:
- ## [0, 1], (1, 2], (2, 3], .... (255,256]
- ## note the [ on the first bin due to include.lowest = TRUE
- counts <- hist(I, breaks = 0:num_bins, plot = FALSE)$counts # explicit breaks (previously hist(I, num_bins, ...))
-
-
- # Variables names are chosen to be similar to the formulas in the Otsu paper.
- p <- counts / sum(counts)
- omega <- cumsum(p)
- mu <- cumsum(p * (1:num_bins))
- mu_t <- mu[length(mu)]
-
-
- sigma_b_squared <- (mu_t * omega - mu) ^ 2 / (omega * (1 - omega))
- sigma_b_squared[is.na(sigma_b_squared)] <- 0
- maxval <- max(sigma_b_squared)
- isfinite_maxval <- is.finite(maxval)
- if (isfinite_maxval) {
- idx <- mean(which(sigma_b_squared == maxval))
- # Normalize the threshold to the range [0, 1].
- level <- (idx - 1) / (num_bins - 1)
- } else {
- level <- 0.0
- }
- level
- }
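- # Illustrative usage (not part of the pipeline; example data is made up):
- # .graythresh() expects values scaled to [0, 1] and returns a threshold on the
- # same scale, e.g.
- #   x <- c(runif(100, 0, 0.2), runif(100, 0.8, 1))  # two well-separated groups
- #   .graythresh(x)                                  # threshold falls between them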
-
-
- # Convert the spike list to a matrix. Probably no longer necessary,
- # but kept for consistency with the MATLAB implementation for now.
- .nb_prepare <- function(s) {
- spikes <- s$spikes
- colnames <- names(spikes)
-
-
- slength <- sapply(spikes, length)
- max_elements <- max(slength)
-
-
- data <- matrix(0, length(colnames), max_elements)
- for (i in 1:length(colnames)) {
- data[i, 1:slength[[i]]] <- spikes[[i]]
- }
- rownames(data) <- colnames
- data <- t(data)
- data
- }
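- # Assumed input/output shape (a sketch based on how .nb_prepare() indexes s):
- # s$spikes is a named list with one numeric vector of spike times (in seconds)
- # per electrode; names combine well and electrode (e.g. "A1_11", though the
- # exact convention depends on the plate/reader). The result is a matrix with
- # one column per electrode, shorter spike trains padded with zeros.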
-
- # generate binned data for the well
- .nb_select_and_bin <- function(data, well, sbegin, send, bin) {
- well_data <- data[, grep(well, colnames(data))]
- well_data[well_data < sbegin | well_data > send] <- 0
- if (is.null(dim(well_data))) {
- dim(well_data) <- c(length(well_data), 1)
- }
- if (length(well_data) > 0) {
- well_data_positive <- well_data[well_data > 0]
- if (all(is.na(well_data_positive))) {
- to_add_back <- 0
- } else {
- to_add_back <- min(well_data_positive)
- }
- } else {
- to_add_back <- 0
- }
-
-
-
-
- well_data <- well_data[rowSums(well_data) > 0, ]
- well_data <- round(well_data * 12500) # Neuro Group MOD MCS: change 12500 to your sampling freq (original: round(well_data * 12500))
- temp <- well_data[well_data > 0]
- if (length(temp) > 0) {
- t_min <- min(temp)
- t_max <- max(temp)
- well_data <- well_data - t_min + 1
- range <- (t_max - t_min + 1)
- bins <- floor(range / bin)
- if (range %% bin > 0) {
- bins <- bins + 1
- }
- x <- (0:bins) * bin + 1
- y <- c(- Inf, x, max(x) + bin, + Inf)
-
-
- if (is.null(dim(well_data))) {
- dim(well_data) <- c(length(well_data), 1)
- }
- well_data_new <- matrix(0, bins + 1, dim(well_data)[2])
- for (i in 1: dim(well_data)[2]) {
- temp <- hist(well_data[, i], breaks = y, plot = FALSE)$counts
- temp <- temp[2:(length(temp) - 1)]
- well_data_new[, i] <- temp
- }
- well_data_new[well_data_new > 1] <- 1
- } else {
- well_data_new <- temp
- }
-
-
- list(well_data_new, to_add_back)
- }
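- # Worked example (assuming the 12500 Hz sampling frequency used above): a spike
- # at t = 60.0004 s becomes sample index round(60.0004 * 12500) = 750005; after
- # subtracting t_min the indices are histogrammed into bins of `bin` samples, and
- # any bin containing one or more spikes is set to 1.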
-
- .nb_gaussian_filter <- function(sigma, well_data, min_electrodes) {
- sigma_input <- sigma
- if (length(well_data) == 0) {
- f2 <- numeric()
- } else {
- # A very flat filter; consider a much sharper one later.
- if (sigma %% 2 == 0) {
- sigma <- sigma + 1
- }
- filter_size <- sigma
- half_size <- (filter_size - 1) / 2
- x <- seq(- half_size, half_size, length = filter_size)
- gauss_filter <- exp(- x ^ 2 / (2 * sigma ^ 2))
- gauss_filter <- gauss_filter / sum(gauss_filter)
- f <- well_data
- if (length(which(apply(well_data, 2, max) > 0)) >= min_electrodes) {
- for (i in 1:dim(f)[2]) {
- if (max(well_data[, i]) > 0) {
- f[, i] <- filter(well_data[, i], gauss_filter, circular = TRUE)
- f[, i] <- f[, i] / max(f[, i])
- }
- }
- f1 <- rowSums(f)
- f1 <- f1 / max(f1)
- f2 <- filter(f1, gauss_filter, circular = TRUE)
- f2 <- f2 / max(f2)
- dim(f2) <- c(length(f2), 1)
- colnames(f2) <- sigma_input
- } else {
- f2 <- numeric()
- }
- }
- f2
- }
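- # Kernel sketch: for sigma = 11 the filter has 11 taps, x runs over -5..5 and
- # the unnormalised weights are exp(-x^2 / (2 * 11^2)), so even the edge taps
- # keep about 90% of the peak weight - hence the "very flat filter" note above.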
-
- .nb_get_spike_stats <- function(well_data, timespan) {
- stat <- matrix(0, 1, 3)
- if (length(well_data) > 0) {
- # number of active electrodes
- stat[1] <- length(which(apply(well_data, 2, max) > 0))
- # well level mfr
- stat[2] <- sum(well_data) / timespan
- # mfr by active electrodes
- if (stat[1] > 0) {
- stat[3] <- stat[2] / stat[1]
- }
- }
- colnames(stat) <- c("n_active_electrodes",
- "mfr_per_well", "mfr_per_active_electode")
- stat
- }
- .nb_get_burst_stats_intensities_mod <-
- function(well_data, f, timespan, bin_time, min_electrodes,
- sbegin, send, offset2=0, bin_size=0.00064) {
- # Original default: bin_size = 0.002 s (25 samples at a 12500/s sample rate).
- # Neuro Group MOD: bin_size = 0.00064 s (8 samples at 12500/s); the same value
- # appears in three places in this function (the default above and the two
- # start/end time conversions below).
- n <- length(f)
- stat <- matrix(NA, 11, n)
-
- #rownames(stat) <- c("mean.NB.time.per.sec",
- # "total.spikes.in.all.NBs",
- # "percent.of.spikes.in.NBs",
- # "spike.intensity",
- # "spike.intensity.by.aEs",
- # "total.spikes.in.all.NBs,nNB",
- # "[total.spikes.in.all.NBs,nNB],nae",
- # "mean[spikes.in.NB,nae]",
- # "mean[spikes.in.NB,nae,NB.duration]",
- # "mean[spikes.in.NB]",
- # "total.number.of.NBs")
-
- rownames(stat) <- c("mean_NB_time_per_sec",
- "total_spikes_in_all_NBs",
- "percent_of_spikes_in_NBs",
- "spike_intensity",
- "spike_intensity_by_aEs",
- "total_spikes_in_all_NBs_d_nNB",
- "total_spikes_in_all_NBs_d_nNB_d_nae",
- "mean_spikes_in_NB_d_nae",
- "mean_spikes_in_NB_d_nae_d_NB_duration",
- "mean_spikes_in_NB",
- "total_number_of_NBs")
- colnames(stat) <- rep("NA", n)
- for (current in 1:n) {
- # not all of the filtered results have names
- if (length(f[[current]]) > 0) {
- colnames(stat)[current] <- colnames(f[[current]])
- }
- }
- nb_times <- list()
- length(nb_times) <- length(f)
- names(nb_times) <- names(f)
-
-
- for (current in 1:n) {
- temp <- f[[current]]
- if (length(temp) > 0) {
- n_active_electrodes <- length(which(apply(well_data, 2, max) > 0))
- temp[temp < 0] <- 0
- level <- .graythresh(temp)
- # get timestamps that are identified as part of network bursts
- indicator0 <- temp >= level
-
-
- if (min_electrodes > 0) {
- indicator <- c(0, indicator0, 0) # prepare for finding intervals
- diff_indicator <- diff(indicator)
- burst_start <- which(diff_indicator == 1)
- burst_end <- which(diff_indicator == - 1)
- if (length(burst_start) != length(burst_end)) {
- stop("Burst region no match")
- }
-
-
- ## filtered regions based on minimum number of active electrodes
- for (region in 1: length(burst_start)) {
- current_region_nae <-
- length(which(colSums(well_data[
- (burst_start[region]):(burst_end[region] - 1), ]) > 0))
- if (current_region_nae < min_electrodes) {
- indicator0[burst_start[region]:(burst_end[region] - 1)] <- 0
- }
- }
- }
-
-
- indicator <- c(0, indicator0, 0) # prepare for finding intervals
- diff_indicator <- diff(indicator)
- burst_start <- which(diff_indicator == 1)
- burst_end <- which(diff_indicator == - 1)
- if (length(burst_start) != length(burst_end)) {
- stop("Burst region no match")
- }
-
-
- nb_times_temp <- cbind.data.frame(burst_start, burst_end)
- nb_times_temp$start_t <- burst_start * 0.00064 + offset2 # Neuro Group MOD: 0.002 changed to 0.00064 (original: burst_start * 0.002 + offset2)
- nb_times_temp$end_t <- burst_end * 0.00064 + offset2 # Neuro Group MOD: 0.002 changed to 0.00064 (original: burst_end * 0.002 + offset2)
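- # Note: burst_start / burst_end index bins of `bin` samples (0.00064 s per bin
- # at 12500 Hz with bin = 8), and offset2 is the earliest spike time (in
- # seconds) of the well within the analysis window, removed during binning;
- # adding it back converts bin indices to approximate recording times.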
- nb_times[[current]] <- nb_times_temp
-
-
- # mean network burst time per second
- stat[1, current] <- sum(indicator0) * bin_time / timespan
- totalspikes <- rowSums(well_data)
- spikes_in_network_bursts <- sum(totalspikes * indicator0)
- stat[2, current] <- spikes_in_network_bursts
- # percentage of spikes in network bursts
- stat[3, current] <- stat[2, current] / sum(totalspikes)
-
-
- total_burst_regions <- sum(burst_end - burst_start)
- stat[4, current] <- spikes_in_network_bursts /
- total_burst_regions # overall Spike Intensity
- stat[5, current] <- stat[4, current] / n_active_electrodes
- stat[6, current] <- spikes_in_network_bursts / length(burst_start)
- stat[7, current] <- stat[6, current] / n_active_electrodes
-
-
- burst_region_length <- burst_end - burst_start
- total_spikes_per_active_electrode <- totalspikes / n_active_electrodes
- spikes_in_region_per_active_electrode <- matrix(0, length(burst_start), 2)
- for (region in 1: length(burst_start)) {
- spikes_in_region_per_active_electrode[region, 1] <-
- sum(total_spikes_per_active_electrode[
- burst_start[region]:(burst_end[region] - 1)])
- # normalized to HZ per active electrode within burst region
- spikes_in_region_per_active_electrode[region, 2] <-
- spikes_in_region_per_active_electrode[region, 1] /
- (burst_region_length[region] * bin_time)
- }
- stat[8, current] <- mean(spikes_in_region_per_active_electrode[, 1])
- stat[9, current] <- mean(spikes_in_region_per_active_electrode[, 2])
-
-
- stat[10, current] <- stat[8, current] * n_active_electrodes
- stat[11, current] <- length(burst_start)
- }
- }
- list(stat, nb_times)
- }
- # For standard MEA recordings do not change bin or sf; skip is in seconds.
- # (The Neuro Group defaults below differ from the originals.)
- # Neuro Group MOD: bin changed from 25 to 8 samples, where bin = bin_time (s) * sf.
- # Neuro Group MOD MCS: additionally set sf (sampling frequency) to match the data.
- .nb_extract_features_mod <- function(s, sigmas,
- min_electrodes,
- local_region_min_nae,
- duration = 0,
- bin = 8,
- sf = 12500,
- skip = 10) {
- df.spikes <- .nb_prepare(s)
- wells <- unique(substr(colnames(df.spikes), 1, 2))
- output <- list()
-
-
- sbegin <- floor(min(df.spikes[df.spikes > 0] / skip)) * skip + skip
- send <- floor(max(df.spikes[df.spikes > 0] / skip)) * skip
- timespan <- send - sbegin
- # Original: bin = 25 samples (2 ms), a parameter based on our recording setup.
- # Neuro Group: bin = 8 samples (0.64 ms), based on our spike detection.
- bin_time <- bin / sf
- nb_times <- list()
- length(nb_times) <- length(wells)
- names(nb_times) <- wells
- cat(paste0("calculating network bursts for recording ",
- basename(s$file), "\n"))
- for (well.index in 1: length(wells)) {
- well <- wells[well.index]
- temp_return <- .nb_select_and_bin(df.spikes, well, sbegin, send, bin)
- offset2 <- temp_return[[2]]
- well_data <- temp_return[[1]]
- f <- list()
- for (i in 1: length(sigmas)) {
- f[[i]] <- .nb_gaussian_filter(sigmas[i], well_data, min_electrodes)
-
-
- }
- names(f) <- sigmas
-
-
- stat0 <- .nb_get_spike_stats(well_data, timespan)
- res1 <- .nb_get_burst_stats_intensities_mod(well_data,
- f, timespan, bin_time, local_region_min_nae,
- sbegin, send, offset2)
- stat1 <- res1[[1]]
- nb_times[[well.index]] <- res1[[2]]
-
-
- output[[well.index]] <- list()
- output[[well.index]]$stat0 <- stat0
- output[[well.index]]$stat1 <- stat1
- output[[well.index]]$well <- well
-
-
-
-
- }
-
-
- # DIV label is parsed from the file name: the fourth "_"-separated token,
- # with anything after the first "." stripped.
- list(data = output,
- div = unlist(strsplit(unlist(
- strsplit(basename(s$file), "_"))[4], "[.]"))[1],
- nb_times = nb_times)
-
-
- }
-
- .nb_merge_result <- function(s, result, sigmas) {
- wells <- character()
- divs <- character()
- phenotypes <- character()
- data_all <- numeric()
-
-
- w_fun <- function(x) {
- x$well
- }
- for (i in 1: length(result)) {
- current <- result[[i]]
- current_wells <- sapply(current$data, w_fun)
- divs <- c(divs, rep(current$div, length(current_wells)))
- wells <- c(wells, current_wells)
- phenotypes <- c(phenotypes, s[[i]]$treatment[current_wells])
-
-
- data <- matrix(0, length(current_wells),
- length(c(as.vector(current$data[[1]]$stat1),
- as.vector(current$data[[1]]$stat0))))
- for (j in 1:length(current_wells)) {
- data[j, ] <- c(as.vector(current$data[[j]]$stat1),
- as.vector(current$data[[j]]$stat0))
- }
- data_all <- rbind(data_all, data)
- }
-
-
-
-
- feature_names <- character()
- for (i in 1:length(sigmas)) {
- feature_names <- c(feature_names,
- paste(rownames(current$data[[1]]$stat1), sigmas[i], sep = "_"))
- }
- feature_names <- c(feature_names, colnames(current$data[[1]]$stat0))
-
-
- df <- data.frame(divs, wells, phenotypes, data_all)
- names(df)[4:dim(df)[2]] <- feature_names
-
-
- divs <- sapply(df["divs"], as.character)
- df <- df[mixedorder(divs), ]
-
-
- result <- list() # return the merged data frame and the unaltered feature names
- result$df <- df
- result$feature_names <- feature_names
-
-
- result
- }
-
- nb_matrix_to_feature_dfs <- function(matrix_and_feature_names) {
- data <- matrix_and_feature_names$df
- feature_names <- matrix_and_feature_names$feature_names
- data <- data.frame(lapply(data, as.character), stringsAsFactors = FALSE)
- n_features <- dim(data)[2] - 3 # skip the first three columns (divs, wells, phenotypes)
- dfs <- list()
- well_stat <- table(data[, "wells"])
-
-
- ref_matrix <- matrix(NA, length(well_stat), length(unique(data[, 1])))
- wells <- unique(data[, c("wells", "phenotypes")])
- rownames(ref_matrix) <- wells[, "wells"]
- colnames(ref_matrix) <- unique(data[, 1])
-
-
- # now change column names to match Ryan's code
- colnames(wells) <- c("well", "treatment")
- n <- dim(data)[1]
- for (index in 1:n_features) {
- data.matrix <- ref_matrix
- for (i in 1:n) {
- data.matrix[data[i, "wells"], data[i, "divs"]] <- data[i, index + 3]
- }
- dfs[[index]] <- .sort_df(cbind(wells, data.matrix))
- # [] keep it as a data.frame
- dfs[[index]][] <- lapply(dfs[[index]], as.character)
- }
- names(dfs) <- feature_names
- dfs
- }
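- # Shape of the result (a sketch): a named list with one data frame per feature;
- # each data frame has one row per well with columns "well", "treatment" and then
- # one column per DIV, all values stored as character strings.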
-
- .wilcox_test_perm <- function(data, np, g1, g2, feature_index) {
- # now figure out the p from data
- d1 <- data[data[, "phenotypes"] == g1, feature_index]
- d1 <- d1[d1 >= 0]
- d2 <- data[data[, "phenotypes"] == g2, feature_index]
- d2 <- d2[d2 >= 0]
- suppressWarnings(data_p <- wilcox.test(d1, d2)$p.value)
-
-
- # subsetting data to genotypes and feature,
- #and also reformat into matrix for easy permutation
- d <- data[(data[, "phenotypes"] == g1) |
- (data[, "phenotypes"] == g2), c(2, feature_index)]
- d[, "wells"] <- factor(d[, "wells"]) # drop unused levels
- well_stat <- table(d[, "wells"])
- data.matrix <- matrix(NA, length(well_stat), max(well_stat))
- rownames(data.matrix) <- names(well_stat)
- n_cases <- length(unique(data[data[, "phenotypes"] == g1, "wells"]))
- n <- dim(data.matrix)[1]
- for (i in 1:n) {
- temp <- d[d[, "wells"] == rownames(data.matrix)[i], 2]
- data.matrix[i, 1:length(temp)] <- temp
- }
-
-
- outp <- matrix(0, np, 1)
- for (i in 1:np) {
- cases <- sample(n, n_cases)
- d1 <- as.vector(data.matrix[cases, ])
- d1 <- d1[d1 >= 0]
- d2 <- as.vector(data.matrix[- cases, ])
- d2 <- d2[d2 >= 0]
- suppressWarnings(outp[i] <- wilcox.test(d1, d2)$p.value)
- }
- outp <- sort(outp)
-
-
- perm_p <- length(which(outp < data_p)) / np
- list(perm_p = perm_p, outp = outp)
-
-
- }
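- # In words: data_p is the observed Wilcoxon p-value for g1 vs g2; the loop then
- # reassigns whole wells to the two groups at random np times and recomputes the
- # test, and perm_p is the fraction of those permuted p-values that fall below
- # the observed one (an empirical, well-level permutation p-value).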
-
-
- calculate_network_bursts_mod <-
- function(s, sigmas, min_electrodes, local_region_min_nae) {
- # extract features and merge features
- # from different recordings into one data frame
- nb_structure <- list()
- nb_structure$summary <- list()
- nb_structure$nb_all <- list()
- nb_structure$nb_features <- list()
- if (length(s) > 0) {
- features_extracted_all_div <- list()
- for (i in 1:length(s)) {
- features_extracted_all_div[[i]] <-
- .nb_extract_features_mod(s[[i]],
- sigmas, min_electrodes, local_region_min_nae)
- features_extracted_one_div <- list()
- features_extracted_one_div[[1]] <- features_extracted_all_div[[i]]
- nb_structure$nb_all[[i]] <- features_extracted_all_div[[i]]$nb_times
- nb_structure$result[[i]] <- features_extracted_all_div[[i]]
- nb_structure$nb_features[[i]] <-
- .nb_merge_result(s, features_extracted_one_div, sigmas)
-
-
- }
- nb_structure$nb_features_merged <-
- .nb_merge_result(s, nb_structure$result, sigmas)
- }
- nb_structure
- }
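- # Illustrative call (object name and parameter values are placeholders, not
- # defaults taken from this file); mixedorder() used downstream is assumed to
- # come from the gtools package:
- #   nb <- calculate_network_bursts_mod(s_list, sigmas = c(10, 20, 50),
- #                                      min_electrodes = 4,
- #                                      local_region_min_nae = 3)
- #   nb$nb_features_merged$df  # one row per well per DIV, feature columns per sigma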
-
- .sort_df <- function(df) {
- # natural order sorting
- df_divs <- df[3:ncol(df)]
- df_sorted <- df_divs[, mixedorder(names(df_divs)), drop = FALSE]
- df_sorted <- cbind(treatment = df$treatment, df_sorted)
- df_sorted <- cbind(well = df$well, df_sorted)
- return(df_sorted)
- }
-