github_nb_bursts_modifications_LAa.R 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. # Laura Aarnos 190527
  2. # NB from github updated 190430
  3. # There are four things to modify for bin time. Use Ctrl+f to find 'neuro group mod'.
  4. .graythresh <- function(I) {
  5. ## Implement the Otsu (1979) thresholding algorithm.
  6. I <- as.vector(I)
  7. if (min(I) < 0 | max(I) > 1) {
  8. stop("Data needs to be between 0 and 1")
  9. }
  10. I <- I * 256
  11. num_bins <- 256
  12. ## We specify a vector for the break points from 0 to num_bins.
  13. ## For num_bins=256, this produces the following bins:
  14. ## [0, 1], (1, 2], (2, 3], .... (255,256]
  15. ## note the [ on the first bin due to include.lowest = TRUE
  16. counts <- hist(I, 0:num_bins, plot = FALSE)$counts # NEW! OLD WAS: counts <- hist(I, num_bins, plot = FALSE)$counts
  17. # Variables names are chosen to be similar to the formulas in the Otsu paper.
  18. p <- counts / sum(counts)
  19. omega <- cumsum(p)
  20. mu <- cumsum(p * (1:num_bins))
  21. mu_t <- mu[length(mu)]
  22. sigma_b_squared <- (mu_t * omega - mu) ^ 2 / (omega * (1 - omega))
  23. sigma_b_squared[is.na(sigma_b_squared)] <- 0
  24. maxval <- max(sigma_b_squared)
  25. isfinite_maxval <- is.finite(maxval)
  26. if (isfinite_maxval) {
  27. idx <- mean(which(sigma_b_squared == maxval))
  28. # Normalize the threshold to the range [0, 1].
  29. level <- (idx - 1) / (num_bins - 1)
  30. } else {
  31. level <- 0.0
  32. }
  33. level
  34. }
  35. # convert the spike list to a matrix, probably not necessary anymore
  36. # but just to be consistent with matlab for now
  37. .nb_prepare <- function(s) {
  38. spikes <- s$spikes
  39. colnames <- names(spikes)
  40. slength <- sapply(spikes, length)
  41. max_elements <- max(slength)
  42. data <- matrix(0, length(colnames), max_elements)
  43. for (i in 1:length(colnames)) {
  44. data[i, 1:slength[[i]]] <- spikes[[i]]
  45. }
  46. rownames(data) <- colnames
  47. data <- t(data)
  48. data
  49. }
  50. # generate binned data for the well
  51. .nb_select_and_bin <- function(data, well, sbegin, send, bin) {
  52. well_data <- data[, grep(well, colnames(data))]
  53. well_data[well_data < sbegin | well_data > send] <- 0
  54. if (is.null(dim(well_data))) {
  55. dim(well_data) <- c(length(well_data), 1)
  56. }
  57. if (length(well_data) > 0) {
  58. well_data_postive <- well_data[well_data > 0]
  59. if (all(is.na(well_data_postive))) {
  60. to_add_back <- 0
  61. } else {
  62. to_add_back <- min(well_data[well_data > 0])
  63. }
  64. } else {
  65. to_add_back <- 0
  66. }
  67. well_data <- well_data[rowSums(well_data) > 0, ]
  68. well_data <- round(well_data * 12500) # Neuro Group MOD MCS: change 12500 to your sampling freq (original: round(well_data * 12500))
  69. temp <- well_data[well_data > 0]
  70. if (length(temp) > 0) {
  71. t_min <- min(temp)
  72. t_max <- max(temp)
  73. well_data <- well_data - t_min + 1
  74. range <- (t_max - t_min + 1)
  75. bins <- floor(range / bin)
  76. if (range %% bin > 0) {
  77. bins <- bins + 1
  78. }
  79. x <- (0:bins) * bin + 1
  80. y <- c(- Inf, x, max(x) + bin, + Inf)
  81. if (is.null(dim(well_data))) {
  82. dim(well_data) <- c(length(well_data), 1)
  83. }
  84. well_data_new <- matrix(0, bins + 1, dim(well_data)[2])
  85. for (i in 1: dim(well_data)[2]) {
  86. temp <- hist(well_data[, i], breaks = y, plot = FALSE)$counts
  87. temp <- temp[2:(length(temp) - 1)]
  88. well_data_new[, i] <- temp
  89. }
  90. well_data_new[well_data_new > 1] <- 1
  91. } else {
  92. well_data_new <- temp
  93. }
  94. list(well_data_new, to_add_back)
  95. }
  96. .nb_gaussian_filter <- function(sigma, well_data, min_electrodes) {
  97. sigma_input <- sigma
  98. if (length(well_data) == 0) {
  99. f2 <- numeric()
  100. } else {
  101. # a very flat filter. Consider a much shapper one later
  102. if (sigma %% 2 == 0) {
  103. sigma <- sigma + 1
  104. }
  105. filter_size <- sigma
  106. half_size <- (filter_size - 1) / 2
  107. x <- seq(- half_size, half_size, length = filter_size)
  108. gauss_filter <- exp(- x ^ 2 / (2 * sigma ^ 2))
  109. gauss_filter <- gauss_filter / sum(gauss_filter)
  110. f <- well_data
  111. if (length(which(apply(well_data, 2, max) > 0)) >= min_electrodes) {
  112. for (i in 1:dim(f)[2]) {
  113. if (max(well_data[, i]) > 0) {
  114. f[, i] <- filter(well_data[, i], gauss_filter, circular = TRUE)
  115. f[, i] <- f[, i] / max(f[, i])
  116. }
  117. }
  118. f1 <- rowSums(f)
  119. f1 <- f1 / max(f1)
  120. f2 <- filter(f1, gauss_filter, circular = TRUE)
  121. f2 <- f2 / max(f2)
  122. dim(f2) <- c(length(f2), 1)
  123. colnames(f2) <- sigma_input
  124. } else {
  125. f2 <- numeric()
  126. }
  127. }
  128. f2
  129. }
  130. .nb_get_spike_stats <- function(well_data, timespan) {
  131. stat <- matrix(0, 1, 3)
  132. if (length(well_data) > 0) {
  133. # number of active electrodes
  134. stat[1] <- length(which(apply(well_data, 2, max) > 0))
  135. # well level mfr
  136. stat[2] <- sum(well_data) / timespan
  137. # mfr by active electrodes
  138. if (stat[1] > 0) {
  139. stat[3] <- stat[2] / stat[1]
  140. }
  141. }
  142. colnames(stat) <- c("n_active_electrodes",
  143. "mfr_per_well", "mfr_per_active_electode")
  144. stat
  145. }
  146. .nb_get_burst_stats_intensities_mod <-
  147. function(well_data, f, timespan, bin_time, min_electrodes,
  148. sbegin, send, offset2=0, bin_size=0.00064) {
  149. # bin_size is set to 0.002 based on sample rate of 12500/s
  150. # Neuro Group MOD: bin_size is set to 0.00064 s based on sample rate of 12500/s - three occasions within this function
  151. n <- length(f)
  152. stat <- matrix(NA, 11, n)
  153. #rownames(stat) <- c("mean.NB.time.per.sec",
  154. # "total.spikes.in.all.NBs",
  155. # "percent.of.spikes.in.NBs",
  156. # "spike.intensity",
  157. # "spike.intensity.by.aEs",
  158. # "total.spikes.in.all.NBs,nNB",
  159. # "[total.spikes.in.all.NBs,nNB],nae",
  160. # "mean[spikes.in.NB,nae]",
  161. # "mean[spikes.in.NB,nae,NB.duration]",
  162. # "mean[spikes.in.NB]",
  163. # "total.number.of.NBs")
  164. rownames(stat) <- c("mean_NB_time_per_sec",
  165. "total_spikes_in_all_NBs",
  166. "percent_of_spikes_in_NBs",
  167. "spike_intensity",
  168. "spike_intensity_by_aEs",
  169. "total_spikes_in_all_NBs_d_nNB",
  170. "total_spikes_in_all_NBs_d_nNB_d_nae",
  171. "mean_spikes_in_NB_d_nae",
  172. "mean_spikes_in_NB_d_nae_d_NB_duration",
  173. "mean_spikes_in_NB",
  174. "total_number_of_NBs")
  175. colnames(stat) <- rep("NA", n)
  176. for (current in 1:n) {
  177. # not all have names
  178. if (length(f[[current]] > 0)) {
  179. colnames(stat)[current] <- colnames(f[[current]]) # not all have names
  180. }
  181. }
  182. nb_times <- list()
  183. length(nb_times) <- length(f)
  184. names(nb_times) <- names(f);
  185. for (current in 1:n) {
  186. temp <- f[[current]]
  187. if (length(temp) > 0) {
  188. n_active_electrodes <- length(which(apply(well_data, 2, max) > 0))
  189. temp[temp < 0] <- 0
  190. level <- .graythresh(temp)
  191. # get timestamps that are identified as part of network bursts
  192. indicator0 <- temp >= level
  193. if (min_electrodes > 0) {
  194. indicator <- c(0, indicator0, 0) # prepare for finding intervals
  195. diff_indicator <- diff(indicator)
  196. burst_start <- which(diff_indicator == 1)
  197. burst_end <- which(diff_indicator == - 1)
  198. if (length(burst_start) != length(burst_end)) {
  199. stop("Burst region no match")
  200. }
  201. ## filtered regions based on minimum number of active electrodes
  202. for (region in 1: length(burst_start)) {
  203. current_region_nae <-
  204. length(which(colSums(well_data[
  205. (burst_start[region]):(burst_end[region] - 1), ]) > 0))
  206. if (current_region_nae < min_electrodes) {
  207. indicator0[burst_start[region]:(burst_end[region] - 1)] <- 0
  208. }
  209. }
  210. }
  211. indicator <- c(0, indicator0, 0) # prepare for finding intervals
  212. diff_indicator <- diff(indicator)
  213. burst_start <- which(diff_indicator == 1)
  214. burst_end <- which(diff_indicator == - 1)
  215. if (length(burst_start) != length(burst_end)) {
  216. stop("Burst region no match")
  217. }
  218. nb_times_temp <- cbind.data.frame(burst_start, burst_end)
  219. nb_times_temp$start_t <- burst_start * 0.00064 + offset2 # Neuro Group MOD: 0.002 changed to 0.00064 (original: burst_start * 0.002 + offset2)
  220. nb_times_temp$end_t <- burst_end * 0.00064 + offset2 # Neuro Group MOD: 0.002 changed to 0.00064 (original: burst_end * 0.002 + offset2)
  221. nb_times[[current]] <- nb_times_temp
  222. # mean network burst time per second
  223. stat[1, current] <- sum(indicator0) * bin_time / timespan
  224. totalspikes <- rowSums(well_data)
  225. spikes_in_network_bursts <- sum(totalspikes * indicator0)
  226. stat[2, current] <- spikes_in_network_bursts
  227. # percentage of spikes in network bursts
  228. stat[3, current] <- stat[2, current] / sum(totalspikes)
  229. total_burst_regions <- sum(burst_end - burst_start)
  230. stat[4, current] <- spikes_in_network_bursts /
  231. total_burst_regions # overall Spike Intensity
  232. stat[5, current] <- stat[4, current] / n_active_electrodes
  233. stat[6, current] <- spikes_in_network_bursts / length(burst_start)
  234. stat[7, current] <- stat[6, current] / n_active_electrodes
  235. burst_region_length <- burst_end - burst_start
  236. total_spikes_per_active_electrode <- totalspikes / n_active_electrodes
  237. spikes_in_region_per_active_electrode <- matrix(0, length(burst_start), 2)
  238. for (region in 1: length(burst_start)) {
  239. spikes_in_region_per_active_electrode[region, 1] <-
  240. sum(total_spikes_per_active_electrode[
  241. burst_start[region]:(burst_end[region] - 1)])
  242. # normalized to HZ per active electrode within burst region
  243. spikes_in_region_per_active_electrode[region, 2] <-
  244. spikes_in_region_per_active_electrode[region, 1] /
  245. (burst_region_length[region] * bin_time)
  246. }
  247. stat[8, current] <- mean(spikes_in_region_per_active_electrode[, 1])
  248. stat[9, current] <- mean(spikes_in_region_per_active_electrode[, 2])
  249. stat[10, current] <- stat[8, current] * n_active_electrodes
  250. stat[11, current] <- length(burst_start)
  251. }
  252. }
  253. list(stat, nb_times)
  254. }
  255. # do not change bin and sf for MEA recordings, skip is in seconds (except with Neuro Group data)
  256. # Neuro Group MOD: bin changed from 25 to 8 - bin = bin_time(s)*sf
  257. # Neuro Group MOD MCS: additionally change sf (sampling freq) according to data
  258. .nb_extract_features_mod <- function(s, sigmas,
  259. min_electrodes,
  260. local_region_min_nae,
  261. duration = 0,
  262. bin = 8,
  263. sf = 12500,
  264. skip = 10) {
  265. df.spikes <- .nb_prepare(s)
  266. wells <- unique(substr(colnames(df.spikes), 1, 2))
  267. output <- list()
  268. sbegin <- floor(min(df.spikes[df.spikes > 0] / skip)) * skip + skip
  269. send <- floor(max(df.spikes[df.spikes > 0] / skip)) * skip
  270. timespan <- send - sbegin
  271. # bin: 25; #2ms, this is a parameter based on our recording
  272. # Neuro Group: bin: 8; 0.64ms based on our spike detection
  273. bin_time <- bin / sf
  274. nb_times <- list()
  275. length(nb_times) <- length(wells)
  276. names(nb_times) <- wells
  277. cat(paste0("calculating network bursts for recording ",
  278. basename(s$file), "\n"))
  279. for (well.index in 1: length(wells)) {
  280. well <- wells[well.index]
  281. temp_return <- .nb_select_and_bin(df.spikes, well, sbegin, send, bin)
  282. offset2 <- temp_return[[2]]
  283. well_data <- temp_return[[1]]
  284. f <- list()
  285. for (i in 1: length(sigmas)) {
  286. f[[i]] <- .nb_gaussian_filter(sigmas[i], well_data, min_electrodes)
  287. }
  288. names(f) <- sigmas
  289. stat0 <- .nb_get_spike_stats(well_data, timespan)
  290. res1 <- .nb_get_burst_stats_intensities_mod(well_data,
  291. f, timespan, bin_time, local_region_min_nae,
  292. sbegin, send, offset2)
  293. stat1 <- res1[[1]]
  294. nb_times[[well.index]] <- res1[[2]]
  295. output[[well.index]] <- list()
  296. output[[well.index]]$stat0 <- stat0
  297. output[[well.index]]$stat1 <- stat1
  298. output[[well.index]]$well <- well
  299. }
  300. list(data = output,
  301. div = unlist(strsplit(unlist(
  302. strsplit(basename(s$file), "_"))[4], "[.]"))[1],
  303. nb_times = nb_times)
  304. }
  305. .nb_merge_result <- function(s, result, sigmas) {
  306. wells <- character()
  307. divs <- character()
  308. phenotypes <- character()
  309. data_all <- numeric()
  310. w_fun <- function(x) {
  311. x$well
  312. }
  313. for (i in 1: length(result)) {
  314. current <- result[[i]]
  315. current_wells <- sapply(current$data, w_fun)
  316. divs <- c(divs, rep(current$div, length(current_wells)))
  317. wells <- c(wells, current_wells)
  318. phenotypes <- c(phenotypes, s[[i]]$treatment[current_wells])
  319. data <- matrix(0, length(current_wells),
  320. length(c(as.vector(current$data[[1]]$stat1),
  321. as.vector(current$data[[1]]$stat0))))
  322. for (j in 1:length(current_wells)) {
  323. data[j, ] <- c(as.vector(current$data[[j]]$stat1),
  324. as.vector(current$data[[j]]$stat0))
  325. }
  326. data_all <- rbind(data_all, data)
  327. }
  328. feature_names <- character()
  329. for (i in 1:length(sigmas)) {
  330. feature_names <- c(feature_names,
  331. paste(rownames(current$data[[1]]$stat1), sigmas[i], sep = "_"))
  332. }
  333. feature_names <- c(feature_names, colnames(current$data[[1]]$stat0))
  334. df <- data.frame(divs, wells, phenotypes, data_all)
  335. names(df)[4:dim(df)[2]] <- feature_names
  336. divs <- sapply(df["divs"], as.character)
  337. df <- df[mixedorder(divs), ]
  338. result <- list() # return the result as matrix and un-altered feature names
  339. result$df <- df
  340. result$feature_names <- feature_names
  341. result
  342. }
  343. nb_matrix_to_feature_dfs <- function(matrix_and_feature_names) {
  344. data <- matrix_and_feature_names$df
  345. feature_names <- matrix_and_feature_names$feature_names
  346. data <- data.frame(lapply(data, as.character), stringsAsFactors = FALSE)
  347. n_features <- dim(data)[2] - 3 # escape the first columns
  348. dfs <- list()
  349. well_stat <- table(data[, "wells"])
  350. ref_matrix <- matrix(NA, length(well_stat), length(unique(data[, 1])))
  351. wells <- unique(data[, c("wells", "phenotypes")])
  352. rownames(ref_matrix) <- wells[, "wells"]
  353. colnames(ref_matrix) <- unique(data[, 1])
  354. # now change columnnames to match Ryan's code
  355. colnames(wells) <- c("well", "treatment")
  356. n <- dim(data)[1]
  357. for (index in 1:n_features) {
  358. data.matrix <- ref_matrix
  359. for (i in 1:n) {
  360. data.matrix[data[i, "wells"], data[i, "divs"]] <- data[i, index + 3]
  361. }
  362. dfs[[index]] <- .sort_df(cbind(wells, data.matrix))
  363. # [] keep it as a data.frame
  364. dfs[[index]][] <- lapply(dfs[[index]], as.character)
  365. }
  366. names(dfs) <- feature_names
  367. dfs
  368. }
  369. .wilcox_test_perm <- function(data, np, g1, g2, feature_index) {
  370. # now figure out the p from data
  371. d1 <- data[data[, "phenotypes"] == g1, feature_index]
  372. d1 <- d1[d1 >= 0]
  373. d2 <- data[data[, "phenotypes"] == g2, feature_index]
  374. d2 <- d2[d2 >= 0]
  375. suppressWarnings(data_p <- wilcox.test(d1, d2)$p.value)
  376. # subsetting data to genotypes and feature,
  377. #and also reformat into matrix for easy permutation
  378. d <- data[(data[, "phenotypes"] == g1) |
  379. (data[, "phenotypes"] == g2), c(2, feature_index)]
  380. d[, "wells"] <- factor(d[, "wells"]) # drop unused levels
  381. well_stat <- table(d[, "wells"])
  382. data.matrix <- matrix(NA, length(well_stat), max(well_stat))
  383. rownames(data.matrix) <- names(well_stat)
  384. n_cases <- length(unique(data[data[, "phenotypes"] == g1, "wells"]))
  385. n <- dim(data.matrix)[1]
  386. for (i in 1:n) {
  387. temp <- d[d[, "wells"] == rownames(data.matrix)[i], 2]
  388. data.matrix[i, 1:length(temp)] <- temp
  389. }
  390. outp <- matrix(0, np, 1)
  391. for (i in 1:np) {
  392. cases <- sample(n, n_cases)
  393. d1 <- as.vector(data.matrix[cases, ])
  394. d1 <- d1[d1 >= 0]
  395. d2 <- as.vector(data.matrix[- cases, ])
  396. d2 <- d2[d2 >= 0]
  397. suppressWarnings(outp[i] <- wilcox.test(d1, d2)$p.value)
  398. }
  399. outp <- sort(outp)
  400. perm_p <- length(which(outp < data_p)) / np
  401. list(perm_p = perm_p, outp = outp)
  402. }
  403. calculate_network_bursts_mod <-
  404. function(s, sigmas, min_electrodes, local_region_min_nae) {
  405. # extract features and merge features
  406. # from different recordings into one data frame
  407. nb_structure <- list()
  408. nb_structure$summary <- list()
  409. nb_structure$nb_all <- list()
  410. nb_structure$nb_features <- list()
  411. if (length(s) > 0) {
  412. features_extracted_all_div <- list()
  413. for (i in 1:length(s)) {
  414. features_extracted_all_div[[i]] <-
  415. .nb_extract_features_mod(s[[i]],
  416. sigmas, min_electrodes, local_region_min_nae)
  417. features_extracted_one_div <- list()
  418. features_extracted_one_div[[1]] <- features_extracted_all_div[[i]]
  419. nb_structure$nb_all[[i]] <- features_extracted_all_div[[i]]$nb_times
  420. nb_structure$result[[i]] <- features_extracted_all_div[[i]]
  421. nb_structure$nb_features[[i]] <-
  422. .nb_merge_result(s, features_extracted_one_div, sigmas)
  423. }
  424. nb_structure$nb_features_merged <-
  425. .nb_merge_result(s, nb_structure$result, sigmas)
  426. }
  427. nb_structure
  428. }
  429. .sort_df <- function(df) {
  430. # natural order sorting
  431. df_divs <- df[3:ncol(df)]
  432. df_sorted <- df_divs[, mixedorder(names(df_divs)), drop = FALSE]
  433. df_sorted <- cbind(treatment = df$treatment, df_sorted)
  434. df_sorted <- cbind(well = df$well, df_sorted)
  435. return(df_sorted)
  436. }