aggregate_features_algorithm_LAa.R 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. # Laura Aarnos 190213
  2. # meaRtools: aggegate features (burst)
  3. # original functions from meaRtools 2018, modification made by LAa and marked with _mod
  4. ###############################################################################
  5. # Purpose: Functions for aggregating data from s objects into single #
  6. # dataframes per feature #
  7. # Author: Ryan Dhindsa #
  8. ###############################################################################
  9. # These functions take data from S object to make dataframes
  10. .write_spike_summary <- function(s) {
  11. # Creates a list of dataframes for spike features. Each df corresponds to
  12. # a single DIV and contains values for each feature
  13. #
  14. # Args:
  15. # s object
  16. #
  17. # Returns:
  18. # list of data frames containing spike data
  19. # initiate empty list to store dataframes
  20. divs_df <- list()
  21. # loop through DIVs in s object and create 1 df per DIV.
  22. # Each df gets stored in divs_df
  23. for (i in 1:length(s)) {
  24. div <- paste("div", .get_div(s[[i]]), sep = "")
  25. df <- .spike_summary_by_well(s[[i]])
  26. df <- cbind(rownames(df), df)
  27. df <- as.data.frame(unclass(df)) # convert strings to factors
  28. colnames(df)[1] <- "well"
  29. divs_df[[div]] <- df
  30. }
  31. divs_df <- do.call(rbind, lapply(names(divs_df),
  32. function(x) cbind(div = x, divs_df[[x]])))
  33. return(divs_df)
  34. }
  35. .compile_ns <- function(s, nspikes) {
  36. # Calculates network spikes
  37. # Called from .write_network_spike_summary
  38. active_wells <- .active_wells_network_spikes(nspikes)$ns_all
  39. if (length(active_wells) > 0) {
  40. newcol <- 3
  41. # 2 to peak_min and peak_max
  42. p <- length(active_wells[[1]]$brief) +
  43. length(active_wells[[1]]$mean) + newcol
  44. nsdata <- matrix(0, length(s$well), p)
  45. temp <- c()
  46. length_temp_mean <- length(active_wells[[1]]$mean)
  47. for (j in 1:length(s$well)) {
  48. cur_well <- s$well[j]
  49. if (is.element(cur_well, names(active_wells))){
  50. temp <- active_wells[[cur_well]]
  51. nsdata[j, 1:length(temp$brief)] <- temp$brief
  52. nsdata[j, length(temp$brief) + 1] <- min(temp$measures[, "peak_val"])
  53. nsdata[j, length(temp$brief) + 2] <- max(temp$measures[, "peak_val"])
  54. nsdata[j, length(temp$brief) + 3] <- s$treatment[cur_well]
  55. nsdata[j, (length(temp$brief) + newcol + 1):p] <- as.double(temp$mean)
  56. } else {
  57. temp$brief <- c(0, rep(NA, 4), 0, NA, NA)
  58. nsdata[j, 1:length(temp$brief)] <- temp$brief
  59. nsdata[j, length(temp$brief) + 1] <- NA
  60. nsdata[j, length(temp$brief) + 2] <- NA
  61. nsdata[j, length(temp$brief) + 3] <- NA
  62. nsdata[j, (length(temp$brief) + newcol + 1):p] <-
  63. rep(0, length_temp_mean)
  64. }
  65. }
  66. nsdata <- data.frame(nsdata)
  67. names(nsdata)[1:length(temp$brief)] <- names(active_wells[[1]]$brief)
  68. names(nsdata)[(length(temp$brief) + 1):(length(temp$brief) + newcol)] <-
  69. c("peak_min", "peak_max", "treatment")
  70. for (j in 1:(p - length(temp$brief) - newcol)) {
  71. names(nsdata)[j + newcol + length(temp$brief)] <- paste("t", j, sep = "")
  72. }
  73. nsdata <- cbind(s$well, nsdata)
  74. names(nsdata)[1] <- "well"
  75. return(nsdata)
  76. }
  77. }
  78. .write_network_spike_summary <- function(s, parameters) {
  79. # Creates a list of dataframes for network spike features.
  80. # Each df corresponds to a single DIV and contains values for each feature
  81. #
  82. # Args:
  83. # s object
  84. #
  85. # Returns:
  86. # list of data frames containing spike data
  87. # initiate empty list to store dataframes
  88. divs_df <- list()
  89. # calculate network spikes
  90. for (i in 1:length(s)) {
  91. div <- paste("div", .get_div(s[[i]]), sep = "")
  92. nspikes_old <- calculate_network_spikes(s[[i]], parameters$sur,
  93. parameters$ns_n, parameters$ns_t)
  94. nspikes <- summarize_network_spikes(s[[i]], nspikes_old,
  95. ns_e = 1, parameters$sur)
  96. basename <- strsplit(basename(s[[i]]$file), "[.]")[[1]][1]
  97. df <- .compile_ns(s[[i]], nspikes)
  98. df <- as.data.frame(unclass(df)) # convert strings to factors
  99. df <- df[, - grep("^t[0-9]", colnames(df))]
  100. divs_df[[div]] <- df
  101. }
  102. for (name in names(divs_df)) {
  103. # If no data for DIV - remove the DIV
  104. if (length(divs_df[[name]]) == 0) {
  105. divs_df[[name]] <- NULL
  106. }
  107. }
  108. divs_df <- do.call(rbind, lapply(names(divs_df),
  109. function(x) cbind(div = x, divs_df[[x]])))
  110. return(divs_df)
  111. }
  112. .write_burst_summary_mod <- function(s) {
  113. # Creates a list of dataframes for bursting features. Each df corresponds to
  114. # a single DIV and contains values for each feature
  115. #
  116. # Args:
  117. # s object
  118. #
  119. # Returns:
  120. # list of data frames containing spike data
  121. master_sum <- .get_mean_burst_info_per_well_mod(s)
  122. divs_df <- list()
  123. for (i in 1:length(s)) {
  124. div <- paste("div", .get_div(s[[i]]), sep = "")
  125. ########## data frame summarized over well
  126. # get number of object in master_sum[[1]] list
  127. tempdf <- c(); tempcolnames <- c()
  128. for (j in 2:length(master_sum[[i]])) {
  129. tempc <- unlist(master_sum[[i]][j])
  130. tempdf <- cbind(tempdf, tempc)
  131. tempcolnames <- c(tempcolnames, names(master_sum[[i]][j]))
  132. } # end of loop through master_sum list objects
  133. # need to switch around columns so first columns come first
  134. if (dim(tempdf)[2] > 20) {
  135. if (dim(tempdf)[1] == 1) {
  136. df <- cbind(t(tempdf[, 21:25]), t(tempdf[, 1:20]))
  137. } else {
  138. df <- cbind(tempdf[, 21:25], tempdf[, 1:20])
  139. }
  140. colnames <- c(tempcolnames[21:25], tempcolnames[1:20])
  141. colnames(df) <- colnames
  142. }
  143. df <- as.data.frame(unclass(df)) # convert strings to factors
  144. df$file <- NULL
  145. row.names(df) <- df$well
  146. divs_df[[div]] <- df
  147. }
  148. divs_df <- do.call(rbind, lapply(names(divs_df),
  149. function(x) cbind(div = x, divs_df[[x]])))
  150. return(divs_df)
  151. }
  152. .create_feat_df <- function(s, df, feature, all_feat_list) {
  153. df <- df[!is.na(df$treatment) & df$treatment != "", ]
  154. x <- data.frame(df$div, df$well, df$treatment, df[, feature])
  155. colnames(x) <- c("div", "well", "treatment", feature)
  156. y <- dcast(x, well~div, value.var = feature)
  157. well_to_treatment <- x[!duplicated(x$well), c("well", "treatment")]
  158. # reorder in case early wells show up in later DIVs
  159. well_to_treatment <-
  160. well_to_treatment[order(as.character(well_to_treatment$well)), ]
  161. ymerged <- merge(x = y, y = well_to_treatment, by = c("well"), all.x = TRUE)
  162. ymerged <- ymerged[order(as.character(ymerged$well)), ]
  163. y <- ymerged[c(1, dim(ymerged)[2], 2:(dim(ymerged)[2] - 1))]
  164. y <- .sort_df(y)
  165. return(y)
  166. }
  167. .sort_df <- function(df) {
  168. # natural order sorting
  169. df_divs <- df[3:ncol(df)]
  170. df_sorted <- df_divs[, mixedorder(names(df_divs)), drop = FALSE]
  171. df_sorted <- cbind(treatment = df$treatment, df_sorted)
  172. df_sorted <- cbind(well = df$well, df_sorted)
  173. return(df_sorted)
  174. }
  175. aggregate_features_mod <- function(s, feat_type, parameters=list()) {
  176. # Takes in s object and creates a dataframe for each feature.
  177. # based on the feature type (spikes, ns, etc), it calls appropriate
  178. # function (e.g. .write_spike_summary if feat_type is "spikes")
  179. #
  180. # Args:
  181. # s object
  182. # feat_type = "spike", "ns", or "burst"
  183. #
  184. # Returns:
  185. # list of data frames (one df per feature)
  186. # write feature summaries (calls xxx.summary.by.well from meaRtools)
  187. if (feat_type == "spike") {
  188. divs_df <- .write_spike_summary(s)
  189. } else if (feat_type == "ns"){
  190. divs_df <- .write_network_spike_summary(s, parameters)
  191. } else if (feat_type == "burst") {
  192. divs_df <- .write_burst_summary_mod(s)
  193. divs_df$size <- NULL;
  194. divs_df$dose <- NULL;
  195. }
  196. all_features <- list()
  197. if (!is.null(divs_df)){
  198. # create list of dataframes (one dataframe per feature)
  199. feature_names <- colnames(divs_df)
  200. remove <- c("div", "treatment", "well")
  201. feature_names <- setdiff(feature_names, remove)
  202. # test
  203. for (i in 1:length(feature_names)) {
  204. df <- .create_feat_df(s, divs_df, feature_names[i], all_features)
  205. all_features[[feature_names[i]]] <- df
  206. }
  207. } else {
  208. all_features <- NULL
  209. }
  210. return(all_features)
  211. }
  212. .get_mean_burst_info_per_well_mod <- function(s) {
  213. master_sum <- list() # summary over all files
  214. for (i in 1:length(s)) {
  215. sum <- list() # summary for each timepoint
  216. # calculate bursting variables for current data File
  217. nbursts <- sapply(s[[i]]$allb, nrow)
  218. allb <- s[[i]]$allb
  219. tempsum <- calc_burst_summary(s[[i]])
  220. ###### NEW PART 1/3 ######
  221. for (t in 1:nrow(tempsum)) {
  222. if (tempsum[t,'nbursts'] == 0) {
  223. tempsum[t,'bursts_per_sec'] <- NA
  224. tempsum[t,'bursts_per_min'] <- NA
  225. tempsum[t,'mean_dur'] <- NA
  226. tempsum[t,'mean_spikes'] <- NA
  227. }
  228. }
  229. ###### NEW PART 1/3 ENDS ######
  230. # isis: gets the ISI for each channel of s[[i]]
  231. isis <- .calc_all_isi(s[[i]], allb)
  232. # IBIs get IBI's across all inter burst intervals across all data
  233. temp_ibis <- .calc_all_ibi(s[[i]], allb)
  234. # loop through goodwells
  235. for (j in 1:length(s[[i]]$goodwells)) {
  236. # indicator of current well
  237. icurrentwell <- (s[[i]]$goodwells[j] == s[[i]]$cw)
  238. # index of current well
  239. incurrentwell <- which(s[[i]]$goodwells[j] == s[[i]]$cw)
  240. if (sum(icurrentwell) != 0){
  241. ##### variables that need summing and averaging
  242. # total spikes across all AE in current well
  243. sum$nspikes[j] <- sum(tempsum$spikes[icurrentwell], na.rm = TRUE)
  244. sum$nAB[j] <- length(which(nbursts[incurrentwell] > 0))
  245. # Total recorded time on current well= recording time * nae
  246. sum$duration[j] <-
  247. length(incurrentwell) * (s[[i]]$rec_time[2] - s[[i]]$rec_time[1])
  248. # mean duration
  249. sum$mean_dur[j] <- mean(tempsum$mean_dur[incurrentwell], na.rm = TRUE)
  250. # mean spikes per second
  251. sum$mean_freq[j] <- mean(tempsum$mean_freq[incurrentwell], na.rm = TRUE)
  252. # total number of bursts
  253. sum$nbursts[j] <- sum(tempsum$nbursts[icurrentwell], na.rm = TRUE)
  254. # mean burst per second
  255. sum$bursts_per_sec[j] <- mean(tempsum$bursts_per_sec[incurrentwell], na.rm = TRUE)
  256. # mean burst per minute
  257. sum$bursts_per_min[j] <- sum$bursts_per_sec[j] * 60
  258. # finds the mean of duration for a particular well (across all channels)
  259. # get_burst_info(allb[icurrentwell],"durn")
  260. # takes out the column "durn" of all
  261. # matricies allb among the indicator set icurrentwell
  262. # get duration data across all channels of current well
  263. #sum$mean_dur[j] <-
  264. # mean(unlist(get_burst_info(allb[icurrentwell], "durn")), na.rm = TRUE)
  265. ######## NEW PART 2/3 #########
  266. my.dur <- unlist(get_burst_info(allb[icurrentwell], "durn"))
  267. my.len <- unlist(get_burst_info(allb[incurrentwell], "len"))
  268. for (m in 1:length(my.dur)) {
  269. if (my.dur[m] == 0) {
  270. my.dur[m] <- NA
  271. }
  272. }
  273. for (n in 1:length(my.len)) {
  274. if (my.len[n] == 0) {
  275. my.len[n] <- NA
  276. }
  277. }
  278. # get duration data across all channels of current well
  279. sum$mean_dur[j] <-
  280. mean(my.dur, na.rm = TRUE)
  281. # sd of current well burst durations
  282. sum$sd_dur[j] <- sd(my.dur, na.rm = TRUE)
  283. # mean frequency within a burst
  284. sum$mean_freq_in_burst[j] <-
  285. mean(my.len /
  286. my.dur, na.rm = TRUE)
  287. # sd frequency within a burst
  288. sum$sd_freq_in_burst[j] <-
  289. sd(my.len /
  290. my.dur, na.rm = TRUE)
  291. ######## NEW PART 2/3 ENDS ########
  292. # sd of current well burst durations
  293. #sum$sd_dur[j] <- sd(unlist(get_burst_info(allb[icurrentwell], "durn")))
  294. # mean frequency within a burst
  295. #sum$mean_freq_in_burst[j] <-
  296. # mean(unlist(get_burst_info(allb[incurrentwell], "len")) /
  297. # unlist(get_burst_info(allb[incurrentwell], "durn")), na.rm = TRUE)
  298. # sd frequency within a burst
  299. #sum$sd_freq_in_burst[j] <-
  300. # sd(unlist(get_burst_info(allb[incurrentwell], "len")) /
  301. # unlist(get_burst_info(allb[incurrentwell], "durn")), na.rm = TRUE)
  302. # mean of ISI across all channels in current well
  303. sum$mean_isis[j] <- mean(unlist(isis[incurrentwell]), na.rm = TRUE)
  304. # finds sd of ISI across all channels in current well
  305. sum$sd_isis[j] <- sd(unlist(isis[incurrentwell]), na.rm = TRUE)
  306. ######## NEW PART 3/3 ########
  307. # mean spikes in burst
  308. sum$mean_spikes_in_burst[j] <- round(mean(my.len, na.rm = TRUE), 3)
  309. # sd of spikes in burst
  310. sum$sd_spikes_in_burst[j] <- round(sd(my.len, na.rm = TRUE), 3)
  311. # total number of spikes arcross all bursts
  312. sum$total_spikes_in_burst[j] <- sum(unlist(get_burst_info(allb[incurrentwell], "len")), na.rm = TRUE)
  313. ######## NEW PART 3/3 ENDS ########
  314. # len=#spikes in burst (length of burst in bursts)
  315. # mean_spikes_in_burst
  316. #ns <- unlist(get_burst_info(allb[icurrentwell], "len"))
  317. #sum$mean_spikes_in_burst[j] <- round(mean(ns, na.rm = TRUE), 3)
  318. # sd of spikes in burst
  319. #sum$sd_spikes_in_burst[j] <- round(sd(ns, na.rm = TRUE), 3)
  320. # total number of spikes arcross all bursts
  321. #sum$total_spikes_in_burst[j] <- sum(ns, na.rm = TRUE)
  322. # percent of spikes in bursts
  323. sum$per_spikes_in_burst[j] <-
  324. round(100 * (sum$total_spikes_in_burst[j] / sum$nspikes[j]), 3)
  325. # mean IBI
  326. sum$mean_ibis[j] <-
  327. round(mean(unlist(temp_ibis[incurrentwell]), na.rm = TRUE), 3)
  328. # sd IBI
  329. sum$sd_ibis[j] <-
  330. round(sd(unlist(temp_ibis[incurrentwell]), na.rm = TRUE), 3)
  331. # cv IBI
  332. sum$cv_ibis[j] <- round(sum$mean_ibis[j] / sum$sd_ibis[j], 3)
  333. } else {
  334. sum$nspikes[j] <- NA
  335. sum$nAB[j] <- NA
  336. sum$duration[j] <- NA
  337. sum$mean_dur[j] <- NA
  338. sum$mean_freq[j] <- NA
  339. sum$nbursts[j] <- NA
  340. sum$bursts_per_sec[j] <- NA
  341. sum$bursts_per_min[j] <- NA
  342. sum$mean_dur[j] <- NA
  343. sum$sd_dur[j] <- NA
  344. sum$mean_freq_in_burst[j] <- NA
  345. sum$sd_freq_in_burst[j] <- NA
  346. sum$mean_isis[j] <- NA
  347. sum$sd_isis[j] <- NA
  348. sum$mean_spikes_in_burst[j] <- NA
  349. sum$sd_spikes_in_burst[j] <- NA
  350. sum$total_spikes_in_burst[j] <- NA
  351. sum$per_spikes_in_burst[j] <- NA
  352. sum$mean_ibis[j] <- NA
  353. sum$sd_ibis[j] <- NA
  354. sum$cv_ibis[j] <- NA
  355. }
  356. }
  357. ### Set all names
  358. for (k in 1:length(names(sum))) {
  359. names(sum[[k]]) <- s[[i]]$goodwells
  360. }
  361. # make a master_sum, that is a list of all the summaries
  362. goodwellindex <- which(is.element(s[[i]]$well, s[[i]]$goodwells))
  363. master_sum[[i]] <- sum
  364. master_sum[[i]]$file <- strsplit(basename(s[[i]]$file), ".RData")[[1]][1]
  365. master_sum[[i]]$treatment <- s[[i]]$treatment[goodwellindex]
  366. master_sum[[i]]$size <- s[[i]]$size[goodwellindex]
  367. master_sum[[i]]$dose <- s[[i]]$dose[goodwellindex]
  368. master_sum[[i]]$well <- s[[i]]$well[goodwellindex]
  369. master_sum[[i]]$nae <- s[[i]]$nae[goodwellindex]
  370. master_sum[[i]]$timepoint <-
  371. rep(s[[i]]$timepoint[1], length(s[[i]]$goodwells))
  372. master_sum[[i]]$start_rec_time <-
  373. rep(s[[i]]$rec_time[1], length(s[[i]]$goodwells))
  374. master_sum[[i]]$end_rec_time <-
  375. rep(s[[i]]$rec_time[2], length(s[[i]]$goodwells))
  376. master_sum[[i]]$goodwells <- s[[i]]$goodwells
  377. }
  378. master_sum
  379. }
  380. .calc_all_isi <- function(s, allb) {
  381. ## Compute ISI within bursts for all spike trains.
  382. calc_isi <- function(spikes, b) {
  383. ## for one spike train, get all isis within bursts in that train.
  384. if (.num_bursts(b) == 0) {
  385. return(NA)
  386. }
  387. ## as.vector is needed below in case each burst is of the same
  388. ## length (in which case an array is returned by apply). In
  389. ## normal cases, bursts are of different lengths, so "apply"
  390. ## returns a list.
  391. isis <- as.vector(
  392. unlist(apply(b, 1,
  393. function(x) {
  394. diff(spikes[x[1]:x[2]])
  395. })))
  396. }
  397. nchannels <- s$NCells
  398. isis <- list()
  399. for (i in 1:nchannels) {
  400. isis[[i]] <- calc_isi(s$spikes[[i]], allb[[i]])
  401. }
  402. isis
  403. }
  404. .num_bursts <- function(b) {
  405. ## Return the number of bursts found for a spike train.
  406. if (is.na(b[1]))
  407. 0
  408. else
  409. nrow(b)
  410. }
  411. .calc_all_ibi <- function(s, allb) {
  412. ## Compute IBI for all spike trains.
  413. nchannels <- s$NCells
  414. ibis <- list()
  415. for (i in 1:nchannels) {
  416. ibis[[i]] <- .calc_ibi(s$spikes[[i]], allb[[i]])
  417. }
  418. ibis
  419. }
  420. .calc_ibi <- function(spikes, b) {
  421. ## Compute the interburst intervals (IBI) for one spike train.
  422. ## Only valid if more than one burst.
  423. nburst <- .num_bursts(b)
  424. if (nburst == 0) {
  425. res <- NA # no bursts
  426. } else {
  427. if (nburst == 1) {
  428. res <- NA # cannot compute IBI w/only 1 burst.
  429. } else {
  430. ## find end spike in each burst.
  431. end <- b[, "beg"] + b[, "len"] - 1
  432. ## for n bursts, there will be n bursts minus 1 IBIs.
  433. start_spikes <- b[2:nburst, "beg"]
  434. end_spikes <- end[1:(nburst - 1)]
  435. ## NEX uses a strange definition of IBI: it counts the time between
  436. ## the first spike of n burst and the first spike of n burst
  437. ## plus one as the IBI.
  438. ## If we want to use that definition, use the following line:
  439. ## end.spikes equal b where 1 to nburst minus 1
  440. res <- spikes[start_spikes] - spikes[end_spikes]
  441. }
  442. }
  443. res
  444. }
  445. .get_div <- function(s) {
  446. div <- NA
  447. if(length(s$div>0)){
  448. div<-s$div
  449. } else {
  450. t1 <- strsplit(s$file, split = "_", fixed = TRUE)
  451. for (i in t1[[1]]) {
  452. i <- toupper(i)
  453. if (nchar(i) > 2 && substr(i, 1, 3) == "DIV") {
  454. if (nchar(i) > 5) {
  455. i <- unlist(strsplit(i, split = ".", fixed = T))[1]
  456. }
  457. div <- as.numeric(substr(i, 4, nchar(i)))
  458. }
  459. }
  460. }
  461. # set default div as 1
  462. if (is.na(div)){ div <- 1}
  463. div
  464. }