123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560 |
- # Laura Aarnos 190213
- # meaRtools: aggegate features (burst)
- # original functions from meaRtools 2018, modification made by LAa and marked with _mod
- ###############################################################################
- # Purpose: Functions for aggregating data from s objects into single #
- # dataframes per feature #
- # Author: Ryan Dhindsa #
- ###############################################################################
- # These functions take data from S object to make dataframes
- .write_spike_summary <- function(s) {
- # Creates a list of dataframes for spike features. Each df corresponds to
- # a single DIV and contains values for each feature
- #
- # Args:
- # s object
- #
- # Returns:
- # list of data frames containing spike data
-
- # initiate empty list to store dataframes
- divs_df <- list()
-
- # loop through DIVs in s object and create 1 df per DIV.
- # Each df gets stored in divs_df
- for (i in 1:length(s)) {
- div <- paste("div", .get_div(s[[i]]), sep = "")
-
- df <- .spike_summary_by_well(s[[i]])
- df <- cbind(rownames(df), df)
- df <- as.data.frame(unclass(df)) # convert strings to factors
-
- colnames(df)[1] <- "well"
- divs_df[[div]] <- df
- }
-
- divs_df <- do.call(rbind, lapply(names(divs_df),
- function(x) cbind(div = x, divs_df[[x]])))
- return(divs_df)
- }
- .compile_ns <- function(s, nspikes) {
- # Calculates network spikes
- # Called from .write_network_spike_summary
-
- active_wells <- .active_wells_network_spikes(nspikes)$ns_all
- if (length(active_wells) > 0) {
- newcol <- 3
- # 2 to peak_min and peak_max
- p <- length(active_wells[[1]]$brief) +
- length(active_wells[[1]]$mean) + newcol
- nsdata <- matrix(0, length(s$well), p)
- temp <- c()
- length_temp_mean <- length(active_wells[[1]]$mean)
- for (j in 1:length(s$well)) {
- cur_well <- s$well[j]
- if (is.element(cur_well, names(active_wells))){
- temp <- active_wells[[cur_well]]
- nsdata[j, 1:length(temp$brief)] <- temp$brief
- nsdata[j, length(temp$brief) + 1] <- min(temp$measures[, "peak_val"])
- nsdata[j, length(temp$brief) + 2] <- max(temp$measures[, "peak_val"])
- nsdata[j, length(temp$brief) + 3] <- s$treatment[cur_well]
- nsdata[j, (length(temp$brief) + newcol + 1):p] <- as.double(temp$mean)
-
- } else {
- temp$brief <- c(0, rep(NA, 4), 0, NA, NA)
- nsdata[j, 1:length(temp$brief)] <- temp$brief
- nsdata[j, length(temp$brief) + 1] <- NA
- nsdata[j, length(temp$brief) + 2] <- NA
- nsdata[j, length(temp$brief) + 3] <- NA
- nsdata[j, (length(temp$brief) + newcol + 1):p] <-
- rep(0, length_temp_mean)
- }
- }
-
- nsdata <- data.frame(nsdata)
- names(nsdata)[1:length(temp$brief)] <- names(active_wells[[1]]$brief)
- names(nsdata)[(length(temp$brief) + 1):(length(temp$brief) + newcol)] <-
- c("peak_min", "peak_max", "treatment")
-
- for (j in 1:(p - length(temp$brief) - newcol)) {
- names(nsdata)[j + newcol + length(temp$brief)] <- paste("t", j, sep = "")
- }
- nsdata <- cbind(s$well, nsdata)
- names(nsdata)[1] <- "well"
-
- return(nsdata)
- }
- }
- .write_network_spike_summary <- function(s, parameters) {
- # Creates a list of dataframes for network spike features.
- # Each df corresponds to a single DIV and contains values for each feature
- #
- # Args:
- # s object
- #
- # Returns:
- # list of data frames containing spike data
-
- # initiate empty list to store dataframes
- divs_df <- list()
-
- # calculate network spikes
- for (i in 1:length(s)) {
- div <- paste("div", .get_div(s[[i]]), sep = "")
-
- nspikes_old <- calculate_network_spikes(s[[i]], parameters$sur,
- parameters$ns_n, parameters$ns_t)
- nspikes <- summarize_network_spikes(s[[i]], nspikes_old,
- ns_e = 1, parameters$sur)
- basename <- strsplit(basename(s[[i]]$file), "[.]")[[1]][1]
-
- df <- .compile_ns(s[[i]], nspikes)
- df <- as.data.frame(unclass(df)) # convert strings to factors
-
- df <- df[, - grep("^t[0-9]", colnames(df))]
-
- divs_df[[div]] <- df
- }
-
- for (name in names(divs_df)) {
- # If no data for DIV - remove the DIV
- if (length(divs_df[[name]]) == 0) {
- divs_df[[name]] <- NULL
- }
- }
-
- divs_df <- do.call(rbind, lapply(names(divs_df),
- function(x) cbind(div = x, divs_df[[x]])))
-
- return(divs_df)
- }
- .write_burst_summary_mod <- function(s) {
- # Creates a list of dataframes for bursting features. Each df corresponds to
- # a single DIV and contains values for each feature
- #
- # Args:
- # s object
- #
- # Returns:
- # list of data frames containing spike data
-
- master_sum <- .get_mean_burst_info_per_well_mod(s)
-
- divs_df <- list()
- for (i in 1:length(s)) {
- div <- paste("div", .get_div(s[[i]]), sep = "")
-
- ########## data frame summarized over well
- # get number of object in master_sum[[1]] list
- tempdf <- c(); tempcolnames <- c()
- for (j in 2:length(master_sum[[i]])) {
- tempc <- unlist(master_sum[[i]][j])
- tempdf <- cbind(tempdf, tempc)
- tempcolnames <- c(tempcolnames, names(master_sum[[i]][j]))
- } # end of loop through master_sum list objects
-
- # need to switch around columns so first columns come first
- if (dim(tempdf)[2] > 20) {
- if (dim(tempdf)[1] == 1) {
- df <- cbind(t(tempdf[, 21:25]), t(tempdf[, 1:20]))
- } else {
- df <- cbind(tempdf[, 21:25], tempdf[, 1:20])
- }
- colnames <- c(tempcolnames[21:25], tempcolnames[1:20])
- colnames(df) <- colnames
- }
-
- df <- as.data.frame(unclass(df)) # convert strings to factors
- df$file <- NULL
- row.names(df) <- df$well
- divs_df[[div]] <- df
- }
-
- divs_df <- do.call(rbind, lapply(names(divs_df),
- function(x) cbind(div = x, divs_df[[x]])))
- return(divs_df)
- }
- .create_feat_df <- function(s, df, feature, all_feat_list) {
-
- df <- df[!is.na(df$treatment) & df$treatment != "", ]
- x <- data.frame(df$div, df$well, df$treatment, df[, feature])
- colnames(x) <- c("div", "well", "treatment", feature)
- y <- dcast(x, well~div, value.var = feature)
- well_to_treatment <- x[!duplicated(x$well), c("well", "treatment")]
- # reorder in case early wells show up in later DIVs
- well_to_treatment <-
- well_to_treatment[order(as.character(well_to_treatment$well)), ]
-
- ymerged <- merge(x = y, y = well_to_treatment, by = c("well"), all.x = TRUE)
- ymerged <- ymerged[order(as.character(ymerged$well)), ]
- y <- ymerged[c(1, dim(ymerged)[2], 2:(dim(ymerged)[2] - 1))]
-
- y <- .sort_df(y)
- return(y)
- }
- .sort_df <- function(df) {
- # natural order sorting
- df_divs <- df[3:ncol(df)]
- df_sorted <- df_divs[, mixedorder(names(df_divs)), drop = FALSE]
- df_sorted <- cbind(treatment = df$treatment, df_sorted)
- df_sorted <- cbind(well = df$well, df_sorted)
- return(df_sorted)
- }
- aggregate_features_mod <- function(s, feat_type, parameters=list()) {
-
- # Takes in s object and creates a dataframe for each feature.
- # based on the feature type (spikes, ns, etc), it calls appropriate
- # function (e.g. .write_spike_summary if feat_type is "spikes")
- #
- # Args:
- # s object
- # feat_type = "spike", "ns", or "burst"
- #
- # Returns:
- # list of data frames (one df per feature)
-
- # write feature summaries (calls xxx.summary.by.well from meaRtools)
- if (feat_type == "spike") {
- divs_df <- .write_spike_summary(s)
- } else if (feat_type == "ns"){
- divs_df <- .write_network_spike_summary(s, parameters)
- } else if (feat_type == "burst") {
- divs_df <- .write_burst_summary_mod(s)
- divs_df$size <- NULL;
- divs_df$dose <- NULL;
- }
-
- all_features <- list()
-
- if (!is.null(divs_df)){
- # create list of dataframes (one dataframe per feature)
- feature_names <- colnames(divs_df)
- remove <- c("div", "treatment", "well")
- feature_names <- setdiff(feature_names, remove)
-
- # test
-
- for (i in 1:length(feature_names)) {
- df <- .create_feat_df(s, divs_df, feature_names[i], all_features)
-
- all_features[[feature_names[i]]] <- df
- }
- } else {
- all_features <- NULL
- }
-
- return(all_features)
- }
- .get_mean_burst_info_per_well_mod <- function(s) {
- master_sum <- list() # summary over all files
- for (i in 1:length(s)) {
- sum <- list() # summary for each timepoint
- # calculate bursting variables for current data File
- nbursts <- sapply(s[[i]]$allb, nrow)
- allb <- s[[i]]$allb
- tempsum <- calc_burst_summary(s[[i]])
-
- ###### NEW PART 1/3 ######
-
- for (t in 1:nrow(tempsum)) {
- if (tempsum[t,'nbursts'] == 0) {
- tempsum[t,'bursts_per_sec'] <- NA
- tempsum[t,'bursts_per_min'] <- NA
- tempsum[t,'mean_dur'] <- NA
- tempsum[t,'mean_spikes'] <- NA
- }
- }
-
- ###### NEW PART 1/3 ENDS ######
-
- # isis: gets the ISI for each channel of s[[i]]
- isis <- .calc_all_isi(s[[i]], allb)
-
- # IBIs get IBI's across all inter burst intervals across all data
- temp_ibis <- .calc_all_ibi(s[[i]], allb)
-
- # loop through goodwells
- for (j in 1:length(s[[i]]$goodwells)) {
- # indicator of current well
- icurrentwell <- (s[[i]]$goodwells[j] == s[[i]]$cw)
-
- # index of current well
- incurrentwell <- which(s[[i]]$goodwells[j] == s[[i]]$cw)
-
- if (sum(icurrentwell) != 0){
- ##### variables that need summing and averaging
- # total spikes across all AE in current well
- sum$nspikes[j] <- sum(tempsum$spikes[icurrentwell], na.rm = TRUE)
- sum$nAB[j] <- length(which(nbursts[incurrentwell] > 0))
- # Total recorded time on current well= recording time * nae
- sum$duration[j] <-
- length(incurrentwell) * (s[[i]]$rec_time[2] - s[[i]]$rec_time[1])
-
- # mean duration
- sum$mean_dur[j] <- mean(tempsum$mean_dur[incurrentwell], na.rm = TRUE)
-
- # mean spikes per second
- sum$mean_freq[j] <- mean(tempsum$mean_freq[incurrentwell], na.rm = TRUE)
- # total number of bursts
- sum$nbursts[j] <- sum(tempsum$nbursts[icurrentwell], na.rm = TRUE)
- # mean burst per second
- sum$bursts_per_sec[j] <- mean(tempsum$bursts_per_sec[incurrentwell], na.rm = TRUE)
- # mean burst per minute
- sum$bursts_per_min[j] <- sum$bursts_per_sec[j] * 60
-
- # finds the mean of duration for a particular well (across all channels)
- # get_burst_info(allb[icurrentwell],"durn")
- # takes out the column "durn" of all
- # matricies allb among the indicator set icurrentwell
- # get duration data across all channels of current well
- #sum$mean_dur[j] <-
- # mean(unlist(get_burst_info(allb[icurrentwell], "durn")), na.rm = TRUE)
-
- ######## NEW PART 2/3 #########
-
- my.dur <- unlist(get_burst_info(allb[icurrentwell], "durn"))
- my.len <- unlist(get_burst_info(allb[incurrentwell], "len"))
- for (m in 1:length(my.dur)) {
- if (my.dur[m] == 0) {
- my.dur[m] <- NA
- }
- }
- for (n in 1:length(my.len)) {
- if (my.len[n] == 0) {
- my.len[n] <- NA
- }
- }
-
- # get duration data across all channels of current well
- sum$mean_dur[j] <-
- mean(my.dur, na.rm = TRUE)
- # sd of current well burst durations
- sum$sd_dur[j] <- sd(my.dur, na.rm = TRUE)
-
- # mean frequency within a burst
- sum$mean_freq_in_burst[j] <-
- mean(my.len /
- my.dur, na.rm = TRUE)
- # sd frequency within a burst
- sum$sd_freq_in_burst[j] <-
- sd(my.len /
- my.dur, na.rm = TRUE)
-
- ######## NEW PART 2/3 ENDS ########
-
- # sd of current well burst durations
- #sum$sd_dur[j] <- sd(unlist(get_burst_info(allb[icurrentwell], "durn")))
-
- # mean frequency within a burst
- #sum$mean_freq_in_burst[j] <-
- # mean(unlist(get_burst_info(allb[incurrentwell], "len")) /
- # unlist(get_burst_info(allb[incurrentwell], "durn")), na.rm = TRUE)
-
- # sd frequency within a burst
- #sum$sd_freq_in_burst[j] <-
- # sd(unlist(get_burst_info(allb[incurrentwell], "len")) /
- # unlist(get_burst_info(allb[incurrentwell], "durn")), na.rm = TRUE)
-
- # mean of ISI across all channels in current well
- sum$mean_isis[j] <- mean(unlist(isis[incurrentwell]), na.rm = TRUE)
-
- # finds sd of ISI across all channels in current well
- sum$sd_isis[j] <- sd(unlist(isis[incurrentwell]), na.rm = TRUE)
-
- ######## NEW PART 3/3 ########
-
- # mean spikes in burst
- sum$mean_spikes_in_burst[j] <- round(mean(my.len, na.rm = TRUE), 3)
- # sd of spikes in burst
- sum$sd_spikes_in_burst[j] <- round(sd(my.len, na.rm = TRUE), 3)
-
- # total number of spikes arcross all bursts
- sum$total_spikes_in_burst[j] <- sum(unlist(get_burst_info(allb[incurrentwell], "len")), na.rm = TRUE)
-
- ######## NEW PART 3/3 ENDS ########
-
- # len=#spikes in burst (length of burst in bursts)
- # mean_spikes_in_burst
- #ns <- unlist(get_burst_info(allb[icurrentwell], "len"))
- #sum$mean_spikes_in_burst[j] <- round(mean(ns, na.rm = TRUE), 3)
-
- # sd of spikes in burst
- #sum$sd_spikes_in_burst[j] <- round(sd(ns, na.rm = TRUE), 3)
-
- # total number of spikes arcross all bursts
- #sum$total_spikes_in_burst[j] <- sum(ns, na.rm = TRUE)
-
- # percent of spikes in bursts
- sum$per_spikes_in_burst[j] <-
- round(100 * (sum$total_spikes_in_burst[j] / sum$nspikes[j]), 3)
-
- # mean IBI
- sum$mean_ibis[j] <-
- round(mean(unlist(temp_ibis[incurrentwell]), na.rm = TRUE), 3)
- # sd IBI
- sum$sd_ibis[j] <-
- round(sd(unlist(temp_ibis[incurrentwell]), na.rm = TRUE), 3)
- # cv IBI
- sum$cv_ibis[j] <- round(sum$mean_ibis[j] / sum$sd_ibis[j], 3)
-
- } else {
- sum$nspikes[j] <- NA
- sum$nAB[j] <- NA
- sum$duration[j] <- NA
- sum$mean_dur[j] <- NA
- sum$mean_freq[j] <- NA
- sum$nbursts[j] <- NA
- sum$bursts_per_sec[j] <- NA
- sum$bursts_per_min[j] <- NA
- sum$mean_dur[j] <- NA
- sum$sd_dur[j] <- NA
- sum$mean_freq_in_burst[j] <- NA
- sum$sd_freq_in_burst[j] <- NA
- sum$mean_isis[j] <- NA
- sum$sd_isis[j] <- NA
- sum$mean_spikes_in_burst[j] <- NA
- sum$sd_spikes_in_burst[j] <- NA
- sum$total_spikes_in_burst[j] <- NA
- sum$per_spikes_in_burst[j] <- NA
- sum$mean_ibis[j] <- NA
- sum$sd_ibis[j] <- NA
- sum$cv_ibis[j] <- NA
-
- }
-
- }
-
- ### Set all names
- for (k in 1:length(names(sum))) {
- names(sum[[k]]) <- s[[i]]$goodwells
- }
-
- # make a master_sum, that is a list of all the summaries
- goodwellindex <- which(is.element(s[[i]]$well, s[[i]]$goodwells))
-
- master_sum[[i]] <- sum
- master_sum[[i]]$file <- strsplit(basename(s[[i]]$file), ".RData")[[1]][1]
- master_sum[[i]]$treatment <- s[[i]]$treatment[goodwellindex]
- master_sum[[i]]$size <- s[[i]]$size[goodwellindex]
- master_sum[[i]]$dose <- s[[i]]$dose[goodwellindex]
- master_sum[[i]]$well <- s[[i]]$well[goodwellindex]
- master_sum[[i]]$nae <- s[[i]]$nae[goodwellindex]
- master_sum[[i]]$timepoint <-
- rep(s[[i]]$timepoint[1], length(s[[i]]$goodwells))
- master_sum[[i]]$start_rec_time <-
- rep(s[[i]]$rec_time[1], length(s[[i]]$goodwells))
- master_sum[[i]]$end_rec_time <-
- rep(s[[i]]$rec_time[2], length(s[[i]]$goodwells))
- master_sum[[i]]$goodwells <- s[[i]]$goodwells
-
- }
-
- master_sum
- }
- .calc_all_isi <- function(s, allb) {
- ## Compute ISI within bursts for all spike trains.
-
- calc_isi <- function(spikes, b) {
- ## for one spike train, get all isis within bursts in that train.
- if (.num_bursts(b) == 0) {
- return(NA)
- }
-
- ## as.vector is needed below in case each burst is of the same
- ## length (in which case an array is returned by apply). In
- ## normal cases, bursts are of different lengths, so "apply"
- ## returns a list.
-
- isis <- as.vector(
- unlist(apply(b, 1,
- function(x) {
- diff(spikes[x[1]:x[2]])
- })))
- }
-
- nchannels <- s$NCells
- isis <- list()
- for (i in 1:nchannels) {
- isis[[i]] <- calc_isi(s$spikes[[i]], allb[[i]])
- }
-
- isis
- }
- .num_bursts <- function(b) {
- ## Return the number of bursts found for a spike train.
- if (is.na(b[1]))
- 0
- else
- nrow(b)
- }
- .calc_all_ibi <- function(s, allb) {
- ## Compute IBI for all spike trains.
- nchannels <- s$NCells
- ibis <- list()
- for (i in 1:nchannels) {
- ibis[[i]] <- .calc_ibi(s$spikes[[i]], allb[[i]])
- }
-
- ibis
- }
- .calc_ibi <- function(spikes, b) {
- ## Compute the interburst intervals (IBI) for one spike train.
- ## Only valid if more than one burst.
-
- nburst <- .num_bursts(b)
- if (nburst == 0) {
- res <- NA # no bursts
- } else {
- if (nburst == 1) {
- res <- NA # cannot compute IBI w/only 1 burst.
- } else {
- ## find end spike in each burst.
- end <- b[, "beg"] + b[, "len"] - 1
-
- ## for n bursts, there will be n bursts minus 1 IBIs.
- start_spikes <- b[2:nburst, "beg"]
- end_spikes <- end[1:(nburst - 1)]
- ## NEX uses a strange definition of IBI: it counts the time between
- ## the first spike of n burst and the first spike of n burst
- ## plus one as the IBI.
- ## If we want to use that definition, use the following line:
- ## end.spikes equal b where 1 to nburst minus 1
- res <- spikes[start_spikes] - spikes[end_spikes]
- }
- }
- res
- }
- .get_div <- function(s) {
- div <- NA
- if(length(s$div>0)){
- div<-s$div
- } else {
- t1 <- strsplit(s$file, split = "_", fixed = TRUE)
- for (i in t1[[1]]) {
- i <- toupper(i)
- if (nchar(i) > 2 && substr(i, 1, 3) == "DIV") {
- if (nchar(i) > 5) {
- i <- unlist(strsplit(i, split = ".", fixed = T))[1]
- }
- div <- as.numeric(substr(i, 4, nchar(i)))
- }
- }
- }
- # set default div as 1
- if (is.na(div)){ div <- 1}
- div
- }
|