doi
/
Comparative_MEA_dataset
разклонено от NeuroGroup_TUNI/Comparative_MEA_dataset


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
							# Laura Aarnos 190213
# meaRtools: aggegate features (burst)
# original functions from meaRtools 2018, modification made by LAa and marked with _mod

###############################################################################
# Purpose:  Functions for aggregating data from s objects into single         #
#                 dataframes per feature                                      #
# Author:   Ryan Dhindsa                                                      #
###############################################################################

# These functions take data from S object to make dataframes
.write_spike_summary <- function(s) {
  # Creates a list of dataframes for spike features. Each df corresponds to
  #     a single DIV and contains values for each feature
  #
  # Args:
  #   s object
  #
  # Returns:
  #   list of data frames containing spike data
  
  # initiate empty list to store dataframes
  divs_df <- list()
  
  # loop through DIVs in s object and create 1 df per DIV.
  # Each df gets stored in divs_df
  for (i in 1:length(s)) {
    div <- paste("div", .get_div(s[[i]]), sep = "")
    
    df <- .spike_summary_by_well(s[[i]])
    df <- cbind(rownames(df), df)
    df <- as.data.frame(unclass(df)) # convert strings to factors
    
    colnames(df)[1] <- "well"
    divs_df[[div]] <- df
  }
  
  divs_df <- do.call(rbind, lapply(names(divs_df),
                                   function(x) cbind(div = x, divs_df[[x]])))
  return(divs_df)
}

.compile_ns <- function(s, nspikes) {
  # Calculates network spikes
  # Called from .write_network_spike_summary
  
  active_wells <- .active_wells_network_spikes(nspikes)$ns_all
  if (length(active_wells) > 0) {
    newcol <- 3
    # 2 to peak_min and peak_max
    p <- length(active_wells[[1]]$brief) +
      length(active_wells[[1]]$mean) + newcol
    nsdata <- matrix(0, length(s$well), p)
    temp <- c()
    length_temp_mean <- length(active_wells[[1]]$mean)
    for (j in 1:length(s$well)) {
      cur_well <- s$well[j]
      if (is.element(cur_well, names(active_wells))){
        temp <- active_wells[[cur_well]]
        nsdata[j, 1:length(temp$brief)] <- temp$brief
        nsdata[j, length(temp$brief) + 1] <- min(temp$measures[, "peak_val"])
        nsdata[j, length(temp$brief) + 2] <- max(temp$measures[, "peak_val"])
        nsdata[j, length(temp$brief) + 3] <- s$treatment[cur_well]
        nsdata[j, (length(temp$brief) + newcol + 1):p] <- as.double(temp$mean)
        
      } else {
        temp$brief <- c(0, rep(NA, 4), 0, NA, NA)
        nsdata[j, 1:length(temp$brief)] <- temp$brief
        nsdata[j, length(temp$brief) + 1] <- NA
        nsdata[j, length(temp$brief) + 2] <- NA
        nsdata[j, length(temp$brief) + 3] <- NA
        nsdata[j, (length(temp$brief) + newcol + 1):p] <-
          rep(0, length_temp_mean)
      }
    }
    
    nsdata <- data.frame(nsdata)
    names(nsdata)[1:length(temp$brief)] <- names(active_wells[[1]]$brief)
    names(nsdata)[(length(temp$brief) + 1):(length(temp$brief) + newcol)] <-
      c("peak_min", "peak_max", "treatment")
    
    for (j in 1:(p - length(temp$brief) - newcol)) {
      names(nsdata)[j + newcol + length(temp$brief)] <- paste("t", j, sep = "")
    }
    nsdata <- cbind(s$well, nsdata)
    names(nsdata)[1] <- "well"
    
    return(nsdata)
  }
}
.write_network_spike_summary <- function(s, parameters) {
  # Creates a list of dataframes for network spike features.
  # Each df corresponds to a single DIV and contains values for each feature
  #
  # Args:
  #   s object
  #
  # Returns:
  #   list of data frames containing spike data
  
  # initiate empty list to store dataframes
  divs_df <- list()
  
  # calculate network spikes
  for (i in 1:length(s)) {
    div <- paste("div", .get_div(s[[i]]), sep = "")
    
    nspikes_old <- calculate_network_spikes(s[[i]], parameters$sur,
                                            parameters$ns_n, parameters$ns_t)
    nspikes <- summarize_network_spikes(s[[i]], nspikes_old,
                                        ns_e = 1, parameters$sur)
    basename <- strsplit(basename(s[[i]]$file), "[.]")[[1]][1]
    
    df <- .compile_ns(s[[i]], nspikes)
    df <- as.data.frame(unclass(df)) # convert strings to factors
    
    df <- df[, - grep("^t[0-9]", colnames(df))]
    
    divs_df[[div]] <- df
  }
  
  for (name in names(divs_df)) {
    # If no data for DIV - remove the DIV
    if (length(divs_df[[name]]) == 0) {
      divs_df[[name]] <- NULL
    }
  }
  
  divs_df <- do.call(rbind, lapply(names(divs_df),
                                   function(x) cbind(div = x, divs_df[[x]])))
  
  return(divs_df)
}

.write_burst_summary_mod <- function(s) {
  # Creates a list of dataframes for bursting  features. Each df corresponds to
  #     a single DIV and contains values for each feature
  #
  # Args:
  #   s object
  #
  # Returns:
  #   list of data frames containing spike data
  
  master_sum <- .get_mean_burst_info_per_well_mod(s)
  
  divs_df <- list()
  for (i in 1:length(s)) {
    div <- paste("div", .get_div(s[[i]]), sep = "")
    
    ########## data frame summarized over well
    # get number of object in master_sum[[1]] list
    tempdf <- c(); tempcolnames <- c()
    for (j in 2:length(master_sum[[i]])) {
      tempc <- unlist(master_sum[[i]][j])
      tempdf <- cbind(tempdf, tempc)
      tempcolnames <- c(tempcolnames, names(master_sum[[i]][j]))
    } # end of loop through master_sum list objects
    
    # need to switch around columns so first columns come first
    if (dim(tempdf)[2] > 20) {
      if (dim(tempdf)[1] == 1) {
        df <- cbind(t(tempdf[, 21:25]), t(tempdf[, 1:20]))
      } else {
        df <- cbind(tempdf[, 21:25], tempdf[, 1:20])
      }
      colnames <- c(tempcolnames[21:25], tempcolnames[1:20])
      colnames(df) <- colnames
    }
    
    df <- as.data.frame(unclass(df)) # convert strings to factors
    df$file <- NULL
    row.names(df) <- df$well
    divs_df[[div]] <- df
  }
  
  divs_df <- do.call(rbind, lapply(names(divs_df),
                                   function(x) cbind(div = x, divs_df[[x]])))
  return(divs_df)
}

.create_feat_df <- function(s, df, feature, all_feat_list) {
  
  df <- df[!is.na(df$treatment) & df$treatment != "", ]
  x <- data.frame(df$div, df$well, df$treatment, df[, feature])
  colnames(x) <- c("div", "well", "treatment", feature)
  y <- dcast(x, well~div, value.var = feature)
  well_to_treatment <- x[!duplicated(x$well), c("well", "treatment")]
  # reorder in case early wells show up in later DIVs
  well_to_treatment <-
    well_to_treatment[order(as.character(well_to_treatment$well)), ]
  
  ymerged <- merge(x = y, y = well_to_treatment, by = c("well"), all.x = TRUE)
  ymerged <- ymerged[order(as.character(ymerged$well)), ]
  y <- ymerged[c(1, dim(ymerged)[2], 2:(dim(ymerged)[2] - 1))]
  
  y <- .sort_df(y)
  return(y)
}

.sort_df <- function(df) {
  # natural order sorting
  df_divs <- df[3:ncol(df)]
  df_sorted <- df_divs[, mixedorder(names(df_divs)), drop = FALSE]
  df_sorted <- cbind(treatment = df$treatment, df_sorted)
  df_sorted <- cbind(well = df$well, df_sorted)
  return(df_sorted)
}

aggregate_features_mod <- function(s, feat_type, parameters=list()) {
  
  # Takes in s object and creates a dataframe for each feature.
  #     based on the feature type (spikes, ns, etc), it calls appropriate
  #     function (e.g. .write_spike_summary if feat_type is "spikes")
  #
  # Args:
  #   s object
  #   feat_type = "spike", "ns", or "burst"
  #
  # Returns:
  #   list of data frames (one df per feature)
  
  # write feature summaries (calls xxx.summary.by.well from meaRtools)
  if (feat_type == "spike") {
    divs_df <- .write_spike_summary(s)
  } else if (feat_type == "ns"){
    divs_df <- .write_network_spike_summary(s, parameters)
  } else if (feat_type == "burst") {
    divs_df <- .write_burst_summary_mod(s)
    divs_df$size <- NULL;
    divs_df$dose <- NULL;
  }
  
  all_features <- list()
  
  if (!is.null(divs_df)){
    # create list of dataframes (one dataframe per feature)
    feature_names <- colnames(divs_df)
    remove <- c("div", "treatment", "well")
    feature_names <- setdiff(feature_names, remove)
    
    # test
    
    for (i in 1:length(feature_names)) {
      df <- .create_feat_df(s, divs_df, feature_names[i], all_features)
      
      all_features[[feature_names[i]]] <- df
    }
  } else {
    all_features <- NULL
  }
  
  return(all_features)
}


.get_mean_burst_info_per_well_mod <- function(s) {
  master_sum <- list() # summary over all files
  for (i in 1:length(s)) {
    sum <- list() # summary for each timepoint
    # calculate bursting variables for current data File
    nbursts <- sapply(s[[i]]$allb, nrow)
    allb <- s[[i]]$allb
    tempsum <- calc_burst_summary(s[[i]])
    
    ###### NEW PART 1/3 ######
    
    for (t in 1:nrow(tempsum)) {
      if (tempsum[t,'nbursts'] == 0) {
        tempsum[t,'bursts_per_sec'] <- NA
        tempsum[t,'bursts_per_min'] <- NA
        tempsum[t,'mean_dur'] <- NA
        tempsum[t,'mean_spikes'] <- NA
      }
    }
    
    ###### NEW PART 1/3 ENDS ######
    
    # isis: gets the ISI for each channel of s[[i]]
    isis <- .calc_all_isi(s[[i]], allb)
    
    # IBIs get IBI's across all inter burst intervals across all data
    temp_ibis <- .calc_all_ibi(s[[i]], allb)
    
    # loop through goodwells
    for (j in 1:length(s[[i]]$goodwells)) {
      # indicator of current well
      icurrentwell <- (s[[i]]$goodwells[j] == s[[i]]$cw)
      
      # index of current well
      incurrentwell <- which(s[[i]]$goodwells[j] == s[[i]]$cw)
      
      if (sum(icurrentwell) != 0){
        ##### variables that need summing and averaging
        # total spikes across all AE in current well
        sum$nspikes[j] <- sum(tempsum$spikes[icurrentwell], na.rm = TRUE)
        sum$nAB[j] <- length(which(nbursts[incurrentwell] > 0))
        # Total recorded time on current well= recording time * nae
        sum$duration[j] <-
          length(incurrentwell) * (s[[i]]$rec_time[2] - s[[i]]$rec_time[1])
        
        # mean duration
        sum$mean_dur[j] <- mean(tempsum$mean_dur[incurrentwell], na.rm = TRUE)
        
        # mean spikes per second
        sum$mean_freq[j] <- mean(tempsum$mean_freq[incurrentwell], na.rm = TRUE)
        # total number of bursts
        sum$nbursts[j] <- sum(tempsum$nbursts[icurrentwell], na.rm = TRUE)
        # mean burst per second
        sum$bursts_per_sec[j] <- mean(tempsum$bursts_per_sec[incurrentwell], na.rm = TRUE)
        # mean burst per minute
        sum$bursts_per_min[j] <- sum$bursts_per_sec[j] * 60
        
        # finds the mean of duration for a particular well (across all channels)
        # get_burst_info(allb[icurrentwell],"durn")
        # takes out the column "durn" of all
        # matricies allb among the indicator set icurrentwell
        # get duration data across all channels of current well
        #sum$mean_dur[j] <-
        #  mean(unlist(get_burst_info(allb[icurrentwell], "durn")), na.rm = TRUE)
        
        ######## NEW PART 2/3 #########
        
        my.dur <- unlist(get_burst_info(allb[icurrentwell], "durn"))
        my.len <- unlist(get_burst_info(allb[incurrentwell], "len"))
        for (m in 1:length(my.dur)) {
          if (my.dur[m] == 0) {
            my.dur[m] <- NA
          }
        }
        for (n in 1:length(my.len)) {
          if (my.len[n] == 0) {
            my.len[n] <- NA
          }
        }
        
        # get duration data across all channels of current well
        sum$mean_dur[j] <-
          mean(my.dur, na.rm = TRUE)
        # sd of current well burst durations
        sum$sd_dur[j] <- sd(my.dur, na.rm = TRUE)
        
        # mean frequency within a burst
        sum$mean_freq_in_burst[j] <-
          mean(my.len /
                 my.dur, na.rm = TRUE)
        # sd frequency within a burst
        sum$sd_freq_in_burst[j] <-
          sd(my.len /
               my.dur, na.rm = TRUE)
        
        ######## NEW PART 2/3 ENDS ########
        
        # sd of current well burst durations
        #sum$sd_dur[j] <- sd(unlist(get_burst_info(allb[icurrentwell], "durn")))
        
        # mean frequency within a burst
        #sum$mean_freq_in_burst[j] <-
        #  mean(unlist(get_burst_info(allb[incurrentwell], "len")) /
        #         unlist(get_burst_info(allb[incurrentwell], "durn")), na.rm = TRUE)
        
        # sd frequency within a burst
        #sum$sd_freq_in_burst[j] <-
        #  sd(unlist(get_burst_info(allb[incurrentwell], "len")) /
        #       unlist(get_burst_info(allb[incurrentwell], "durn")), na.rm = TRUE)
        
        # mean of ISI across all channels in current well
        sum$mean_isis[j] <- mean(unlist(isis[incurrentwell]), na.rm = TRUE)
        
        # finds sd of ISI across all channels in current well
        sum$sd_isis[j] <- sd(unlist(isis[incurrentwell]), na.rm = TRUE)
        
        ######## NEW PART 3/3 ########
        
        # mean spikes in burst
        sum$mean_spikes_in_burst[j] <- round(mean(my.len, na.rm = TRUE), 3)
        # sd of spikes in burst
        sum$sd_spikes_in_burst[j] <- round(sd(my.len, na.rm = TRUE), 3)
        
        # total number of spikes arcross all bursts
        sum$total_spikes_in_burst[j] <- sum(unlist(get_burst_info(allb[incurrentwell], "len")), na.rm = TRUE)
        
        ######## NEW PART 3/3 ENDS ########        
        
        # len=#spikes in burst (length of burst in bursts)
        # mean_spikes_in_burst
        #ns <- unlist(get_burst_info(allb[icurrentwell], "len"))
        #sum$mean_spikes_in_burst[j] <- round(mean(ns, na.rm = TRUE), 3)
        
        # sd of spikes in burst
        #sum$sd_spikes_in_burst[j] <- round(sd(ns, na.rm = TRUE), 3)
        
        # total number of spikes arcross all bursts
        #sum$total_spikes_in_burst[j] <- sum(ns, na.rm = TRUE)
        
        # percent of spikes in bursts
        sum$per_spikes_in_burst[j] <-
          round(100 * (sum$total_spikes_in_burst[j] / sum$nspikes[j]), 3)
        
        # mean IBI
        sum$mean_ibis[j] <-
          round(mean(unlist(temp_ibis[incurrentwell]), na.rm = TRUE), 3)
        # sd IBI
        sum$sd_ibis[j] <-
          round(sd(unlist(temp_ibis[incurrentwell]), na.rm = TRUE), 3)
        # cv IBI
        sum$cv_ibis[j] <- round(sum$mean_ibis[j] / sum$sd_ibis[j], 3)
        
      } else {
        sum$nspikes[j] <- NA
        sum$nAB[j] <- NA
        sum$duration[j] <- NA
        sum$mean_dur[j] <- NA
        sum$mean_freq[j] <- NA
        sum$nbursts[j] <- NA
        sum$bursts_per_sec[j] <- NA
        sum$bursts_per_min[j] <- NA
        sum$mean_dur[j] <- NA
        sum$sd_dur[j] <- NA
        sum$mean_freq_in_burst[j] <- NA
        sum$sd_freq_in_burst[j] <- NA
        sum$mean_isis[j] <- NA
        sum$sd_isis[j] <- NA
        sum$mean_spikes_in_burst[j] <- NA
        sum$sd_spikes_in_burst[j] <- NA
        sum$total_spikes_in_burst[j] <- NA
        sum$per_spikes_in_burst[j] <- NA
        sum$mean_ibis[j] <- NA
        sum$sd_ibis[j] <- NA
        sum$cv_ibis[j] <- NA
        
      }
      
    }
    
    ### Set all names
    for (k in 1:length(names(sum))) {
      names(sum[[k]]) <- s[[i]]$goodwells
    }
    
    # make a master_sum, that is a list of all the summaries
    goodwellindex <- which(is.element(s[[i]]$well, s[[i]]$goodwells))
    
    master_sum[[i]] <- sum
    master_sum[[i]]$file <- strsplit(basename(s[[i]]$file), ".RData")[[1]][1]
    master_sum[[i]]$treatment <- s[[i]]$treatment[goodwellindex]
    master_sum[[i]]$size <- s[[i]]$size[goodwellindex]
    master_sum[[i]]$dose <- s[[i]]$dose[goodwellindex]
    master_sum[[i]]$well <- s[[i]]$well[goodwellindex]
    master_sum[[i]]$nae <- s[[i]]$nae[goodwellindex]
    master_sum[[i]]$timepoint <-
      rep(s[[i]]$timepoint[1], length(s[[i]]$goodwells))
    master_sum[[i]]$start_rec_time <-
      rep(s[[i]]$rec_time[1], length(s[[i]]$goodwells))
    master_sum[[i]]$end_rec_time <-
      rep(s[[i]]$rec_time[2], length(s[[i]]$goodwells))
    master_sum[[i]]$goodwells <- s[[i]]$goodwells
    
  }
  
  master_sum
}

.calc_all_isi <- function(s, allb) {
  ## Compute ISI within bursts for all spike trains.
  
  calc_isi <- function(spikes, b) {
    ## for one spike train, get all isis within bursts in that train.
    if (.num_bursts(b) == 0) {
      return(NA)
    }
    
    ## as.vector is needed below in case each burst is of the same
    ## length (in which case an array is returned by apply).  In
    ## normal cases, bursts are of different lengths, so "apply"
    ## returns a list.
    
    isis <- as.vector(
      unlist(apply(b, 1,
                   function(x) {
                     diff(spikes[x[1]:x[2]])
                   })))
  }
  
  nchannels <- s$NCells
  isis <- list()
  for (i in 1:nchannels) {
    isis[[i]] <- calc_isi(s$spikes[[i]], allb[[i]])
  }
  
  isis
}

.num_bursts <- function(b) {
  ## Return the number of bursts found for a spike train.
  if (is.na(b[1]))
    0
  else
    nrow(b)
}

.calc_all_ibi <- function(s, allb) {
  ## Compute IBI for all spike trains.
  nchannels <- s$NCells
  ibis <- list()
  for (i in 1:nchannels) {
    ibis[[i]] <- .calc_ibi(s$spikes[[i]], allb[[i]])
  }
  
  ibis
}

.calc_ibi <- function(spikes, b) {
  ## Compute the interburst intervals (IBI) for one spike train.
  ## Only valid if more than one burst.
  
  nburst <- .num_bursts(b)
  if (nburst == 0) {
    res <- NA # no bursts
  } else {
    if (nburst == 1) {
      res <- NA # cannot compute  IBI w/only 1 burst.
    } else {
      ## find end spike in each burst.
      end <- b[, "beg"] + b[, "len"] - 1
      
      ## for n bursts, there will be n bursts minus 1 IBIs.
      start_spikes <- b[2:nburst, "beg"]
      end_spikes <- end[1:(nburst - 1)]
      ## NEX uses a strange definition of IBI: it counts the time between
      ## the first spike of n burst and the first spike of n burst
      ## plus one as the IBI.
      ## If we want to use that definition, use the following line:
      ## end.spikes equal b where 1 to nburst minus 1
      res <- spikes[start_spikes] - spikes[end_spikes]
    }
  }
  res
}

.get_div <- function(s) {
  div <- NA
  if(length(s$div>0)){
    div<-s$div
  } else {
    t1 <- strsplit(s$file, split = "_", fixed = TRUE)
    for (i in t1[[1]]) {
      i <- toupper(i)
      if (nchar(i) > 2 && substr(i, 1, 3) == "DIV") {
        if (nchar(i) > 5) {
          i <- unlist(strsplit(i, split = ".", fixed = T))[1]
        }
        div <- as.numeric(substr(i, 4, nchar(i)))
      }
    }
  }
  # set default div as 1
  if (is.na(div)){ div <- 1}
  div
}