highspeed-analysis-source.R 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. load_packages <- function(packages_list) {
  2. # install required packages if they are not installed yet:
  3. new_packages <- packages_list[
  4. !(packages_list %in% installed.packages()[, "Package"])]
  5. if (length(new_packages)) install.packages(new_packages)
  6. # load all required packages:
  7. invisible(lapply(packages_list, library, character.only = TRUE))
  8. }
  9. save_datatable <- function(dt, path) {
  10. write.table(dt, file = path, sep = ",", row.names = FALSE)
  11. }
  12. detect_cores <- function() {
  13. # get the maximum number of available cores rstudio has access to:
  14. # We will get the maximum number of computing cores available.
  15. # This information could be used later to parallelize execution when needed.
  16. num_cores <- parallel::detectCores(all.tests = TRUE, logical = TRUE)
  17. sprintf("Number of available computing cores: %d", num_cores)
  18. # use all available cores for processing in r
  19. doMC::registerDoMC(cores = num_cores)
  20. return(num_cores)
  21. }
  22. round_pvalues <- function(pvalue) {
  23. # This function can be used to round p-values.
  24. pvalue_rounded <- vector()
  25. for (p in seq_len(length(pvalue))) {
  26. pvalue_rounded[p] <- format.pval(
  27. pv = pvalue[p], digits = 1, eps = 0.001, nsmall = 2, scientific = FALSE)
  28. if (pvalue_rounded[p] == "<0.001") {
  29. pvalue_rounded[p] <- gsub("<", "p < ", pvalue_rounded[p])
  30. } else {
  31. pvalue_rounded[p] <- paste0("p = ", pvalue_rounded[p])
  32. }
  33. pvalue_rounded[p] <- stringr::str_replace(pvalue_rounded[p], ".0", " ")
  34. }
  35. return(pvalue_rounded)
  36. }
  37. label_fill <- function(original, offset = 0, mod = 2, fill = "") {
  38. # This function can be used to generate axis labels that omit e.g.,
  39. # every second label. Solution was taken from [here](https://bit.ly/2VycSy0).
  40. ii <- as.logical((seq_len(length(original)) - 1 + offset) %% mod)
  41. original[ii] <- fill
  42. return(original)
  43. }
  44. extract_number <- function(mixed_string) {
  45. # this function can be used to extract a number from a mixed string.
  46. number <- regmatches(mixed_string, gregexpr("[[:digit:]]+", mixed_string))
  47. number <- as.numeric(unlist(number))
  48. }
  49. get_labeller <- function(array, suffix = " ms") {
  50. facet_labels_new <- unique(paste0(as.numeric(array) * 1000, suffix))
  51. facet_labels_old <- as.character(unique(array))
  52. names(facet_labels_new) <- facet_labels_old
  53. labeller <- as_labeller(facet_labels_new)
  54. return(labeller)
  55. }
  56. # function to get sequential position and switch to next sequential stimulus:
  57. get_pos <- function(data, events) {
  58. # get the matching subject id:
  59. sub_id <- events$subject == unique(data$id)
  60. # get the matching sequence trial number (trial)
  61. trial <- events$trial == unique(data$trial)
  62. # get the sequence of the current trial:
  63. seq <- events$stim_label[sub_id & trial]
  64. # get only unique elements of the sequence while maintaining their order:
  65. seq_items <- unique(seq)
  66. # get the trial number of the switch to the second item within the sequence:
  67. change <- min(which((seq == seq_items[2]) == TRUE))
  68. # get the sequential position of the current label:
  69. position <- which(seq_items == unique(data$class))
  70. # repeat the sequential position as needed (depending on length of the data):
  71. position <- rep(position, nrow(data))
  72. # repeat the change trial as needed (depending on length of the data):
  73. change <- rep(change, nrow(data))
  74. # get the target cue of the current trial:
  75. trial_cue <- rep(unique(events$trial_cue[sub_id & trial]), nrow(data))
  76. # get the target cue position of the current trial:
  77. tmp_target <- events$target[sub_id & trial]
  78. tmp_position <- events$serial_position[sub_id & trial]
  79. trial_cue_position <- rep(tmp_position[tmp_target == 1], nrow(data))
  80. # get the accuracy of the current trial:
  81. accuracy <- rep(unique(events$trial_accuracy[sub_id & trial]), nrow(data))
  82. # return the position and change indices as result of the function:
  83. return(list(position = position, change = change,
  84. trial_cue = trial_cue, accuracy = accuracy,
  85. trial_cue_position = trial_cue_position))
  86. }
  87. prepare_data <- function(dt) {
  88. library(data.table)
  89. dt <- setDT(dt)
  90. # specify whether one-vs-rest or multi-class classifiers have been used:
  91. dt[, classification := ifelse(classifier == "log_reg", "multi", "ovr")]
  92. # add column to specify the task condition:
  93. dt[grepl("odd", test_set, fixed = TRUE), condition := "oddball"]
  94. dt[grepl("seq", test_set, fixed = TRUE), condition := "sequence"]
  95. dt[grepl("rep", test_set, fixed = TRUE), condition := "repetition"]
  96. dt[grepl("rest", test_set, fixed = TRUE), condition := "rest"]
  97. # display warning if there is any empty row in the task condition column:
  98. if ( any(is.na(dt$condition)) ) warning("missing condition assignment")
  99. # add a within speed condition trial counter across all runs (max should be 15):
  100. dt[, by = .(id, classifier, condition, tITI, class, seq_tr), ":=" (trial_tITI = 1:.N)]
  101. # check if the maximum within speed condition trial counter does not exceed 15:
  102. if( max(subset(dt, condition == "sequence")$trial_tITI) != 15 )
  103. warning('max within speed counter does not equal 15!')
  104. if( max(subset(dt, condition == "repetition")$trial_tITI) != 45 )
  105. warning('max within speed counter does not equal 45!')
  106. # probabilities are normalized for each class within a trial to sum up to 1
  107. dt[, by = .(mask, id, condition, classification, classifier, test_set, session, run_study, tITI, trial, class), ":=" (
  108. probability_norm = probability / sum(probability),
  109. probability_zscore = (probability - mean(probability))/sd(probability),
  110. probability_cum = cumsum(probability) / max(cumsum(probability)))] %>%
  111. # check if the number of TRs match:
  112. verify(.[, by = .(mask, id, condition, classification, classifier, test_set, session, run_study, tITI, trial, class), .(
  113. num_trs = .N
  114. )]$num_trs %in% c(1, 5, 7, 13, 233))
  115. # order sequence trial data by participant, classifier, trial and serial TR:
  116. dt = setorder(dt, mask, id, condition, classification, classifier, tITI, trial, class, seq_tr) %>% setDT(.)
  117. # return the prepared data table:
  118. return(dt)
  119. }
  120. data_summary <- function(x) {
  121. # Function to produce summary statistics (mean and +/- sem)
  122. m <- mean(x)
  123. sem_lower <- m - (sd(x) / sqrt(length(x)))
  124. sem_upper <- m + (sd(x) / sqrt(length(x)))
  125. return(c(y = m, ymin = sem_lower, ymax = sem_upper))
  126. }
  127. round_updown <- function(numbers, base) {
  128. numbers_round <- rep(NA, length(numbers))
  129. for (i in seq_len(length(numbers))) {
  130. if (numbers[i] < 0) {
  131. numbers_round[i] <- -base
  132. } else if (numbers[i] > 0) {
  133. numbers_round[i] <- base
  134. }
  135. }
  136. return(numbers_round)
  137. }