create_Picture_AllSinglePlots_Final.R 51 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833
  1. #### clean workspace ####
  2. rm(list=ls())
  3. # The csv File has to be in the same directory
  4. setwd(dirname(rstudioapi::getSourceEditorContext()$path))
  5. # clear the console
  6. cat("\014")
  7. color_pal_vec = c('#070778','#816c93','#808080','#0e6966','#5a2c40','#1b1b1b','#935430','#f43661','#171511')
  8. #### functions ####
  9. replace_factor_na <- function(x){
  10. x <- as.character(x)
  11. x <- if_else(is.na(x), "Not answered", x)
  12. x <- as.factor(x)
  13. }
  14. wide_to_long_as <- function(data0,comb_string_vec,comb_col_names){
  15. # Diese Schleife ist mit Vorsicht zu genießen. Entstehende long format Datensatz kann sehr groß werden
  16. library(data.table)
  17. for(i in seq(1,length(comb_string_vec),1)){
  18. data0 = data.table::melt(as.data.table(data0),
  19. id= c(which(!grepl(comb_string_vec[i],colnames(data0),fixed = TRUE))),
  20. measure=list(grep(comb_string_vec[i],colnames(data0),fixed = TRUE)),
  21. variable.name = paste0(comb_col_names[i],'Cat'),
  22. value.name = comb_col_names[i],value.factor=TRUE)
  23. # make some nicer labels
  24. data0 = as.data.frame(data0)
  25. level_strings = levels(data0[,ncol(data0)-1])
  26. # iterate over the level strings and update them
  27. for(s in seq(1,length(level_strings),1)){
  28. level_string = level_strings[s]
  29. temp_start = unlist(gregexpr('\\[', level_string))[1]
  30. temp_end = tail(unlist(gregexpr('\\]', level_string)), n=1)
  31. if(length(temp_end) != -1 & length(temp_start) != -1){
  32. level_string = substr(level_string,temp_start[1]+1,max(temp_end)-1)
  33. }
  34. level_strings[s] = level_string
  35. }
  36. # reset the labels
  37. levels(data0[,ncol(data0)-1]) = level_strings
  38. }
  39. data = data0
  40. return(data)
  41. }
  42. #### load libraries ####
  43. library(ggplot2)
  44. library(dplyr)
  45. library(forcats)
  46. library(ggpubr)
  47. library(reshape2)
  48. library(stringr)
  49. library(ragg) # scaling text size for different picture sizes
  50. #### Load the data ####
  51. # catch NA strings
  52. #neuro_data <- read.csv("results-survey197421_nurkomplett.csv",row.names=NULL,na.strings=c("","N/A"),sep=',')
  53. neuro_data <- read.csv("results-survey197421_alledaten.csv",row.names=NULL,na.strings=c("","N/A"),sep=',',check.names = FALSE)
  54. neuro_data <- neuro_data[!is.na(neuro_data$'My current (primary) position is:'),]
  55. # Es geht um die Frage was die Leute die Daten teilen von den anderen Unterscheidet
  56. # und insgesamt um die Frage was es fuer Probleme gibt in unserer Dateninfrastruktur
  57. # set up some global values
  58. glob_insideBar_text_size = 2.75
  59. glob_text_size = 8
  60. glob_title_text_size = 8
  61. glob_fig_width = 17.5
  62. glob_dec_round = 0
  63. #### About myself ####
  64. # more elegant
  65. data0 = neuro_data %>% dplyr::select('Response ID',
  66. starts_with('I work at'),
  67. starts_with('My current '),
  68. starts_with('Which neuroscience discipline(s) '),
  69. starts_with('Please state if your ')
  70. )
  71. comb_string_vec = c('I work at',
  72. 'My current ',
  73. 'Which neuroscience discipline(s) ',
  74. 'Please state if your ')
  75. comb_col_names = c('WorkPlaces',
  76. 'CurrentPosition',
  77. 'NeuroDiscipline',
  78. 'FimilarDataTypes')
  79. data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
  80. # make a WorkPlaces plot filter out the 'Other' answers
  81. temp = data %>% select('Response ID',WorkPlaces) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>% group_by(WorkPlaces) %>%
  82. filter(n() >= 3) %>% mutate(WorkPlaces = str_replace(WorkPlaces,", Helmholtz Center", "")) %>%
  83. mutate(WorkPlaces = str_replace(WorkPlaces,", Fraunhofer Institute", ""))
  84. temp = temp[1:218,] # remove [Other] answers
  85. temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2))
  86. pWP = ggplot(data=temp) +
  87. geom_histogram(mapping=aes(x=reorder(WorkPlaces,percent),y=percent),
  88. stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  89. ylim(0,max(temp$percent)+5) +
  90. geom_text(aes(x=WorkPlaces,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  91. colour = "white", size = glob_insideBar_text_size) +
  92. geom_text(aes(x=WorkPlaces,y=percent,label = paste0('n=',n)),hjust = -0.25,
  93. colour = "black", size = glob_insideBar_text_size) +
  94. xlab('') + ylab('percent (%)') + ggtitle(paste0('I work at / I am affiliated with: ','n = ',sum(temp$n))) + coord_flip() +
  95. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  96. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  97. axis.title = element_text(size = glob_text_size),
  98. plot.title = element_text(size = glob_title_text_size)) +
  99. scale_x_discrete(labels=function(x){str_wrap(x,width = 30)})
  100. ragg::agg_tiff('Q1.I.work.at.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  101. pWP
  102. dev.off()
  103. # make a Current Position plot and replace NaN with 'None'
  104. temp = data %>% select('Response ID',CurrentPosition) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>%
  105. filter(CurrentPosition != 'child neurologist') %>%
  106. filter(CurrentPosition != 'coodinator ') %>%
  107. filter(CurrentPosition != 'Clinician') %>%
  108. filter(CurrentPosition != 'Doctor') %>%
  109. filter(CurrentPosition != 'Emeritus Professor') %>%
  110. filter(CurrentPosition != 'medical doctor') %>%
  111. filter(CurrentPosition != 'Oberarzt') %>%
  112. filter(CurrentPosition != 'Therapist')
  113. temp = temp[1:218,] # remove [Other] answers
  114. temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2))
  115. pCP = ggplot(data=temp) +
  116. geom_histogram(mapping=aes(x=reorder(CurrentPosition,percent),y = percent),
  117. stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  118. ylim(0,max(temp$percent)+5) +
  119. geom_text(aes(x=CurrentPosition,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  120. colour = "white", size = glob_insideBar_text_size) +
  121. geom_text(aes(x=CurrentPosition,y=percent,label = paste0('n=',n)),hjust = -0.25,
  122. colour = "black", size = glob_insideBar_text_size) +
  123. xlab('') + ylab('percent (%)') + ggtitle(paste0('My current (primary) position is: ','n = ',sum(temp$n))) + coord_flip() +
  124. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  125. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  126. axis.title = element_text(size = glob_text_size),
  127. plot.title = element_text(size = glob_title_text_size)) +
  128. scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels
  129. ragg::agg_tiff('Q2.My.current.position.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  130. pCP
  131. dev.off()
  132. # make a Neuro Discipline plot
  133. # Neuro Discipline questions are Yes/No questions ==> just need the ones who answered with Yes
  134. temp = data %>% select('Response ID',NeuroDisciplineCat,NeuroDiscipline) %>%
  135. unique() %>% mutate_if(is.factor, replace_factor_na)
  136. # change other colum to Yes/No column
  137. idx = grepl('Other',temp$NeuroDisciplineCat) & !grepl('Not answered',temp$NeuroDiscipline)
  138. temp$NeuroDiscipline[idx] = c('Yes')
  139. idx = grepl('Other',temp$NeuroDisciplineCat) & grepl('Not answered',temp$NeuroDiscipline)
  140. temp$NeuroDiscipline[idx] = c('No')
  141. temp = temp %>% filter(NeuroDiscipline != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  142. mutate(percent = round(n / sum(n) * 100,2))
  143. temp_abs = temp %>% group_by(NeuroDisciplineCat) %>% summarise(n_abs = sum(n))
  144. temp = temp %>% filter(NeuroDiscipline == 'Yes')
  145. pND = ggplot(data=temp) +
  146. geom_histogram(aes(x=reorder(NeuroDisciplineCat,percent,function(x) max(x[NeuroDiscipline == 'Yes'],na.rm = TRUE)), y = percent,
  147. group=NeuroDiscipline,fill=NeuroDiscipline), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  148. geom_text(aes(x=NeuroDisciplineCat, y = percent,group=NeuroDiscipline,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  149. colour = "white", size = glob_insideBar_text_size,) +
  150. geom_text(aes(x=NeuroDisciplineCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
  151. colour = "black", size = glob_insideBar_text_size) +
  152. xlab('') + ylab('percent (%)') + coord_flip() +
  153. ggtitle(str_wrap(paste0('Which neuroscience discipline(s) describe(s) your work or research best? ','n = ',unique(temp_abs$n_abs)),width = 70)) +
  154. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  155. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  156. axis.title = element_text(size = glob_text_size),
  157. plot.title = element_text(size = glob_title_text_size),
  158. legend.text=element_text(size=glob_text_size)) +
  159. scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
  160. scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
  161. ragg::agg_tiff('Q3.Which.neuroscience.discipline.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
  162. pND
  163. dev.off()
  164. # make a Fimiliar Data Types plot
  165. # Fimiliar Data Types are Yes/No questions ==> just need the ones who answered with Yes ==> or?
  166. temp = data %>% select('Response ID',FimilarDataTypesCat,FimilarDataTypes) %>% unique() %>% mutate_if(is.factor, replace_factor_na)
  167. # change other colum to Yes/No column
  168. idx = grepl('Other',temp$FimilarDataTypesCat) & !grepl('Not answered',temp$FimilarDataTypes)
  169. temp$FimilarDataTypes[idx] = c('Yes')
  170. idx = grepl('Other',temp$FimilarDataTypesCat) & grepl('Not answered',temp$FimilarDataTypes)
  171. temp$FimilarDataTypes[idx] = c('No')
  172. temp = temp %>% filter(FimilarDataTypes != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  173. mutate(percent = round(n / sum(n) * 100,2))
  174. temp_abs = temp %>% group_by(FimilarDataTypesCat) %>% summarise(n_abs = sum(n))
  175. temp = temp %>% filter(FimilarDataTypes == 'Yes')
  176. pFD = ggplot(data=temp) +
  177. geom_histogram(aes(x=reorder(FimilarDataTypesCat,percent,function(x) max(x[FimilarDataTypes == 'Yes'],na.rm = TRUE)), y = percent,
  178. group=FimilarDataTypes,fill=FimilarDataTypes), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  179. geom_text(aes(x=FimilarDataTypesCat, y = percent,group=FimilarDataTypes,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  180. colour = "white", size = glob_insideBar_text_size,) +
  181. geom_text(aes(x=FimilarDataTypesCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
  182. colour = "black", size = glob_insideBar_text_size) +
  183. xlab('') + ylab('percent (%)') + coord_flip() +
  184. ggtitle(str_wrap(paste0('Please state if your work includes one or several of the following recording methods or data types: ','n = ',unique(temp_abs$n_abs)),width = 100)) +
  185. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  186. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  187. axis.title = element_text(size = glob_text_size),
  188. plot.title = element_text(size = glob_title_text_size),
  189. legend.text=element_text(size=glob_text_size)) +
  190. scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
  191. scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels
  192. ragg::agg_tiff('Q4.Please.state.if.your.work.includes.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
  193. pFD
  194. dev.off()
  195. #### Tools ####
  196. # question Q6 For which tasks would you wish to have a tool or standard? is not considered do to the large number of different answer posibilities
  197. # more elegant
  198. data0 = neuro_data %>% dplyr::select('Response ID',
  199. starts_with('For which of these tasks '),
  200. starts_with('To what degree do you ')
  201. )
  202. comb_string_vec = c('For which of these tasks ',
  203. 'To what degree do you ')
  204. comb_col_names = c('TaskStandardTools',
  205. 'TaskStandardToolsDegree')
  206. data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
  207. # make a Task Standard Tools plot
  208. # Standard Task Tools are Yes/No questions ==> just need the ones who answered with Yes ==> or?
  209. # remove Comment columes
  210. temp = data %>% select('Response ID',TaskStandardToolsCat,TaskStandardTools) %>% filter(!grepl('Comment',TaskStandardToolsCat)) %>%
  211. mutate_if(is.factor, replace_factor_na) %>% unique() %>% droplevels()
  212. temp = temp %>% filter(TaskStandardTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  213. mutate(percent = round(n / sum(n) * 100,2))
  214. temp_abs = temp %>% group_by(TaskStandardToolsCat) %>% summarise(n_abs = sum(n))
  215. temp = temp %>% filter(TaskStandardTools == 'Yes')
  216. pTST = ggplot(data=temp) +
  217. geom_histogram(aes(x=reorder(TaskStandardToolsCat,percent,function(x) max(x[TaskStandardTools == 'Yes'],na.rm = TRUE)), y = percent,
  218. group=TaskStandardTools,fill=TaskStandardTools), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  219. geom_text(aes(x=TaskStandardToolsCat, y = percent,group=TaskStandardTools,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  220. colour = "white", size = glob_insideBar_text_size,) +
  221. geom_text(aes(x=TaskStandardToolsCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
  222. colour = "black", size = glob_insideBar_text_size) +
  223. xlab('') + ylab('percent (%)') + coord_flip() +
  224. ggtitle(str_wrap(paste0('For which of these tasks do you use available tools or standards? ','n = ',unique(temp_abs$n_abs)),width = 100)) +
  225. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  226. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  227. axis.title = element_text(size = glob_text_size),
  228. plot.title = element_text(size = glob_title_text_size),
  229. legend.text=element_text(size=glob_text_size)) +
  230. scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
  231. scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # nice regular expression solution for multiple lined labels
  232. ragg::agg_tiff('Q5.For.which.of.these.tasks.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
  233. pTST
  234. dev.off()
  235. # make a Task Standard Tools Degree plot
  236. temp = data %>% select( 'Response ID',TaskStandardToolsDegreeCat,TaskStandardToolsDegree) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
  237. # change some long level names
  238. temp$TaskStandardToolsDegree = recode_factor(temp$TaskStandardToolsDegree, 'Not at all - I use my own costum solutions' = 'Use my own solutions',
  239. 'This is not relevant for my scientific work' = 'Not relevant for my work')
  240. # change level order
  241. levels(temp$TaskStandardToolsDegree)
  242. temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'As much as possible')
  243. temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Mostly')
  244. temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Occasionally')
  245. temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Use my own solutions')
  246. temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not relevant for my work')
  247. temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not answered')
  248. temp = temp %>% filter(TaskStandardToolsDegree != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  249. mutate(percent = round(n / sum(n) * 100,2))
  250. temp_abs = temp %>% group_by(TaskStandardToolsDegreeCat) %>% summarise(n_abs = sum(n))
  251. pTSD = ggplot(data=temp) +
  252. geom_histogram(aes(x=reorder(TaskStandardToolsDegreeCat,percent,function(x) max(x[TaskStandardToolsDegree == 'As much as possible'],na.rm = TRUE)), y = percent,
  253. group=TaskStandardToolsDegree,fill=TaskStandardToolsDegree), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  254. geom_text(aes(x=TaskStandardToolsDegreeCat, y = percent,group=TaskStandardToolsDegree,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  255. colour = "white", size = glob_insideBar_text_size,) +
  256. geom_text(data = temp_abs,aes(x=TaskStandardToolsDegreeCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
  257. colour = "black", size = glob_insideBar_text_size) +
  258. xlab('') + ylab('percent (%)') + coord_flip() +
  259. ggtitle(str_wrap(paste0('To what degree do you use available tools or standards?'),width = 100)) +
  260. theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
  261. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  262. axis.title = element_text(size = glob_text_size),
  263. plot.title = element_text(size = glob_title_text_size),
  264. legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
  265. scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
  266. scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # nice regular expression solution for multiple lined labels
  267. ragg::agg_tiff('Q7.To.what.degree.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,units = "cm",
  268. res = 600,scaling=0.75)
  269. pTSD
  270. dev.off()
  271. #### sharing and reusing data ####
  272. # recreate different datasets
  273. # more elegant
  274. data0 = neuro_data %>% dplyr::select( 'Response ID',
  275. starts_with('Have you shared data with'),
  276. starts_with('Do you have existing data'),
  277. starts_with('Think of re-using data'),
  278. starts_with('Think of data sharing')
  279. )
  280. comb_string_vec = c('Have you shared data with',
  281. 'Do you have existing data',
  282. 'Think of re-using data',
  283. 'Think of data sharing')
  284. comb_col_names = c('SharingData',
  285. 'ExistingData',
  286. 'ThinkReusingData',
  287. 'ThinkSharingData')
  288. data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
  289. # Data sharing plot
  290. temp = data %>% select( 'Response ID',SharingDataCat,SharingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
  291. temp = temp %>% filter(SharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  292. mutate(percent = round(n / sum(n) * 100,2))
  293. temp_abs = temp %>% group_by(SharingDataCat) %>% summarise(n_abs = sum(n))
  294. temp = temp %>% filter(SharingData == 'Yes')
  295. pSD = ggplot(data=temp) +
  296. geom_histogram(aes(x=reorder(SharingDataCat,percent,function(x) max(x[SharingData == 'Yes'],na.rm = TRUE)), y = percent,
  297. group=SharingData,fill=SharingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  298. geom_text(aes(x=SharingDataCat, y = percent,group=SharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  299. colour = "white", size = glob_insideBar_text_size,) +
  300. geom_text(aes(x=SharingDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
  301. colour = "black", size = glob_insideBar_text_size) +
  302. xlab('') + ylab('percent (%)') + coord_flip() +
  303. ggtitle(str_wrap(paste0('Have you shared data with... ','n = ',unique(temp_abs$n_abs)),width = 60)) +
  304. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  305. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  306. axis.title = element_text(size = glob_text_size),
  307. plot.title = element_text(size = glob_title_text_size),
  308. legend.text=element_text(size=glob_text_size)) +
  309. scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
  310. scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
  311. ragg::agg_tiff('Q8.Have.you.shared.data.with.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  312. pSD
  313. dev.off()
  314. # Existing Data plot
  315. temp = data %>% select( 'Response ID',ExistingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
  316. temp = temp %>% filter(ExistingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  317. mutate(percent = round(n / sum(n) * 100,2))
  318. pED = ggplot(data=temp) +
  319. geom_histogram(mapping=aes(x=reorder(ExistingData,percent),y=percent),
  320. stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  321. ylim(0,max(temp$percent)+5) +
  322. geom_text(aes(x=ExistingData,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  323. colour = "white", size = glob_insideBar_text_size) +
  324. geom_text(aes(x=ExistingData,y=percent,label = paste0('n=',n)),hjust = -0.25,
  325. colour = "black", size = glob_insideBar_text_size) +
  326. xlab('') + ylab('percent (%)') + coord_flip() +
  327. ggtitle(str_wrap(paste0('Do you have existing data sets (experiments) that should be kept alive by making them available for reuse? ','n = ',sum(temp$n)),width = 70)) +
  328. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  329. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  330. axis.title = element_text(size = glob_text_size),
  331. plot.title = element_text(size = glob_title_text_size)) +
  332. scale_x_discrete(labels=function(x){str_wrap(x,width = 40)})
  333. ragg::agg_tiff('Q9.Do.you.have.existing.data.sets.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  334. pED
  335. dev.off()
  336. # make a Think of Reusing Data plot
  337. temp = data %>% select( 'Response ID',ThinkReusingDataCat,ThinkReusingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
  338. # change level order
  339. temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Undecided')
  340. temp$ThinkReusingData = relevel(temp$ThinkReusingData,'No')
  341. temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Not answered')
  342. temp = temp %>% filter(ThinkReusingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  343. mutate(percent = round(n / sum(n) * 100,2))
  344. temp_abs = temp %>% group_by(ThinkReusingDataCat) %>% summarise(n_abs = sum(n))
  345. pTRD = ggplot(data=temp) +
  346. geom_histogram(aes(x=reorder(ThinkReusingDataCat,percent,function(x) max(x[ThinkReusingData == 'Yes'],na.rm = TRUE)), y = percent,
  347. group=ThinkReusingData,fill=ThinkReusingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  348. geom_text(aes(x=ThinkReusingDataCat, y = percent,group=ThinkReusingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  349. colour = "white", size = glob_insideBar_text_size,) +
  350. geom_text(data = temp_abs,aes(x=ThinkReusingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
  351. colour = "black", size = glob_insideBar_text_size) +
  352. xlab('') + ylab('percent (%)') + coord_flip() +
  353. ggtitle(str_wrap(paste0('Think of re-using data from repositories.'),width = 100)) +
  354. theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
  355. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  356. axis.title = element_text(size = glob_text_size),
  357. plot.title = element_text(size = glob_title_text_size),
  358. legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
  359. scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) +
  360. scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
  361. ragg::agg_tiff('Q10.Think.of.re.using.data.from.repositories.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
  362. res = 600,scaling = 0.75)
  363. pTRD
  364. dev.off()
  365. # make a Think of Sharing Data plot
  366. temp = data %>% select( 'Response ID',ThinkSharingDataCat,ThinkSharingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
  367. # change level order
  368. temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Undecided')
  369. temp$ThinkSharingData = relevel(temp$ThinkSharingData,'No')
  370. temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Not answered')
  371. temp = temp %>% filter(ThinkSharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  372. mutate(percent = round(n / sum(n) * 100,2))
  373. temp_abs = temp %>% group_by(ThinkSharingDataCat) %>% summarise(n_abs = sum(n))
  374. pTSD = ggplot(data=temp) +
  375. geom_histogram(aes(x=reorder(ThinkSharingDataCat,percent,function(x) max(x[ThinkSharingData == 'Yes'],na.rm = TRUE)), y = percent,
  376. group=ThinkSharingData,fill=ThinkSharingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  377. geom_text(aes(x=ThinkSharingDataCat, y = percent,group=ThinkSharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  378. colour = "white", size = glob_insideBar_text_size,) +
  379. geom_text(data = temp_abs,aes(x=ThinkSharingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
  380. colour = "black", size = glob_insideBar_text_size) +
  381. xlab('') + ylab('percent (%)') + coord_flip() +
  382. ggtitle(str_wrap(paste0('Think of data sharing with researchers who are NOT direct collaborators.'),width = 100)) +
  383. theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
  384. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  385. axis.title = element_text(size = glob_text_size),
  386. plot.title = element_text(size = glob_title_text_size),
  387. legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
  388. scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) +
  389. scale_x_discrete(labels=function(x){str_wrap(x,width = 40)})
  390. ragg::agg_tiff('Q11.Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,
  391. units = "cm", res = 600,scaling = 0.75)
  392. pTSD
  393. dev.off()
  394. #### Data repositories and analyses ####
  395. # more elegant
  396. data0 = neuro_data %>% dplyr::select( 'Response ID',
  397. starts_with('Please indicate:'),
  398. starts_with('How do you process and analyze your data')
  399. )
  400. comb_string_vec = c('Please indicate:',
  401. 'How do you process and analyze your data')
  402. comb_col_names = c('SharingProblems',
  403. 'HowAnalyzeData')
  404. data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
  405. # make a Sharing Data Problems plot
  406. temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>%
  407. filter(as.numeric(SharingProblemsCat) != 6 & as.numeric(SharingProblemsCat) != 7) %>% droplevels()
  408. # change level order
  409. levels(temp$SharingProblemsCat)
  410. temp$SharingProblems = relevel(temp$SharingProblems,'Rather agree')
  411. temp$SharingProblems = relevel(temp$SharingProblems,'Undecided')
  412. temp$SharingProblems = relevel(temp$SharingProblems,'Rather disagree')
  413. temp$SharingProblems = relevel(temp$SharingProblems,'Fully disagree')
  414. temp$SharingProblems = relevel(temp$SharingProblems,'Not answered')
  415. temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  416. mutate(percent = round(n / sum(n) * 100,2))
  417. temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n))
  418. pSP = ggplot(data=temp) +
  419. geom_histogram(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Fully agree'],na.rm = TRUE)), y = percent,
  420. group=SharingProblems,fill=SharingProblems), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  421. geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  422. colour = "white", size = glob_insideBar_text_size,) +
  423. geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
  424. colour = "black", size = glob_insideBar_text_size) +
  425. xlab('') + ylab('percent (%)') + coord_flip() +
  426. ggtitle(str_wrap(paste0('Sharing problems. Please indicate: '),width = 100)) +
  427. theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
  428. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  429. axis.title = element_text(size = glob_text_size),
  430. plot.title = element_text(size = glob_title_text_size),
  431. legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
  432. scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
  433. scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
  434. ragg::agg_tiff('Q12.Sharing.problems.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling =0.75)
  435. pSP
  436. dev.off()
  437. # make a How to exercise plot
  438. temp = data %>% select( 'Response ID',HowAnalyzeDataCat,HowAnalyzeData) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
  439. # change the specific answers to yes
  440. idx = grepl('Other',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)
  441. temp$HowAnalyzeData[idx] = c('Yes')
  442. tempID = temp$'Response ID'[grepl('Manual inspection and analysis',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)]
  443. idx = grepl('Other',temp$HowAnalyzeDataCat) & grepl('Not answered',temp$HowAnalyzeData) & (temp$'Response ID' %in% tempID)
  444. temp$HowAnalyzeData[idx] = c('No')
  445. temp = temp %>% filter(HowAnalyzeData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  446. mutate(percent = round(n / sum(n) * 100,2))
  447. temp_abs = temp %>% group_by(HowAnalyzeDataCat) %>% summarise(n_abs = sum(n))
  448. temp = temp %>% filter(HowAnalyzeData == 'Yes')
  449. pHAD = ggplot(data=temp) +
  450. geom_histogram(aes(x=reorder(HowAnalyzeDataCat,percent,function(x) max(x[HowAnalyzeData == 'Yes'],na.rm = TRUE)), y = percent,
  451. group=HowAnalyzeData,fill=HowAnalyzeData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  452. geom_text(aes(x=HowAnalyzeDataCat, y = percent,group=HowAnalyzeData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  453. colour = "white", size = glob_insideBar_text_size,) +
  454. geom_text(aes(x=HowAnalyzeDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
  455. colour = "black", size = glob_insideBar_text_size) +
  456. xlab('') + ylab('percent (%)') + coord_flip() +
  457. ggtitle(str_wrap(paste0('How do you process and analyze your data? ','n = ',unique(temp_abs$n_abs)),width = 60)) +
  458. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  459. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  460. axis.title = element_text(size = glob_text_size),
  461. plot.title = element_text(size = glob_title_text_size),
  462. legend.text=element_text(size=glob_text_size)) +
  463. scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
  464. scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # nice regular expression solution for multiple lined labels
  465. ragg::agg_tiff('Q13.How.do.you.process.and.analyze.your.data.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  466. pHAD
  467. dev.off()
  468. # make publishing plot
  469. temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>%
  470. filter(as.numeric(SharingProblemsCat) == 6 | as.numeric(SharingProblemsCat) == 7)
  471. # change level order
  472. temp$SharingProblems = relevel(temp$SharingProblems,'Undecided')
  473. temp$SharingProblems = relevel(temp$SharingProblems,'No')
  474. temp$SharingProblems = relevel(temp$SharingProblems,'Not answered')
  475. temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  476. mutate(percent = round(n / sum(n) * 100,2))
  477. temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n))
  478. pSO = ggplot(data=temp) +
  479. geom_histogram(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Yes'],na.rm = TRUE)), y = percent,
  480. group=SharingProblems,fill=SharingProblems), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  481. geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  482. colour = "white", size = glob_insideBar_text_size,) +
  483. geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
  484. colour = "black", size = glob_insideBar_text_size) +
  485. xlab('') + ylab('percent (%)') + coord_flip() +
  486. ggtitle(str_wrap(paste0('Sharing opinions. Please indicate: ','n = ',unique(temp_abs$n_abs)),width = 60)) +
  487. theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
  488. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  489. axis.title = element_text(size = glob_text_size),
  490. plot.title = element_text(size = glob_title_text_size),
  491. legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
  492. scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) +
  493. scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # nice regular expression solution for multiple lined labels
  494. ragg::agg_tiff('Q14.Sharing.opinions.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+3.5, units = "cm", res = 600)
  495. pSO
  496. dev.off()
  497. #### Research data management in general ####
  498. # more elegant
  499. data0 = neuro_data %>% dplyr::select( 'Response ID',
  500. starts_with('What is your opinion'),
  501. starts_with('Applying research data management'),
  502. starts_with('When do you employ'),
  503. starts_with('Do you have dedicated'),
  504. starts_with('How much time do you currently'),
  505. starts_with('Please rank the top'))
  506. comb_string_vec = c('What is your opinion',
  507. 'Applying research data management',
  508. 'When do you employ',
  509. 'Do you have dedicated',
  510. 'How much time do you currently',
  511. 'Please rank the top')
  512. comb_col_names = c('StatementsOpinion',
  513. 'ApplyDataManagement',
  514. 'EmployTools',
  515. 'DedicatedPersonal',
  516. 'TimeConsum',
  517. 'TopSharingProblems')
  518. data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
  519. # make a What is your opinion plot
  520. temp = data %>% select( 'Response ID',StatementsOpinionCat,StatementsOpinion) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
  521. # change level order
  522. levels(droplevels(temp$StatementsOpinion))
  523. temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather agree')
  524. temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Undecided')
  525. temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather disagree')
  526. temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Fully disagree')
  527. temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Not answered')
  528. temp = temp %>% filter(StatementsOpinion != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  529. mutate(percent = round(n / sum(n) * 100,2))
  530. temp_abs = temp %>% group_by(StatementsOpinionCat) %>% summarise(n_abs = sum(n))
  531. pOS = ggplot(data=temp) +
  532. geom_histogram(aes(x=reorder(StatementsOpinionCat,percent,function(x) max(x[StatementsOpinion == 'Fully agree'],na.rm = TRUE)), y = percent,
  533. group=StatementsOpinion,fill=StatementsOpinion), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  534. geom_text(aes(x=StatementsOpinionCat, y = percent,group=StatementsOpinion,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  535. colour = "white", size = glob_insideBar_text_size,) +
  536. geom_text(data = temp_abs,aes(x=StatementsOpinionCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
  537. colour = "black", size = glob_insideBar_text_size) +
  538. xlab('') + ylab('percent (%)') + coord_flip() +
  539. ggtitle(str_wrap(paste0('What is your opinion on the following statements?'),width = 100)) +
  540. theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
  541. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  542. axis.title = element_text(size = glob_text_size),
  543. plot.title = element_text(size = glob_title_text_size),
  544. legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
  545. scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
  546. scale_x_discrete(labels=function(x){str_wrap(x,width = 35)}) # nice regular expression solution for multiple lined labels
  547. ragg::agg_tiff('Q15.What.is.your.opinion.on.the.following.statements.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,
  548. units = "cm", res = 600,scaling = 0.75)
  549. pOS
  550. dev.off()
  551. # make a Apply Data Management plot
  552. temp = data %>% select( 'Response ID',ApplyDataManagementCat,ApplyDataManagement) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
  553. # change level order
  554. temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather agree')
  555. temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Undecided')
  556. temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather disagree')
  557. temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Fully disagree')
  558. temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Not answered')
  559. temp = temp %>% filter(ApplyDataManagement != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  560. mutate(percent = round(n / sum(n) * 100,2))
  561. temp_abs = temp %>% group_by(ApplyDataManagementCat) %>% summarise(n_abs = sum(n))
  562. pARM = ggplot(data=temp) +
  563. geom_histogram(aes(x=reorder(ApplyDataManagementCat,percent,function(x) max(x[ApplyDataManagement == 'Fully agree'],na.rm = TRUE)), y = percent,
  564. group=ApplyDataManagement,fill=ApplyDataManagement), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  565. geom_text(aes(x=ApplyDataManagementCat, y = percent,group=ApplyDataManagement,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  566. colour = "white", size = glob_insideBar_text_size,) +
  567. geom_text(data = temp_abs,aes(x=ApplyDataManagementCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
  568. colour = "black", size = glob_insideBar_text_size) +
  569. xlab('') + ylab('percent (%)') + coord_flip() +
  570. ggtitle(str_wrap(paste0('Applying research data management...'),width = 100)) +
  571. theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
  572. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  573. axis.title = element_text(size = glob_text_size),
  574. plot.title = element_text(size = glob_title_text_size),
  575. legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
  576. scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
  577. scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels
  578. ragg::agg_tiff('Q16.Applying.research.data.management.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
  579. res = 600,scaling = 0.75)
  580. pARM
  581. dev.off()
  582. # make When do you employ research data management tools and services in your research plot
  583. temp = data %>% select( 'Response ID',EmployTools) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
  584. temp = temp %>% filter(EmployTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  585. mutate(percent = round(n / sum(n) * 100,2))
  586. pWOT = ggplot(data=temp) +
  587. geom_histogram(mapping=aes(x=reorder(EmployTools,percent),y=percent),
  588. stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  589. ylim(0,max(temp$percent)+5) +
  590. geom_text(aes(x=EmployTools,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  591. colour = "white", size = glob_insideBar_text_size) +
  592. geom_text(aes(x=EmployTools,y=percent,label = paste0('n=',n)),hjust = -0.25,
  593. colour = "black", size = glob_insideBar_text_size) +
  594. xlab('') + ylab('percent (%)') + coord_flip() +
  595. ggtitle(str_wrap(paste0('When do you employ research data management tools and services in your research? ','n = ',sum(temp$n)),width = 70)) +
  596. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  597. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  598. axis.title = element_text(size = glob_text_size),
  599. plot.title = element_text(size = glob_title_text_size)) +
  600. scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})
  601. ragg::agg_tiff('Q17.When.do.you.employ.research.data.management.tools.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  602. pWOT
  603. dev.off()
  604. # make Do you have dedicated personnel with research data management or data curation expertise plot
  605. temp = data %>% select( 'Response ID',DedicatedPersonal) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
  606. temp = temp %>% filter(DedicatedPersonal != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  607. mutate(percent = round(n / sum(n) * 100,2))
  608. pDP = ggplot(data=temp) +
  609. geom_histogram(mapping=aes(x=reorder(DedicatedPersonal,percent),y=percent),
  610. stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  611. ylim(0,max(temp$percent)+5) +
  612. geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  613. colour = "white", size = glob_insideBar_text_size) +
  614. geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0('n=',n)),hjust = -0.25,
  615. colour = "black", size = glob_insideBar_text_size) +
  616. xlab('') + ylab('percent (%)') + coord_flip() +
  617. ggtitle(str_wrap(paste0('Do you have dedicated personnel with research data management or data curation expertise? ','n = ',sum(temp$n)),width = 70)) +
  618. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  619. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  620. axis.title = element_text(size = glob_text_size),
  621. plot.title = element_text(size = glob_title_text_size)) +
  622. scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})
  623. ragg::agg_tiff('Q18.Do.you.have.dedicated.personnel.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  624. pDP
  625. dev.off()
  626. # make How much time do you currently need to ready a dataset from your lab for publication and re-use plot
  627. temp = data %>% select( 'Response ID',TimeConsum) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
  628. temp = temp %>% filter(TimeConsum != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  629. mutate(percent = round(n / sum(n) * 100,2))
  630. pHMT = ggplot(data=temp) +
  631. geom_histogram(mapping=aes(x=reorder(TimeConsum,percent),y=percent),
  632. stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  633. ylim(0,max(temp$percent)+5) +
  634. geom_text(aes(x=TimeConsum,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  635. colour = "white", size = glob_insideBar_text_size) +
  636. geom_text(aes(x=TimeConsum,y=percent,label = paste0('n=',n)),hjust = -0.25,
  637. colour = "black", size = glob_insideBar_text_size) +
  638. xlab('') + ylab('percent (%)') + coord_flip() +
  639. ggtitle(str_wrap(paste0('How much time do you currently need to ready a dataset from your lab for publication and re-use? ','n = ',sum(temp$n)),width = 70)) +
  640. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  641. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  642. axis.title = element_text(size = glob_text_size),
  643. plot.title = element_text(size = glob_title_text_size)) +
  644. scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})
  645. ragg::agg_tiff('Q19.How.much.time.do.you.currently.need.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  646. pHMT
  647. dev.off()
  648. # make a Top Sharing Data Problems plot
  649. # hier würde ich die nicht angegebenen werte wirklich entfernen alles andere sieht
  650. # dämlich aus und macht unnötig Probleme bzw ist schwer zu sehen was nun die wichtigen Dinge sind
  651. temp = data %>% select( 'Response ID',TopSharingProblemsCat,TopSharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
  652. # change level order
  653. temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 2')
  654. temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 3')
  655. temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 4')
  656. temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 5')
  657. temp = temp %>% filter(TopSharingProblems != 'Not answered') %>% group_by_at(vars('TopSharingProblems','TopSharingProblemsCat')) %>% summarise(n = n()) %>% droplevels() %>%
  658. mutate(percent = round(n / sum(n) * 100,2))
  659. temp_abs = temp %>% group_by(TopSharingProblems) %>% summarise(n_abs = sum(n))
  660. pTSP = ggplot(data=temp) +
  661. geom_histogram(aes(x=reorder(TopSharingProblems,percent,function(x) max(x[TopSharingProblemsCat == 'Rank 1'],na.rm = TRUE)), y = percent,
  662. group=TopSharingProblemsCat,fill=TopSharingProblemsCat), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  663. geom_text(aes(x=TopSharingProblems, y = percent,group=TopSharingProblemsCat,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  664. colour = "white", size = glob_insideBar_text_size,) +
  665. geom_text(data = temp_abs,aes(x=TopSharingProblems,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
  666. colour = "black", size = glob_insideBar_text_size) +
  667. xlab('') + ylab('percent (%)') + coord_flip() +
  668. ggtitle(str_wrap(paste0('Please rank the top 5 most pressing issues: '),width = 100)) +
  669. theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
  670. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  671. axis.title = element_text(size = glob_text_size),
  672. plot.title = element_text(size = glob_title_text_size),
  673. legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
  674. scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
  675. scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
  676. ragg::agg_tiff('Q20.Please.rank.the.top.most.pressing.issues.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
  677. res = 600,scaling = 0.75)
  678. pTSP
  679. dev.off()
  680. #### Membership ####
  681. # more elegant
  682. data0 = neuro_data %>% dplyr::select( 'Response ID',
  683. starts_with('Are you member of'))
  684. comb_string_vec = c('Are you member of')
  685. comb_col_names = c('MemberOf')
  686. data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
  687. # make a Are you member of, or otherwise involved in, other NFDI consortia or initiatives plot
  688. temp = data %>% select( 'Response ID',MemberOf) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
  689. temp = temp %>% filter(MemberOf != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
  690. mutate(percent = round(n / sum(n) * 100,2))
  691. pMO = ggplot(data=temp) +
  692. geom_histogram(mapping=aes(x=reorder(MemberOf,percent),y=percent),
  693. stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  694. ylim(0,max(temp$percent)+5) +
  695. geom_text(aes(x=MemberOf,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
  696. colour = "white", size = glob_insideBar_text_size) +
  697. geom_text(aes(x=MemberOf,y=percent,label = paste0('n=',n)),hjust = -0.25,
  698. colour = "black", size = glob_insideBar_text_size) +
  699. xlab('') + ylab('percent (%)') + coord_flip() +
  700. ggtitle(str_wrap(paste0('Are you member of, or otherwise involved in, other NFDI consortia or initiatives? ','n = ',sum(temp$n)),width = 100)) +
  701. theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
  702. axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
  703. axis.title = element_text(size = glob_text_size),
  704. plot.title = element_text(size = glob_title_text_size)) +
  705. scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})
  706. ragg::agg_tiff('Q21.Are.you.member.of.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
  707. pMO
  708. dev.off()