Browse Source

Add data and scripts

Thomas Wachtler 2 years ago
parent
commit
1b3d9cf51d
5 changed files with 4019 additions and 2 deletions
  1. 17 2
      README.md
  2. 2769 0
      create_Picture4.R
  3. 833 0
      create_Picture_AllSinglePlots_Final.R
  4. 94 0
      datacite.yml
  5. 306 0
      results-survey197421_alledaten1.csv

+ 17 - 2
README.md

@@ -1,3 +1,18 @@
-# SurveyData
+# NFDI-Neuro Survey Data
 
-NFDI-Neuro Survey Data
+Survey response data underlying the manuscript
+
+Klingner C, Denker M, Grün S, Hanke M, Oeltze-Jafra S, Ohl FW, Radny J, Rotter S, Scherberger H, Stein A, Wachtler T, Witte OW, Ritter P (2022) 
+"Overcoming the Reproducibility Crisis - Results of the first Community Survey of the German National Research Data Infrastructure for Neuroscience".
+BioRxiv.
+
+Files: 
+
+- `results-survey197421_alledaten.csv`: Response data
+- `create_Picture_AllSinglePlots_Final.R`: R script to generate figures of response distributions for individual questions
+- `create_Picture4.R`: R script to generate figures of analysis results
+
+
+(c) Carsten Klingner and the NFDI-Neuro initiative ([info@nfdi-neuro.de](mailto:info@nfdi-neuro.de))
+
+License: Creative Commons Attribution (CC BY) 4.0

File diff suppressed because it is too large
+ 2769 - 0
create_Picture4.R


+ 833 - 0
create_Picture_AllSinglePlots_Final.R

@@ -0,0 +1,833 @@
+#### clean workspace ####
+# NOTE(review): rm(list = ls()) wipes every object in the global environment of
+# whoever runs this script; kept so the script still starts from an empty workspace.
+rm(list=ls())
+
+# Switch to the original author's project folder only when it exists. An
+# unconditional setwd() stops the script with an error on every other machine;
+# without the folder the current working directory is used as it is.
+project_dir = "/home/aschmidt/R-Abbildungen/CK-Quest/"
+if (dir.exists(project_dir)) {
+  setwd(project_dir)
+}
+getwd()
+
+# clear the console
+cat("\014") 
+
+# colour palette (hex codes) used for bar fills in the figures below
+color_pal_vec = c('#070778','#816c93','#808080','#0e6966','#5a2c40','#1b1b1b','#935430','#f43661','#171511')
+
+#### functions ####
+# Turn a vector into a factor in which missing values form the explicit level
+# "Not answered".
+#
+# x: a factor (or any vector that as.character() can convert).
+# Returns a factor of the same length as x, built from the character values of
+# x with every NA replaced by "Not answered".
+replace_factor_na <- function(x){
+  x <- as.character(x)
+  # plain base-R replacement, so the function does not need dplyr to be attached
+  x[is.na(x)] <- "Not answered"
+  # return the factor visibly instead of as the invisible value of an assignment
+  as.factor(x)
+}
+
+# Reshape several groups of survey columns from wide to long format.
+#
+# data0:           data frame in wide format.
+# comb_string_vec: character vector; element i is a literal (non-regex)
+#                  substring that identifies the columns of group i.
+# comb_col_names:  character vector of the same length; element i names the
+#                  value column created for group i, and paste0(name, 'Cat')
+#                  names the column that tells which source column a value
+#                  came from.
+# Returns a data.frame in which every group has been melted in turn. Labels of
+# the 'Cat' column are reduced to the text between the first '[' and the last
+# ']' when such a pair exists; otherwise they are kept unchanged.
+#
+# Caution: the groups are melted one after another, so the resulting long
+# format data set can become very large.
+wide_to_long_as <- function(data0,comb_string_vec,comb_col_names){
+  library(data.table)
+  # seq_along() also copes with an empty vector, unlike seq(1, 0, 1)
+  for(i in seq_along(comb_string_vec)){
+    is_group_col = grepl(comb_string_vec[i],colnames(data0),fixed = TRUE)
+    data0 = data.table::melt(as.data.table(data0),
+                             id.vars = which(!is_group_col),
+                             measure.vars = list(which(is_group_col)),
+                             variable.name = paste0(comb_col_names[i],'Cat'),
+                             value.name = comb_col_names[i],value.factor=TRUE)
+    
+    # make some nicer labels
+    data0 = as.data.frame(data0)
+    # melt() appends the variable column second to last, the value column last
+    cat_col = ncol(data0)-1
+    level_strings = levels(data0[,cat_col])
+    
+    # iterate over the level strings and update them
+    for(s in seq_along(level_strings)){
+      level_string = level_strings[s]
+      temp_start = unlist(gregexpr('\\[', level_string))[1]
+      temp_end = tail(unlist(gregexpr('\\]', level_string)), n=1)
+      # gregexpr() signals "no match" with -1. Cut only when both brackets were
+      # found in the right order; previously a label without brackets was
+      # silently replaced by an empty string.
+      if(temp_start != -1 && temp_end != -1 && temp_end > temp_start){
+        level_string = substr(level_string,temp_start+1,temp_end-1)
+      }
+      level_strings[s] = level_string
+    }
+    # reset the labels
+    levels(data0[,cat_col]) = level_strings
+  }
+  
+  return(data0)
+}
+
+#### load libraries ####
+library(ggplot2)
+library(dplyr)
+library(forcats)
+library(ggpubr)
+library(reshape2)
+library(stringr)
+library(ragg) # scaling text size for different picture sizes
+
+#### Load the data ####
+# Empty cells and "N/A" are read as NA; check.names = FALSE keeps the column
+# names exactly as they are written in the CSV header.
+#neuro_data <- read.csv("results-survey197421_nurkomplett.csv",row.names=NULL,na.strings=c("","N/A"),sep=',')
+neuro_data <- read.csv(
+  "results-survey197421_alledaten.csv",
+  row.names = NULL,
+  na.strings = c("", "N/A"),
+  sep = ',',
+  check.names = FALSE
+)
+# drop every row without an answer to the position question
+neuro_data <- neuro_data[!is.na(neuro_data$`My current (primary) position is:`), ]
+
+# The guiding question is what distinguishes the people who share data from the
+# others, and more generally which problems exist in our data infrastructure.
+
+# global layout values used by the figures
+glob_insideBar_text_size <- 2.75
+glob_text_size <- 8
+glob_title_text_size <- 8
+glob_fig_width <- 17.5
+glob_dec_round <- 0
+
+#### About myself ####
+# literal prefixes of the four question groups of this section ...
+comb_string_vec <- c('I work at',
+                     'My current ',
+                     'Which neuroscience discipline(s) ',
+                     'Please state if your ')
+# ... and the names of the long-format value columns, in the same order
+comb_col_names <- c('WorkPlaces',
+                    'CurrentPosition',
+                    'NeuroDiscipline',
+                    'FimilarDataTypes')
+
+# respondent id plus every column belonging to one of the four groups
+data0 <- dplyr::select(neuro_data,
+                       'Response ID',
+                       starts_with('I work at'),
+                       starts_with('My current '),
+                       starts_with('Which neuroscience discipline(s) '),
+                       starts_with('Please state if your '))
+
+data <- wide_to_long_as(data0, comb_string_vec, comb_col_names)
+
+# make a WorkPlaces plot; filter out the 'Other' answers
+# One row per respondent and distinct answer; NA becomes 'Not answered', answers
+# given by fewer than 3 respondents are dropped, two long labels are shortened.
+# ungroup() before mutate(): dplyr versions before 1.0 refuse to modify a grouping column.
+temp = data %>% select('Response ID',WorkPlaces) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>% group_by(WorkPlaces) %>% 
+  filter(n() >= 3) %>% ungroup() %>%
+  mutate(WorkPlaces = str_replace(WorkPlaces,", Helmholtz Center", "")) %>%
+  mutate(WorkPlaces = str_replace(WorkPlaces,", Fraunhofer Institute", ""))
+# NOTE(review): 218 is hard-coded - presumably the number of respondents, so that
+# only the rows of the main answer column remain; verify whenever the data changes.
+temp = temp[1:218,] # remove [Other] answers
+# absolute count and share (in %) per answer
+temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2))
+
+# horizontal bar chart: share inside the bar, absolute count next to it
+pWP = ggplot(data=temp) + 
+  geom_col(mapping=aes(x=reorder(WorkPlaces,percent),y=percent),
+           width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
+  ylim(0,max(temp$percent)+5) +
+  geom_text(aes(x=WorkPlaces,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(aes(x=WorkPlaces,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + ggtitle(paste0('I work at / I am affiliated with: ','n = ',sum(temp$n))) + coord_flip() +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size)) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 30)})
+
+# figure height (cm) grows with the number of bars
+ragg::agg_tiff('Q1.I.work.at.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+print(pWP) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+# make a Current Position plot; NA answers become 'Not answered'
+# the filters drop individual free-text answers so that they get no bar of their own
+temp = data %>% select('Response ID',CurrentPosition) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>%
+  filter(CurrentPosition != 'child neurologist') %>%
+  filter(CurrentPosition != 'coodinator ') %>%
+  filter(CurrentPosition != 'Clinician') %>%
+  filter(CurrentPosition != 'Doctor') %>%
+  filter(CurrentPosition != 'Emeritus Professor') %>%
+  filter(CurrentPosition != 'medical doctor') %>%
+  filter(CurrentPosition != 'Oberarzt') %>%
+  filter(CurrentPosition != 'Therapist')
+# NOTE(review): 218 is hard-coded - presumably the number of respondents, so that
+# only the rows of the main answer column remain; verify whenever the data changes.
+temp = temp[1:218,] # remove [Other] answers
+# absolute count and share (in %) per answer
+temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2))
+
+# horizontal bar chart: share inside the bar, absolute count next to it
+pCP = ggplot(data=temp) + 
+  geom_col(mapping=aes(x=reorder(CurrentPosition,percent),y = percent),
+           width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
+  ylim(0,max(temp$percent)+5) +
+  geom_text(aes(x=CurrentPosition,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(aes(x=CurrentPosition,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) + 
+  xlab('') + ylab('percent (%)') + ggtitle(paste0('My current (primary) position is: ','n = ',sum(temp$n))) + coord_flip() +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size)) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # wrap long axis labels onto several lines
+
+# figure height (cm) grows with the number of bars
+ragg::agg_tiff('Q2.My.current.position.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+print(pCP) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+# make a Neuro Discipline plot
+# Neuro Discipline questions are Yes/No questions ==> just need the ones who answered with Yes
+temp = data %>% select('Response ID',NeuroDisciplineCat,NeuroDiscipline) %>% 
+  unique() %>% mutate_if(is.factor, replace_factor_na)
+
+# turn the free-text 'Other' column into a Yes/No column:
+# any text counts as 'Yes', no text counts as 'No'
+idx = grepl('Other',temp$NeuroDisciplineCat) & !grepl('Not answered',temp$NeuroDiscipline)
+temp$NeuroDiscipline[idx] = c('Yes')
+idx = grepl('Other',temp$NeuroDisciplineCat) & grepl('Not answered',temp$NeuroDiscipline)
+temp$NeuroDiscipline[idx] = c('No')
+
+# count per category and answer; percent is the share within each category
+temp = temp %>% filter(NeuroDiscipline != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+       mutate(percent = round(n / sum(n) * 100,2)) 
+# number of respondents with an answer per category (used in the title)
+temp_abs = temp %>% group_by(NeuroDisciplineCat) %>% summarise(n_abs = sum(n))
+temp = temp %>% filter(NeuroDiscipline == 'Yes')
+
+# horizontal bar chart of the 'Yes' shares, ordered by size
+pND = ggplot(data=temp) + 
+  geom_col(aes(x=reorder(NeuroDisciplineCat,percent,function(x) max(x[NeuroDiscipline == 'Yes'],na.rm = TRUE)), y = percent,
+               group=NeuroDiscipline,fill=NeuroDiscipline), width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
+  geom_text(aes(x=NeuroDisciplineCat, y = percent,group=NeuroDiscipline,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(aes(x=NeuroDisciplineCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() + 
+  ggtitle(str_wrap(paste0('Which neuroscience discipline(s) describe(s) your work or research best? ','n = ',unique(temp_abs$n_abs)),width = 70)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) +
+  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # wrap long axis labels onto several lines
+
+# figure height (cm) grows with the number of bars
+ragg::agg_tiff('Q3.Which.neuroscience.discipline.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
+print(pND) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+
+# make a Familiar Data Types plot
+# Familiar Data Types are Yes/No questions ==> just need the ones who answered with Yes ==> or?
+temp = data %>% select('Response ID',FimilarDataTypesCat,FimilarDataTypes)  %>% unique() %>% mutate_if(is.factor, replace_factor_na)
+
+# turn the free-text 'Other' column into a Yes/No column:
+# any text counts as 'Yes', no text counts as 'No'
+idx = grepl('Other',temp$FimilarDataTypesCat) & !grepl('Not answered',temp$FimilarDataTypes)
+temp$FimilarDataTypes[idx] = c('Yes')
+idx = grepl('Other',temp$FimilarDataTypesCat) & grepl('Not answered',temp$FimilarDataTypes)
+temp$FimilarDataTypes[idx] = c('No')
+
+# count per category and answer; percent is the share within each category
+temp = temp %>% filter(FimilarDataTypes != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+  mutate(percent = round(n / sum(n) * 100,2)) 
+# number of respondents with an answer per category (used in the title)
+temp_abs = temp %>% group_by(FimilarDataTypesCat) %>% summarise(n_abs = sum(n))
+temp = temp %>% filter(FimilarDataTypes == 'Yes')
+
+# horizontal bar chart of the 'Yes' shares, ordered by size
+pFD = ggplot(data=temp) + 
+  geom_col(aes(x=reorder(FimilarDataTypesCat,percent,function(x) max(x[FimilarDataTypes == 'Yes'],na.rm = TRUE)), y = percent,
+               group=FimilarDataTypes,fill=FimilarDataTypes), width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
+  geom_text(aes(x=FimilarDataTypesCat, y = percent,group=FimilarDataTypes,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(aes(x=FimilarDataTypesCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() + 
+  ggtitle(str_wrap(paste0('Please state if your work includes one or several of the following recording methods or data types: ','n = ',unique(temp_abs$n_abs)),width = 100)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) +
+  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # wrap long axis labels onto several lines
+
+# figure height (cm) grows with the number of bars
+ragg::agg_tiff('Q4.Please.state.if.your.work.includes.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
+print(pFD) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+
+#### Tools ####
+# Question Q6 "For which tasks would you wish to have a tool or standard?" is not
+# considered due to the large number of different answer possibilities.
+
+# literal prefixes of the two question groups of this section ...
+comb_string_vec <- c('For which of these tasks ',
+                     'To what degree do you ')
+
+# ... and the names of the long-format value columns, in the same order
+comb_col_names <- c('TaskStandardTools',
+                    'TaskStandardToolsDegree')
+
+# respondent id plus every column belonging to one of the two groups
+data0 <- dplyr::select(neuro_data,
+                       'Response ID',
+                       starts_with('For which of these tasks '),
+                       starts_with('To what degree do you '))
+
+data <- wide_to_long_as(data0, comb_string_vec, comb_col_names)
+
+# make a Task Standard Tools plot
+# Standard Task Tools are Yes/No questions ==> just need the ones who answered with Yes ==> or?
+# remove the Comment columns
+temp = data %>% select('Response ID',TaskStandardToolsCat,TaskStandardTools) %>% filter(!grepl('Comment',TaskStandardToolsCat)) %>% 
+  mutate_if(is.factor, replace_factor_na) %>% unique() %>% droplevels()
+
+# count per category and answer; percent is the share within each category
+temp = temp %>% filter(TaskStandardTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+  mutate(percent = round(n / sum(n) * 100,2)) 
+# number of respondents with an answer per category (used in the title)
+temp_abs = temp %>% group_by(TaskStandardToolsCat) %>% summarise(n_abs = sum(n))
+temp = temp %>% filter(TaskStandardTools == 'Yes')
+
+# horizontal bar chart of the 'Yes' shares, ordered by size
+pTST = ggplot(data=temp) + 
+  geom_col(aes(x=reorder(TaskStandardToolsCat,percent,function(x) max(x[TaskStandardTools == 'Yes'],na.rm = TRUE)), y = percent,
+               group=TaskStandardTools,fill=TaskStandardTools), width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
+  geom_text(aes(x=TaskStandardToolsCat, y = percent,group=TaskStandardTools,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(aes(x=TaskStandardToolsCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() + 
+  ggtitle(str_wrap(paste0('For which of these tasks do you use available tools or standards? ','n = ',unique(temp_abs$n_abs)),width = 100)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) +
+  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # wrap long axis labels onto several lines
+
+# figure height (cm) grows with the number of bars
+ragg::agg_tiff('Q5.For.which.of.these.tasks.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
+print(pTST) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+# make a Task Standard Tools Degree plot 
+temp = data %>% select( 'Response ID',TaskStandardToolsDegreeCat,TaskStandardToolsDegree) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
+
+# change some long level names
+temp$TaskStandardToolsDegree = recode_factor(temp$TaskStandardToolsDegree, 'Not at all - I use my own costum solutions' = 'Use my own solutions', 
+                                             'This is not relevant for my scientific work' = 'Not relevant for my work')
+
+# change level order: every relevel() moves one level to the front, so the
+# level moved last ('Not answered') ends up first
+levels(temp$TaskStandardToolsDegree)
+temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'As much as possible')
+temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Mostly')
+temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Occasionally')
+temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Use my own solutions')
+temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not relevant for my work')
+temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not answered')
+
+# count per category and answer; percent is the share within each category
+temp = temp %>% filter(TaskStandardToolsDegree != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+  mutate(percent = round(n / sum(n) * 100,2)) 
+# number of respondents with an answer per category (printed at the end of each bar)
+temp_abs = temp %>% group_by(TaskStandardToolsDegreeCat) %>% summarise(n_abs = sum(n))
+
+# stacked horizontal bars (one per category), ordered by the share of 'As much as possible'
+pTSD = ggplot(data=temp) + 
+  geom_col(aes(x=reorder(TaskStandardToolsDegreeCat,percent,function(x) max(x[TaskStandardToolsDegree == 'As much as possible'],na.rm = TRUE)), y = percent,
+               group=TaskStandardToolsDegree,fill=TaskStandardToolsDegree), width = 0.75,position = 'stack') + ylim(0,105) +
+  geom_text(aes(x=TaskStandardToolsDegreeCat, y = percent,group=TaskStandardToolsDegree,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(data = temp_abs,aes(x=TaskStandardToolsDegreeCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() + 
+  ggtitle(str_wrap(paste0('To what degree do you use available tools or standards?'),width = 100)) +
+  theme(legend.position = "bottom", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
+  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) + 
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # wrap long axis labels onto several lines
+
+# figure height (cm) grows with the number of categories
+ragg::agg_tiff('Q7.To.what.degree.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,units = "cm",
+              res = 600,scaling=0.75)
+print(pTSD) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+#### sharing and reusing data ####
+# rebuild the long-format data set for this section
+# literal prefixes of the four question groups ...
+comb_string_vec <- c('Have you shared data with',
+                     'Do you have existing data',
+                     'Think of re-using data',
+                     'Think of data sharing')
+# ... and the names of the long-format value columns, in the same order
+comb_col_names <- c('SharingData',
+                    'ExistingData',
+                    'ThinkReusingData',
+                    'ThinkSharingData')
+
+# respondent id plus every column belonging to one of the four groups
+data0 <- dplyr::select(neuro_data,
+                       'Response ID',
+                       starts_with('Have you shared data with'),
+                       starts_with('Do you have existing data'),
+                       starts_with('Think of re-using data'),
+                       starts_with('Think of data sharing'))
+
+data <- wide_to_long_as(data0, comb_string_vec, comb_col_names)
+
+# Data sharing plot
+temp = data %>% select( 'Response ID',SharingDataCat,SharingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
+
+# count per category and answer; percent is the share within each category
+temp = temp %>% filter(SharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+  mutate(percent = round(n / sum(n) * 100,2)) 
+# number of respondents with an answer per category (used in the title)
+temp_abs = temp %>% group_by(SharingDataCat) %>% summarise(n_abs = sum(n))
+temp = temp %>% filter(SharingData == 'Yes')
+
+# horizontal bar chart of the 'Yes' shares, ordered by size
+pSD = ggplot(data=temp) + 
+  geom_col(aes(x=reorder(SharingDataCat,percent,function(x) max(x[SharingData == 'Yes'],na.rm = TRUE)), y = percent,
+               group=SharingData,fill=SharingData), width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
+  geom_text(aes(x=SharingDataCat, y = percent,group=SharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(aes(x=SharingDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() + 
+  ggtitle(str_wrap(paste0('Have you shared data with... ','n = ',unique(temp_abs$n_abs)),width = 60)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) +
+  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # wrap long axis labels onto several lines
+
+# figure height (cm) grows with the number of bars
+ragg::agg_tiff('Q8.Have.you.shared.data.with.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+print(pSD) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+# Existing Data plot
+temp = data %>% select( 'Response ID',ExistingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
+
+# absolute count and share (in %) per answer
+temp = temp %>% filter(ExistingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+  mutate(percent = round(n / sum(n) * 100,2)) 
+
+# horizontal bar chart: share inside the bar, absolute count next to it
+pED = ggplot(data=temp) + 
+  geom_col(mapping=aes(x=reorder(ExistingData,percent),y=percent),
+           width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
+  ylim(0,max(temp$percent)+5) +
+  geom_text(aes(x=ExistingData,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(aes(x=ExistingData,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('Do you have existing data sets (experiments) that should be kept alive by making them available for reuse? ','n = ',sum(temp$n)),width = 70)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size)) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)})
+
+# figure height (cm) grows with the number of bars
+ragg::agg_tiff('Q9.Do.you.have.existing.data.sets.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+print(pED) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+# make a Think of Reusing Data plot 
+temp = data %>% select( 'Response ID',ThinkReusingDataCat,ThinkReusingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
+
+# change level order: every relevel() moves one level to the front, so the
+# level moved last ('Not answered') ends up first
+temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Undecided')
+temp$ThinkReusingData = relevel(temp$ThinkReusingData,'No')
+temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Not answered')
+
+# count per statement and answer; percent is the share within each statement
+temp = temp %>% filter(ThinkReusingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+  mutate(percent = round(n / sum(n) * 100,2)) 
+# number of respondents with an answer per statement (printed at the end of each bar)
+temp_abs = temp %>% group_by(ThinkReusingDataCat) %>% summarise(n_abs = sum(n))
+
+# stacked horizontal bars (one per statement), ordered by the share of 'Yes'
+pTRD = ggplot(data=temp) + 
+  geom_col(aes(x=reorder(ThinkReusingDataCat,percent,function(x) max(x[ThinkReusingData == 'Yes'],na.rm = TRUE)), y = percent,
+               group=ThinkReusingData,fill=ThinkReusingData), width = 0.75,position = 'stack') + ylim(0,105) +
+  geom_text(aes(x=ThinkReusingDataCat, y = percent,group=ThinkReusingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(data = temp_abs,aes(x=ThinkReusingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() + 
+  ggtitle(str_wrap(paste0('Think of re-using data from repositories.'),width = 100)) +
+  theme(legend.position = "bottom", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
+  scale_color_manual(values=rev(color_pal_vec[1:3]))  + scale_fill_manual(values = rev(color_pal_vec[1:3])) + 
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # wrap long axis labels onto several lines
+
+# figure height (cm) grows with the number of statements
+ragg::agg_tiff('Q10.Think.of.re.using.data.from.repositories.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", 
+              res = 600,scaling = 0.75)
+print(pTRD) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+# make a Think of Sharing Data plot 
+temp = data %>% select( 'Response ID',ThinkSharingDataCat,ThinkSharingData) %>% mutate_if(is.factor, replace_factor_na) %>%  unique()
+
+# change level order: every relevel() moves one level to the front, so the
+# level moved last ('Not answered') ends up first
+temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Undecided')
+temp$ThinkSharingData = relevel(temp$ThinkSharingData,'No')
+temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Not answered')
+
+# count per statement and answer; percent is the share within each statement
+temp = temp %>% filter(ThinkSharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+  mutate(percent = round(n / sum(n) * 100,2)) 
+# number of respondents with an answer per statement (printed at the end of each bar)
+temp_abs = temp %>% group_by(ThinkSharingDataCat) %>% summarise(n_abs = sum(n))
+
+# stacked horizontal bars (one per statement), ordered by the share of 'Yes'
+# NOTE(review): the name pTSD is also used for the tools-degree plot earlier in
+# this script; that object is overwritten here.
+pTSD = ggplot(data=temp) + 
+  geom_col(aes(x=reorder(ThinkSharingDataCat,percent,function(x) max(x[ThinkSharingData == 'Yes'],na.rm = TRUE)), y = percent,
+               group=ThinkSharingData,fill=ThinkSharingData), width = 0.75,position = 'stack') + ylim(0,105) +
+  geom_text(aes(x=ThinkSharingDataCat, y = percent,group=ThinkSharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(data = temp_abs,aes(x=ThinkSharingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() + 
+  ggtitle(str_wrap(paste0('Think of data sharing with researchers who are NOT direct collaborators.'),width = 100)) +
+  theme(legend.position = "bottom", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
+  scale_color_manual(values=rev(color_pal_vec[1:3]))  + scale_fill_manual(values = rev(color_pal_vec[1:3])) + 
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)})
+
+
+# figure height (cm) grows with the number of statements
+ragg::agg_tiff('Q11.Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,
+              units = "cm", res = 600,scaling = 0.75)
+print(pTSD) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+#### Data repositories and analyses ####
+# literal prefixes of the two question groups of this section ...
+comb_string_vec <- c('Please indicate:',
+                     'How do you process and analyze your data')
+# ... and the names of the long-format value columns, in the same order
+comb_col_names <- c('SharingProblems',
+                    'HowAnalyzeData')
+
+# respondent id plus every column belonging to one of the two groups
+data0 <- dplyr::select(neuro_data,
+                       'Response ID',
+                       starts_with('Please indicate:'),
+                       starts_with('How do you process and analyze your data'))
+
+data <- wide_to_long_as(data0, comb_string_vec, comb_col_names)
+
+# make a Sharing Data Problems plot 
+# NOTE(review): categories 6 and 7 are removed by their position among the factor
+# levels; which statements these are depends on the level order - verify when
+# the data or the labels change.
+temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>%
+  filter(as.numeric(SharingProblemsCat) != 6 & as.numeric(SharingProblemsCat) != 7) %>% droplevels() 
+
+# change level order: every relevel() moves one level to the front, so the
+# level moved last ('Not answered') ends up first
+levels(temp$SharingProblemsCat)
+temp$SharingProblems = relevel(temp$SharingProblems,'Rather agree')
+temp$SharingProblems = relevel(temp$SharingProblems,'Undecided')
+temp$SharingProblems = relevel(temp$SharingProblems,'Rather disagree')
+temp$SharingProblems = relevel(temp$SharingProblems,'Fully disagree')
+temp$SharingProblems = relevel(temp$SharingProblems,'Not answered')
+
+# count per statement and answer; percent is the share within each statement
+temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
+  mutate(percent = round(n / sum(n) * 100,2)) 
+# number of respondents with an answer per statement (printed at the end of each bar)
+temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n))
+
+# stacked horizontal bars (one per statement), ordered by the share of 'Fully agree'
+pSP = ggplot(data=temp) + 
+  geom_col(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Fully agree'],na.rm = TRUE)), y = percent,
+               group=SharingProblems,fill=SharingProblems), width = 0.75,position = 'stack') + ylim(0,105) +
+  geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() + 
+  ggtitle(str_wrap(paste0('Sharing problems. Please indicate: '),width = 100)) +
+  theme(legend.position = "bottom", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
+  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) + 
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # wrap long axis labels onto several lines
+
+# figure height (cm) grows with the number of statements
+ragg::agg_tiff('Q12.Sharing.problems.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling =0.75)
+print(pSP) # explicit print(): a bare object is not drawn when the script is source()d
+dev.off()
+
+# make the 'How do you process and analyze your data?' plot
+temp = data %>% select( 'Response ID',HowAnalyzeDataCat,HowAnalyzeData) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
+
+# change the specific answers to yes:
+# any answer given for an 'Other' item (i.e. anything but 'Not answered') counts as 'Yes'
+idx = grepl('Other',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)
+temp$HowAnalyzeData[idx] = c('Yes')
+# respondents who answered the 'Manual inspection and analysis' item but left 'Other'
+# unanswered get 'No' for 'Other' instead of being dropped below
+tempID = temp$'Response ID'[grepl('Manual inspection and analysis',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)]
+idx = grepl('Other',temp$HowAnalyzeDataCat) & grepl('Not answered',temp$HowAnalyzeData) & (temp$'Response ID' %in% tempID)
+temp$HowAnalyzeData[idx] = c('No')
+
+# counts per item and answer; summarise() leaves the data grouped by item, so
+# `percent` is the share of each answer within its item
+temp = temp %>% filter(HowAnalyzeData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+temp_abs = temp %>% group_by(HowAnalyzeDataCat) %>% summarise(n_abs = sum(n))
+# only the 'Yes' share is plotted
+temp = temp %>% filter(HowAnalyzeData == 'Yes')
+
+# NOTE(review): the title pastes unique(temp_abs$n_abs); if the items do not all share
+# the same total this yields several title strings - confirm that a single value is expected.
+pHAD = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(aes(x=reorder(HowAnalyzeDataCat,percent,function(x) max(x[HowAnalyzeData == 'Yes'],na.rm = TRUE)), y = percent,
+               group=HowAnalyzeData,fill=HowAnalyzeData), width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
+  # percentage label centred inside the bar
+  geom_text(aes(x=HowAnalyzeDataCat, y = percent,group=HowAnalyzeData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count right of the bar end
+  geom_text(aes(x=HowAnalyzeDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('How do you process and analyze your data? ','n = ',unique(temp_abs$n_abs)),width = 60)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) +
+  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q13.How.do.you.process.and.analyze.your.data.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+# print() explicitly: a bare `pHAD` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pHAD)
+dev.off()
+
+# make the 'Sharing opinions' plot
+# NOTE(review): the two statements are picked by the integer position (6 and 7) of their
+# SharingProblemsCat factor level, which depends on the level order - confirm it is stable.
+temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>%
+  filter(as.numeric(SharingProblemsCat) == 6 | as.numeric(SharingProblemsCat) == 7)
+
+# change level order: each relevel() moves one level to the front, so the levels end up
+# as 'Not answered', 'No', 'Undecided', followed by the remaining levels
+temp$SharingProblems = relevel(temp$SharingProblems,'Undecided')
+temp$SharingProblems = relevel(temp$SharingProblems,'No')
+temp$SharingProblems = relevel(temp$SharingProblems,'Not answered')
+
+# counts per statement and answer; summarise() leaves the data grouped by statement,
+# so `percent` is the share of each answer within its statement
+temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+# total count per statement, printed to the right of each stacked bar
+temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n))
+
+# NOTE(review): inside the reorder() callback `x` is only the per-statement slice of
+# `percent`, whereas `SharingProblems == 'Yes'` spans all rows of `temp`; the resulting
+# order is only reliable if every statement contains every answer level - confirm.
+# NOTE(review): the title pastes unique(temp_abs$n_abs), which has two entries when the
+# two statements have different totals - confirm.
+pSO = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Yes'],na.rm = TRUE)), y = percent,
+               group=SharingProblems,fill=SharingProblems), width = 0.75,position = 'stack') + ylim(0,105) +
+  # percentage label centred inside every bar segment
+  geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count placed just beyond the 100 % mark
+  geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('Sharing opinions. Please indicate: ','n = ',unique(temp_abs$n_abs)),width = 60)) +
+  theme(legend.position = "bottom", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
+  scale_color_manual(values=rev(color_pal_vec[1:3]))  + scale_fill_manual(values = rev(color_pal_vec[1:3])) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q14.Sharing.opinions.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+3.5, units = "cm", res = 600)
+# print() explicitly: a bare `pSO` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pSO)
+dev.off()
+
+#### Research data management in general ####
+# Question prefixes belonging to this survey section ...
+comb_string_vec = c('What is your opinion',
+                    'Applying research data management',
+                    'When do you employ',
+                    'Do you have dedicated',
+                    'How much time do you currently',
+                    'Please rank the top')
+# ... and, in the same order, the column names handed to wide_to_long_as() for them
+comb_col_names = c('StatementsOpinion',
+                   'ApplyDataManagement',
+                   'EmployTools',
+                   'DedicatedPersonal',
+                   'TimeConsum',
+                   'TopSharingProblems')
+
+# keep the respondent id plus every column whose name starts with one of the prefixes
+# (starts_with() takes the union of the matches, prefix by prefix)
+data0 = dplyr::select(neuro_data, 'Response ID', starts_with(comb_string_vec))
+
+# reshape the selected wide columns into the long table used by the plots below
+data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
+
+# make a What is your opinion plot
+temp = data %>% select( 'Response ID',StatementsOpinionCat,StatementsOpinion) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()
+
+# change level order: each relevel() moves one level to the front, so the levels end up as
+# 'Not answered', 'Fully disagree', 'Rather disagree', 'Undecided', 'Rather agree', then the rest
+levels(droplevels(temp$StatementsOpinion)) # diagnostic only: shows the levels when run interactively
+temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather agree')
+temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Undecided')
+temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather disagree')
+temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Fully disagree')
+temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Not answered')
+
+# counts per statement and answer; summarise() leaves the data grouped by statement,
+# so `percent` is the share of each answer within its statement
+temp = temp %>% filter(StatementsOpinion != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+# total count per statement, printed to the right of each stacked bar
+temp_abs = temp %>% group_by(StatementsOpinionCat) %>% summarise(n_abs = sum(n))
+
+# NOTE(review): inside the reorder() callback `x` is only the per-statement slice of
+# `percent`, whereas `StatementsOpinion == 'Fully agree'` spans all rows of `temp`; the
+# resulting order is only reliable if every statement contains every answer level - confirm.
+pOS = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(aes(x=reorder(StatementsOpinionCat,percent,function(x) max(x[StatementsOpinion == 'Fully agree'],na.rm = TRUE)), y = percent,
+               group=StatementsOpinion,fill=StatementsOpinion), width = 0.75,position = 'stack') + ylim(0,105) +
+  # percentage label centred inside every bar segment
+  geom_text(aes(x=StatementsOpinionCat, y = percent,group=StatementsOpinion,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count placed just beyond the 100 % mark
+  geom_text(data = temp_abs,aes(x=StatementsOpinionCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('What is your opinion on the following statements?'),width = 100)) +
+  theme(legend.position = "bottom", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
+  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 35)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q15.What.is.your.opinion.on.the.following.statements.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,
+              units = "cm", res = 600,scaling = 0.75)
+# print() explicitly: a bare `pOS` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pOS)
+dev.off()
+
+# make an Apply Data Management plot
+temp = data %>% select( 'Response ID',ApplyDataManagementCat,ApplyDataManagement) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()
+
+# change level order: each relevel() moves one level to the front, so the levels end up as
+# 'Not answered', 'Fully disagree', 'Rather disagree', 'Undecided', 'Rather agree', then the rest
+temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather agree')
+temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Undecided')
+temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather disagree')
+temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Fully disagree')
+temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Not answered')
+
+# counts per statement and answer; summarise() leaves the data grouped by statement,
+# so `percent` is the share of each answer within its statement
+temp = temp %>% filter(ApplyDataManagement != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+# total count per statement, printed to the right of each stacked bar
+temp_abs = temp %>% group_by(ApplyDataManagementCat) %>% summarise(n_abs = sum(n))
+
+# NOTE(review): inside the reorder() callback `x` is only the per-statement slice of
+# `percent`, whereas `ApplyDataManagement == 'Fully agree'` spans all rows of `temp`; the
+# resulting order is only reliable if every statement contains every answer level - confirm.
+pARM = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(aes(x=reorder(ApplyDataManagementCat,percent,function(x) max(x[ApplyDataManagement == 'Fully agree'],na.rm = TRUE)), y = percent,
+               group=ApplyDataManagement,fill=ApplyDataManagement), width = 0.75,position = 'stack') + ylim(0,105) +
+  # percentage label centred inside every bar segment
+  geom_text(aes(x=ApplyDataManagementCat, y = percent,group=ApplyDataManagement,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count placed just beyond the 100 % mark
+  geom_text(data = temp_abs,aes(x=ApplyDataManagementCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('Applying research data management...'),width = 100)) +
+  theme(legend.position = "bottom", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
+  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q16.Applying.research.data.management.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
+              res = 600,scaling = 0.75)
+# print() explicitly: a bare `pARM` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pARM)
+dev.off()
+
+# make When do you employ research data management tools and services in your research plot
+temp = data %>% select( 'Response ID',EmployTools) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()
+
+# answer counts without 'Not answered'; `percent` is the share among all remaining answers
+temp = temp %>% filter(EmployTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+
+# horizontal bar chart, bars ordered by percentage
+pWOT = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(mapping=aes(x=reorder(EmployTools,percent),y=percent),
+           width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
+  ylim(0,max(temp$percent)+5) +
+  # percentage label centred inside the bar
+  geom_text(aes(x=EmployTools,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count right of the bar end
+  geom_text(aes(x=EmployTools,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('When do you employ research data management tools and services in your research? ','n = ',sum(temp$n)),width = 70)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size)) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q17.When.do.you.employ.research.data.management.tools.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+# print() explicitly: a bare `pWOT` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pWOT)
+dev.off()
+
+# make Do you have dedicated personnel with research data management or data curation expertise plot
+temp = data %>% select( 'Response ID',DedicatedPersonal) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()
+
+# answer counts without 'Not answered'; `percent` is the share among all remaining answers
+temp = temp %>% filter(DedicatedPersonal != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+
+# horizontal bar chart, bars ordered by percentage
+pDP = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(mapping=aes(x=reorder(DedicatedPersonal,percent),y=percent),
+           width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
+  ylim(0,max(temp$percent)+5) +
+  # percentage label centred inside the bar
+  geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count right of the bar end
+  geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('Do you have dedicated personnel with research data management or data curation expertise? ','n = ',sum(temp$n)),width = 70)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size)) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q18.Do.you.have.dedicated.personnel.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+# print() explicitly: a bare `pDP` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pDP)
+dev.off()
+
+# make How much time do you currently need to ready a dataset from your lab for publication and re-use plot
+temp = data %>% select( 'Response ID',TimeConsum) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()
+
+# answer counts without 'Not answered'; `percent` is the share among all remaining answers
+temp = temp %>% filter(TimeConsum != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+
+# horizontal bar chart, bars ordered by percentage
+pHMT = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(mapping=aes(x=reorder(TimeConsum,percent),y=percent),
+           width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
+  ylim(0,max(temp$percent)+5) +
+  # percentage label centred inside the bar
+  geom_text(aes(x=TimeConsum,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count right of the bar end
+  geom_text(aes(x=TimeConsum,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('How much time do you currently need to ready a dataset from your lab for publication and re-use? ','n = ',sum(temp$n)),width = 70)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size)) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q19.How.much.time.do.you.currently.need.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+# print() explicitly: a bare `pHMT` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pHMT)
+dev.off()
+
+
+# make a Top Sharing Data Problems plot
+# Unanswered values are deliberately removed here: keeping them looks silly, causes
+# unnecessary problems and makes it hard to see which issues are the important ones.
+temp = data %>% select( 'Response ID',TopSharingProblemsCat,TopSharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()
+
+# change level order: each relevel() moves one rank to the front, so the levels end up
+# as 'Rank 5', 'Rank 4', 'Rank 3', 'Rank 2', followed by the remaining levels
+temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 2')
+temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 3')
+temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 4')
+temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 5')
+
+# counts per issue and rank; summarise() leaves the data grouped by issue, so
+# `percent` is the share of each rank within its issue
+temp = temp %>% filter(TopSharingProblems != 'Not answered') %>% group_by_at(vars('TopSharingProblems','TopSharingProblemsCat')) %>% summarise(n = n()) %>% droplevels() %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+# total count per issue, printed to the right of each stacked bar
+temp_abs = temp %>% group_by(TopSharingProblems) %>% summarise(n_abs = sum(n))
+
+# NOTE(review): inside the reorder() callback `x` is only the per-issue slice of
+# `percent`, whereas `TopSharingProblemsCat == 'Rank 1'` spans all rows of `temp`; the
+# resulting order is only reliable if every issue received every rank - confirm.
+pTSP = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(aes(x=reorder(TopSharingProblems,percent,function(x) max(x[TopSharingProblemsCat == 'Rank 1'],na.rm = TRUE)), y = percent,
+               group=TopSharingProblemsCat,fill=TopSharingProblemsCat), width = 0.75,position = 'stack') + ylim(0,105) +
+  # percentage label centred inside every bar segment
+  geom_text(aes(x=TopSharingProblems, y = percent,group=TopSharingProblemsCat,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count placed just beyond the 100 % mark
+  geom_text(data = temp_abs,aes(x=TopSharingProblems,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('Please rank the top 5 most pressing issues: '),width = 100)) +
+  theme(legend.position = "bottom", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size),
+        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
+  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q20.Please.rank.the.top.most.pressing.issues.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
+              res = 600,scaling = 0.75)
+# print() explicitly: a bare `pTSP` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pTSP)
+dev.off()
+
+#### Membership ####
+# question prefix of this section and the column name handed to wide_to_long_as() for it
+comb_string_vec = c('Are you member of')
+comb_col_names = c('MemberOf')
+
+# keep the respondent id plus every column whose name starts with the prefix
+data0 = dplyr::select(neuro_data, 'Response ID', starts_with(comb_string_vec))
+
+# reshape the selected wide columns into the long table used by the plot below
+data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
+
+# make an Are you member of, or otherwise involved in, other NFDI consortia or initiatives plot
+temp = data %>% select( 'Response ID',MemberOf) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()
+
+# answer counts without 'Not answered'; `percent` is the share among all remaining answers
+temp = temp %>% filter(MemberOf != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
+  mutate(percent = round(n / sum(n) * 100,2))
+
+# horizontal bar chart, bars ordered by percentage
+pMO = ggplot(data=temp) +
+  # geom_col() draws the pre-computed percentages (same as a bar layer with stat = 'identity')
+  geom_col(mapping=aes(x=reorder(MemberOf,percent),y=percent),
+           width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
+  ylim(0,max(temp$percent)+5) +
+  # percentage label centred inside the bar
+  geom_text(aes(x=MemberOf,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
+            colour = "white", size = glob_insideBar_text_size) +
+  # absolute count right of the bar end
+  geom_text(aes(x=MemberOf,y=percent,label = paste0('n=',n)),hjust = -0.25,
+            colour = "black", size = glob_insideBar_text_size) +
+  xlab('') + ylab('percent (%)') + coord_flip() +
+  ggtitle(str_wrap(paste0('Are you member of, or otherwise involved in, other NFDI consortia or initiatives? ','n = ',sum(temp$n)),width = 100)) +
+  theme(legend.position = "none", legend.box = "horizontal",legend.title = element_blank(),
+        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
+        axis.title = element_text(size = glob_text_size),
+        plot.title = element_text(size = glob_title_text_size)) +
+  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # wrap long axis labels onto several lines
+
+ragg::agg_tiff('Q21.Are.you.member.of.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
+# print() explicitly: a bare `pMO` is only auto-printed at the interactive top level and
+# would leave the TIFF empty when this script is executed through source()
+print(pMO)
+dev.off()

+ 94 - 0
datacite.yml

@@ -0,0 +1,94 @@
+authors:
+  -
+    firstname: Carsten
+    lastname: Klingner
+    affiliation: Hans Berger Department of Neurology, Jena University Hospital, Germany; Biomagnetic Center, Jena University Hospital, Germany
+  -
+    firstname: Michael
+    lastname: Denker
+    affiliation: Institute of Neuroscience and Medicine (INM-6) and Institute for Advanced Simulation (IAS-6) and JARA-Institute Brain Structure-Function Relationships (INM-10), Jülich Research Centre, Jülich, Germany
+  -
+    firstname: Sonja
+    lastname: Grün
+    affiliation: Institute of Neuroscience and Medicine (INM-6) and Institute for Advanced Simulation (IAS-6) and JARA-Institute Brain Structure-Function Relationships (INM-10), Jülich Research Centre, Jülich, Germany; Theoretical Systems Neurobiology, RWTH Aachen University, Aachen, Germany
+  -
+    firstname: Michael
+    lastname: Hanke
+    affiliation: Institute of Neuroscience and Medicine, Brain & Behaviour (INM-7), Research Center Jülich, Jülich, Germany; Institute of Systems Neuroscience, Medical Faculty, Heinrich Heine University Düsseldorf, Düsseldorf, Germany
+  -
+    firstname: Steffen
+    lastname: Oeltze-Jafra
+    affiliation: Department of Neurology, Otto von Guericke University Magdeburg, Germany; Peter L. Reichertz Institute for Medical Informatics, Hannover Medical School, Germany
+  -
+    firstname: Frank W.
+    lastname: Ohl
+    affiliation: Leibniz Institute for Neurobiology (LIN), Magdeburg, Germany; Center for Behavioral Brain Science (CBBS), Magdeburg, Germany
+  -
+    firstname: Janina
+    lastname: Radny
+    affiliation: Bernstein Coordination Site, INM-6 - Forschungszentrum Jülich, Germany; University of Freiburg, Germany
+  -
+    firstname: Stefan
+    lastname: Rotter
+    affiliation: Bernstein Center Freiburg & Faculty of Biology, University of Freiburg, Germany
+  -
+    firstname: Hansjörg
+    lastname: Scherberger
+    affiliation: Deutsches Primatenzentrum GmbH – Leibniz-Institut für Primatenforschung, Göttingen; Faculty of Biology and Psychology, University of Goettingen, Germany
+  -
+    firstname: Alexandra
+    lastname: Stein
+    affiliation: Bernstein Coordination Site, INM-6 - Forschungszentrum Jülich, Germany
+  -
+    firstname: Thomas
+    lastname: Wachtler
+    affiliation: Faculty of Biology, Ludwig-Maximilians-Universität München
+  -
+    firstname: Otto W.
+    lastname: Witte
+    affiliation: Hans Berger Department of Neurology, Jena University Hospital, Germany
+  -
+    firstname: Petra
+    lastname: Ritter
+    affiliation: Berlin Institute of Health at Charité - Universitätsmedizin Berlin, Berlin, Germany; Charité - Universitätsmedizin Berlin, Corporate Member of Freie Universität Berlin and Humboldt-Universität zu Berlin, Department of Neurology with Experimental Neurology, Brain Simulation Section, Berlin, Germany; Bernstein Center for Computational Neuroscience Berlin, Berlin, Germany; Einstein Center for Neuroscience Berlin, Berlin, Germany; Einstein Center Digital Future, Berlin, Germany
+
+title: "NFDI-Neuro Survey Data"
+
+description: "The lack of reproducibility of research results is a serious problem – known as “the reproducibility crisis”. The German National Research Data Infrastructure (NFDI) initiative implemented by the German Research Foundation (DFG) aims to help overcome this crisis by developing sustainable solutions for research data management (RDM). NFDI comprises domain specific consortia across all science disciplines. In the field of neuroscience, NFDI Neuroscience (NFDI-Neuro) contributes to the strengthening of systematic and standardized RDM in its research communities. NFDI-Neuro conducted a comprehensive survey amongst the neuroscience community to determine the current needs, challenges, and opinions with respect to RDM. The outcomes of this survey are presented here. The German neuroscience community perceives barriers with respect to RDM and data sharing mainly linked to (1) lack of data and metadata standards, (2) lack of community adopted provenance tracking methods, (3) lack of a privacy preserving research infrastructure for sensitive data, (4) lack of RDM literacy, and (5) lack of required time and resources for proper RDM. NFDI-Neuro aims to systematically address these barriers by leading and contributing to the development of standards, tools, and infrastructure and by providing training, education and support, as well as additional resources to its research community. The RDM work of NFDI-Neuro is conducted in close collaboration with its partner EBRAINS AISBL, the coordinating entity of the EU Flagship Human Brain Project, and its Research Infrastructure (RI) EBRAINS with more than 4500 registered users and developers from more than 30 countries. While NFDI-Neuro aims to address the national needs, it closely aligns with the international community and the topics of the Digital Europe Program and EU Data Spaces."
+
+keywords:
+  - Neuroscience
+  - "research data infrastructure"
+  - "data sharing"
+  - metadata
+  - provenance
+  - linkage
+  - lineage
+  - repositories
+  - "research data management"
+  - survey
+  - community
+
+license:
+  name: 'Creative Commons Attribution 4.0 International'
+  url: 'https://creativecommons.org/licenses/by/4.0/'
+
+funding:
+  - 'EU, H2020 Research and Innovation Action grants Human Brain Project SGA2 785907, SGA3 945539, VirtualBrainCloud 826421'
+  - 'EU, European Innovation Council grant PHRASE 101058240 and ERC 683049'
+  - 'Berlin Institute of Health & Foundation Charité'
+  - 'Johanna Quandt Excellence Initiative'
+  - 'DFG, SFB 1451 (project ID 431549029); SFB 1436 (project ID 425899996); SFB 1315 (project ID 327654276); SFB 936 (project ID 178316478); SFB-TRR 295 (project ID 424778381); SPP Computational Connectomics RI 2073/6-1, RI 2073/10-2, RI 2073/9-1; GRK 2150 (project 269953372)'
+  - 'BMBF, 01GQ1905'
+  - 'Helmholtz Metadata Collaboration (HMC)'
+  - 'Federal State of Saxony-Anhalt, FKZ: I 88'
+
+references:
+  -
+    id: 'doi::tba'
+    reftype: IsSupplementTo
+    citation: Klingner C, Denker M, Grün S, Hanke M, Oeltze-Jafra S, Ohl FW, Radny J, Rotter S, Scherberger H, Stein A, Wachtler T, Witte OW, Ritter P (2022) Overcoming the Reproducibility Crisis - Results of the first Community Survey of the German National Research Data Infrastructure for Neuroscience. BioRxiv.
+
+resourcetype: Dataset
+
+templateversion: 1.2

File diff suppressed because it is too large
+ 306 - 0
results-survey197421_alledaten1.csv