#### clean workspace #### rm(list=ls()) # The csv File has to be in the same directory setwd(dirname(rstudioapi::getSourceEditorContext()$path)) # clear the console cat("\014") color_pal_vec = c('#070778','#816c93','#808080','#0e6966','#5a2c40','#1b1b1b','#935430','#f43661','#171511') #### functions #### replace_factor_na <- function(x){ x <- as.character(x) x <- if_else(is.na(x), "Not answered", x) x <- as.factor(x) } wide_to_long_as <- function(data0,comb_string_vec,comb_col_names){ # Diese Schleife ist mit Vorsicht zu genießen. Entstehende long format Datensatz kann sehr groß werden library(data.table) for(i in seq(1,length(comb_string_vec),1)){ data0 = data.table::melt(as.data.table(data0), id= c(which(!grepl(comb_string_vec[i],colnames(data0),fixed = TRUE))), measure=list(grep(comb_string_vec[i],colnames(data0),fixed = TRUE)), variable.name = paste0(comb_col_names[i],'Cat'), value.name = comb_col_names[i],value.factor=TRUE) # make some nicer labels data0 = as.data.frame(data0) level_strings = levels(data0[,ncol(data0)-1]) # iterate over the level strings and update them for(s in seq(1,length(level_strings),1)){ level_string = level_strings[s] temp_start = unlist(gregexpr('\\[', level_string))[1] temp_end = tail(unlist(gregexpr('\\]', level_string)), n=1) if(length(temp_end) != -1 & length(temp_start) != -1){ level_string = substr(level_string,temp_start[1]+1,max(temp_end)-1) } level_strings[s] = level_string } # reset the labels levels(data0[,ncol(data0)-1]) = level_strings } data = data0 return(data) } #### load libraries #### library(ggplot2) library(dplyr) library(forcats) library(ggpubr) library(reshape2) library(stringr) library(ragg) # scaling text size for different picture sizes #### Load the data #### # catch NA strings #neuro_data <- read.csv("results-survey197421_nurkomplett.csv",row.names=NULL,na.strings=c("","N/A"),sep=',') neuro_data <- read.csv("results-survey197421_alledaten.csv",row.names=NULL,na.strings=c("","N/A"),sep=',',check.names = FALSE) neuro_data <- neuro_data[!is.na(neuro_data$'My current (primary) position is:'),] # Es geht um die Frage was die Leute die Daten teilen von den anderen Unterscheidet # und insgesamt um die Frage was es fuer Probleme gibt in unserer Dateninfrastruktur # set up some global values glob_insideBar_text_size = 2.75 glob_text_size = 8 glob_title_text_size = 8 glob_fig_width = 17.5 glob_dec_round = 0 #### About myself #### # more elegant data0 = neuro_data %>% dplyr::select('Response ID', starts_with('I work at'), starts_with('My current '), starts_with('Which neuroscience discipline(s) '), starts_with('Please state if your ') ) comb_string_vec = c('I work at', 'My current ', 'Which neuroscience discipline(s) ', 'Please state if your ') comb_col_names = c('WorkPlaces', 'CurrentPosition', 'NeuroDiscipline', 'FimilarDataTypes') data = wide_to_long_as(data0,comb_string_vec,comb_col_names) # make a WorkPlaces plot filter out the 'Other' answers temp = data %>% select('Response ID',WorkPlaces) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>% group_by(WorkPlaces) %>% filter(n() >= 3) %>% mutate(WorkPlaces = str_replace(WorkPlaces,", Helmholtz Center", "")) %>% mutate(WorkPlaces = str_replace(WorkPlaces,", Fraunhofer Institute", "")) temp = temp[1:218,] # remove [Other] answers temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) pWP = ggplot(data=temp) + geom_histogram(mapping=aes(x=reorder(WorkPlaces,percent),y=percent), stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) + ylim(0,max(temp$percent)+5) + geom_text(aes(x=WorkPlaces,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size) + geom_text(aes(x=WorkPlaces,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + ggtitle(paste0('I work at / I am affiliated with: ','n = ',sum(temp$n))) + coord_flip() + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size)) + scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) ragg::agg_tiff('Q1.I.work.at.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pWP dev.off() # make a Current Position plot and replace NaN with 'None' temp = data %>% select('Response ID',CurrentPosition) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>% filter(CurrentPosition != 'child neurologist') %>% filter(CurrentPosition != 'coodinator ') %>% filter(CurrentPosition != 'Clinician') %>% filter(CurrentPosition != 'Doctor') %>% filter(CurrentPosition != 'Emeritus Professor') %>% filter(CurrentPosition != 'medical doctor') %>% filter(CurrentPosition != 'Oberarzt') %>% filter(CurrentPosition != 'Therapist') temp = temp[1:218,] # remove [Other] answers temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) pCP = ggplot(data=temp) + geom_histogram(mapping=aes(x=reorder(CurrentPosition,percent),y = percent), stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) + ylim(0,max(temp$percent)+5) + geom_text(aes(x=CurrentPosition,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size) + geom_text(aes(x=CurrentPosition,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + ggtitle(paste0('My current (primary) position is: ','n = ',sum(temp$n))) + coord_flip() + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size)) + scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q2.My.current.position.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pCP dev.off() # make a Neuro Discipline plot # Neuro Discipline questions are Yes/No questions ==> just need the ones who answered with Yes temp = data %>% select('Response ID',NeuroDisciplineCat,NeuroDiscipline) %>% unique() %>% mutate_if(is.factor, replace_factor_na) # change other colum to Yes/No column idx = grepl('Other',temp$NeuroDisciplineCat) & !grepl('Not answered',temp$NeuroDiscipline) temp$NeuroDiscipline[idx] = c('Yes') idx = grepl('Other',temp$NeuroDisciplineCat) & grepl('Not answered',temp$NeuroDiscipline) temp$NeuroDiscipline[idx] = c('No') temp = temp %>% filter(NeuroDiscipline != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(NeuroDisciplineCat) %>% summarise(n_abs = sum(n)) temp = temp %>% filter(NeuroDiscipline == 'Yes') pND = ggplot(data=temp) + geom_histogram(aes(x=reorder(NeuroDisciplineCat,percent,function(x) max(x[NeuroDiscipline == 'Yes'],na.rm = TRUE)), y = percent, group=NeuroDiscipline,fill=NeuroDiscipline), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) + geom_text(aes(x=NeuroDisciplineCat, y = percent,group=NeuroDiscipline,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(aes(x=NeuroDisciplineCat,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Which neuroscience discipline(s) describe(s) your work or research best? ','n = ',unique(temp_abs$n_abs)),width = 70)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) + scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q3.Which.neuroscience.discipline.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600) pND dev.off() # make a Fimiliar Data Types plot # Fimiliar Data Types are Yes/No questions ==> just need the ones who answered with Yes ==> or? temp = data %>% select('Response ID',FimilarDataTypesCat,FimilarDataTypes) %>% unique() %>% mutate_if(is.factor, replace_factor_na) # change other colum to Yes/No column idx = grepl('Other',temp$FimilarDataTypesCat) & !grepl('Not answered',temp$FimilarDataTypes) temp$FimilarDataTypes[idx] = c('Yes') idx = grepl('Other',temp$FimilarDataTypesCat) & grepl('Not answered',temp$FimilarDataTypes) temp$FimilarDataTypes[idx] = c('No') temp = temp %>% filter(FimilarDataTypes != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(FimilarDataTypesCat) %>% summarise(n_abs = sum(n)) temp = temp %>% filter(FimilarDataTypes == 'Yes') pFD = ggplot(data=temp) + geom_histogram(aes(x=reorder(FimilarDataTypesCat,percent,function(x) max(x[FimilarDataTypes == 'Yes'],na.rm = TRUE)), y = percent, group=FimilarDataTypes,fill=FimilarDataTypes), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) + geom_text(aes(x=FimilarDataTypesCat, y = percent,group=FimilarDataTypes,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(aes(x=FimilarDataTypesCat,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Please state if your work includes one or several of the following recording methods or data types: ','n = ',unique(temp_abs$n_abs)),width = 100)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) + scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q4.Please.state.if.your.work.includes.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600) pFD dev.off() #### Tools #### # question Q6 For which tasks would you wish to have a tool or standard? is not considered do to the large number of different answer posibilities # more elegant data0 = neuro_data %>% dplyr::select('Response ID', starts_with('For which of these tasks '), starts_with('To what degree do you ') ) comb_string_vec = c('For which of these tasks ', 'To what degree do you ') comb_col_names = c('TaskStandardTools', 'TaskStandardToolsDegree') data = wide_to_long_as(data0,comb_string_vec,comb_col_names) # make a Task Standard Tools plot # Standard Task Tools are Yes/No questions ==> just need the ones who answered with Yes ==> or? # remove Comment columes temp = data %>% select('Response ID',TaskStandardToolsCat,TaskStandardTools) %>% filter(!grepl('Comment',TaskStandardToolsCat)) %>% mutate_if(is.factor, replace_factor_na) %>% unique() %>% droplevels() temp = temp %>% filter(TaskStandardTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(TaskStandardToolsCat) %>% summarise(n_abs = sum(n)) temp = temp %>% filter(TaskStandardTools == 'Yes') pTST = ggplot(data=temp) + geom_histogram(aes(x=reorder(TaskStandardToolsCat,percent,function(x) max(x[TaskStandardTools == 'Yes'],na.rm = TRUE)), y = percent, group=TaskStandardTools,fill=TaskStandardTools), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) + geom_text(aes(x=TaskStandardToolsCat, y = percent,group=TaskStandardTools,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(aes(x=TaskStandardToolsCat,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('For which of these tasks do you use available tools or standards? ','n = ',unique(temp_abs$n_abs)),width = 100)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) + scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q5.For.which.of.these.tasks.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600) pTST dev.off() # make a Task Standard Tools Degree plot temp = data %>% select( 'Response ID',TaskStandardToolsDegreeCat,TaskStandardToolsDegree) %>% mutate_if(is.factor, replace_factor_na) %>% unique() # change some long level names temp$TaskStandardToolsDegree = recode_factor(temp$TaskStandardToolsDegree, 'Not at all - I use my own costum solutions' = 'Use my own solutions', 'This is not relevant for my scientific work' = 'Not relevant for my work') # change level order levels(temp$TaskStandardToolsDegree) temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'As much as possible') temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Mostly') temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Occasionally') temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Use my own solutions') temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not relevant for my work') temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not answered') temp = temp %>% filter(TaskStandardToolsDegree != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(TaskStandardToolsDegreeCat) %>% summarise(n_abs = sum(n)) pTSD = ggplot(data=temp) + geom_histogram(aes(x=reorder(TaskStandardToolsDegreeCat,percent,function(x) max(x[TaskStandardToolsDegree == 'As much as possible'],na.rm = TRUE)), y = percent, group=TaskStandardToolsDegree,fill=TaskStandardToolsDegree), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) + geom_text(aes(x=TaskStandardToolsDegreeCat, y = percent,group=TaskStandardToolsDegree,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(data = temp_abs,aes(x=TaskStandardToolsDegreeCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('To what degree do you use available tools or standards?'),width = 100)) + theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) + scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) + scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q7.To.what.degree.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,units = "cm", res = 600,scaling=0.75) pTSD dev.off() #### sharing and reusing data #### # recreate different datasets # more elegant data0 = neuro_data %>% dplyr::select( 'Response ID', starts_with('Have you shared data with'), starts_with('Do you have existing data'), starts_with('Think of re-using data'), starts_with('Think of data sharing') ) comb_string_vec = c('Have you shared data with', 'Do you have existing data', 'Think of re-using data', 'Think of data sharing') comb_col_names = c('SharingData', 'ExistingData', 'ThinkReusingData', 'ThinkSharingData') data = wide_to_long_as(data0,comb_string_vec,comb_col_names) # Data sharing plot temp = data %>% select( 'Response ID',SharingDataCat,SharingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique() temp = temp %>% filter(SharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(SharingDataCat) %>% summarise(n_abs = sum(n)) temp = temp %>% filter(SharingData == 'Yes') pSD = ggplot(data=temp) + geom_histogram(aes(x=reorder(SharingDataCat,percent,function(x) max(x[SharingData == 'Yes'],na.rm = TRUE)), y = percent, group=SharingData,fill=SharingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) + geom_text(aes(x=SharingDataCat, y = percent,group=SharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(aes(x=SharingDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Have you shared data with... ','n = ',unique(temp_abs$n_abs)),width = 60)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) + scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q8.Have.you.shared.data.with.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pSD dev.off() # Existing Data plot temp = data %>% select( 'Response ID',ExistingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique() temp = temp %>% filter(ExistingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) pED = ggplot(data=temp) + geom_histogram(mapping=aes(x=reorder(ExistingData,percent),y=percent), stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) + ylim(0,max(temp$percent)+5) + geom_text(aes(x=ExistingData,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size) + geom_text(aes(x=ExistingData,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Do you have existing data sets (experiments) that should be kept alive by making them available for reuse? ','n = ',sum(temp$n)),width = 70)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size)) + scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) ragg::agg_tiff('Q9.Do.you.have.existing.data.sets.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pED dev.off() # make a Think of Reusing Data plot temp = data %>% select( 'Response ID',ThinkReusingDataCat,ThinkReusingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique() # change level order temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Undecided') temp$ThinkReusingData = relevel(temp$ThinkReusingData,'No') temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Not answered') temp = temp %>% filter(ThinkReusingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(ThinkReusingDataCat) %>% summarise(n_abs = sum(n)) pTRD = ggplot(data=temp) + geom_histogram(aes(x=reorder(ThinkReusingDataCat,percent,function(x) max(x[ThinkReusingData == 'Yes'],na.rm = TRUE)), y = percent, group=ThinkReusingData,fill=ThinkReusingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) + geom_text(aes(x=ThinkReusingDataCat, y = percent,group=ThinkReusingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(data = temp_abs,aes(x=ThinkReusingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Think of re-using data from repositories.'),width = 100)) + theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) + scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) + scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q10.Think.of.re.using.data.from.repositories.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling = 0.75) pTRD dev.off() # make a Think of Sharing Data plot temp = data %>% select( 'Response ID',ThinkSharingDataCat,ThinkSharingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique() # change level order temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Undecided') temp$ThinkSharingData = relevel(temp$ThinkSharingData,'No') temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Not answered') temp = temp %>% filter(ThinkSharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(ThinkSharingDataCat) %>% summarise(n_abs = sum(n)) pTSD = ggplot(data=temp) + geom_histogram(aes(x=reorder(ThinkSharingDataCat,percent,function(x) max(x[ThinkSharingData == 'Yes'],na.rm = TRUE)), y = percent, group=ThinkSharingData,fill=ThinkSharingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) + geom_text(aes(x=ThinkSharingDataCat, y = percent,group=ThinkSharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(data = temp_abs,aes(x=ThinkSharingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Think of data sharing with researchers who are NOT direct collaborators.'),width = 100)) + theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) + scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) + scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) ragg::agg_tiff('Q11.Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling = 0.75) pTSD dev.off() #### Data repositories and analyses #### # more elegant data0 = neuro_data %>% dplyr::select( 'Response ID', starts_with('Please indicate:'), starts_with('How do you process and analyze your data') ) comb_string_vec = c('Please indicate:', 'How do you process and analyze your data') comb_col_names = c('SharingProblems', 'HowAnalyzeData') data = wide_to_long_as(data0,comb_string_vec,comb_col_names) # make a Sharing Data Problems plot temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>% filter(as.numeric(SharingProblemsCat) != 6 & as.numeric(SharingProblemsCat) != 7) %>% droplevels() # change level order levels(temp$SharingProblemsCat) temp$SharingProblems = relevel(temp$SharingProblems,'Rather agree') temp$SharingProblems = relevel(temp$SharingProblems,'Undecided') temp$SharingProblems = relevel(temp$SharingProblems,'Rather disagree') temp$SharingProblems = relevel(temp$SharingProblems,'Fully disagree') temp$SharingProblems = relevel(temp$SharingProblems,'Not answered') temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n)) pSP = ggplot(data=temp) + geom_histogram(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Fully agree'],na.rm = TRUE)), y = percent, group=SharingProblems,fill=SharingProblems), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) + geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Sharing problems. Please indicate: '),width = 100)) + theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) + scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) + scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q12.Sharing.problems.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling =0.75) pSP dev.off() # make a How to exercise plot temp = data %>% select( 'Response ID',HowAnalyzeDataCat,HowAnalyzeData) %>% mutate_if(is.factor,replace_factor_na) %>% unique() # change the specific answers to yes idx = grepl('Other',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData) temp$HowAnalyzeData[idx] = c('Yes') tempID = temp$'Response ID'[grepl('Manual inspection and analysis',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)] idx = grepl('Other',temp$HowAnalyzeDataCat) & grepl('Not answered',temp$HowAnalyzeData) & (temp$'Response ID' %in% tempID) temp$HowAnalyzeData[idx] = c('No') temp = temp %>% filter(HowAnalyzeData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(HowAnalyzeDataCat) %>% summarise(n_abs = sum(n)) temp = temp %>% filter(HowAnalyzeData == 'Yes') pHAD = ggplot(data=temp) + geom_histogram(aes(x=reorder(HowAnalyzeDataCat,percent,function(x) max(x[HowAnalyzeData == 'Yes'],na.rm = TRUE)), y = percent, group=HowAnalyzeData,fill=HowAnalyzeData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) + geom_text(aes(x=HowAnalyzeDataCat, y = percent,group=HowAnalyzeData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(aes(x=HowAnalyzeDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('How do you process and analyze your data? ','n = ',unique(temp_abs$n_abs)),width = 60)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) + scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q13.How.do.you.process.and.analyze.your.data.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pHAD dev.off() # make publishing plot temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>% filter(as.numeric(SharingProblemsCat) == 6 | as.numeric(SharingProblemsCat) == 7) # change level order temp$SharingProblems = relevel(temp$SharingProblems,'Undecided') temp$SharingProblems = relevel(temp$SharingProblems,'No') temp$SharingProblems = relevel(temp$SharingProblems,'Not answered') temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n)) pSO = ggplot(data=temp) + geom_histogram(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Yes'],na.rm = TRUE)), y = percent, group=SharingProblems,fill=SharingProblems), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) + geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Sharing opinions. Please indicate: ','n = ',unique(temp_abs$n_abs)),width = 60)) + theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) + scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) + scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q14.Sharing.opinions.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+3.5, units = "cm", res = 600) pSO dev.off() #### Research data management in general #### # more elegant data0 = neuro_data %>% dplyr::select( 'Response ID', starts_with('What is your opinion'), starts_with('Applying research data management'), starts_with('When do you employ'), starts_with('Do you have dedicated'), starts_with('How much time do you currently'), starts_with('Please rank the top')) comb_string_vec = c('What is your opinion', 'Applying research data management', 'When do you employ', 'Do you have dedicated', 'How much time do you currently', 'Please rank the top') comb_col_names = c('StatementsOpinion', 'ApplyDataManagement', 'EmployTools', 'DedicatedPersonal', 'TimeConsum', 'TopSharingProblems') data = wide_to_long_as(data0,comb_string_vec,comb_col_names) # make a What is your opinion plot temp = data %>% select( 'Response ID',StatementsOpinionCat,StatementsOpinion) %>% mutate_if(is.factor,replace_factor_na) %>% unique() # change level order levels(droplevels(temp$StatementsOpinion)) temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather agree') temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Undecided') temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather disagree') temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Fully disagree') temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Not answered') temp = temp %>% filter(StatementsOpinion != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(StatementsOpinionCat) %>% summarise(n_abs = sum(n)) pOS = ggplot(data=temp) + geom_histogram(aes(x=reorder(StatementsOpinionCat,percent,function(x) max(x[StatementsOpinion == 'Fully agree'],na.rm = TRUE)), y = percent, group=StatementsOpinion,fill=StatementsOpinion), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) + geom_text(aes(x=StatementsOpinionCat, y = percent,group=StatementsOpinion,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(data = temp_abs,aes(x=StatementsOpinionCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('What is your opinion on the following statements?'),width = 100)) + theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) + scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) + scale_x_discrete(labels=function(x){str_wrap(x,width = 35)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q15.What.is.your.opinion.on.the.following.statements.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling = 0.75) pOS dev.off() # make a Apply Data Management plot temp = data %>% select( 'Response ID',ApplyDataManagementCat,ApplyDataManagement) %>% mutate_if(is.factor,replace_factor_na) %>% unique() # change level order temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather agree') temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Undecided') temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather disagree') temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Fully disagree') temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Not answered') temp = temp %>% filter(ApplyDataManagement != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(ApplyDataManagementCat) %>% summarise(n_abs = sum(n)) pARM = ggplot(data=temp) + geom_histogram(aes(x=reorder(ApplyDataManagementCat,percent,function(x) max(x[ApplyDataManagement == 'Fully agree'],na.rm = TRUE)), y = percent, group=ApplyDataManagement,fill=ApplyDataManagement), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) + geom_text(aes(x=ApplyDataManagementCat, y = percent,group=ApplyDataManagement,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(data = temp_abs,aes(x=ApplyDataManagementCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Applying research data management...'),width = 100)) + theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) + scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) + scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q16.Applying.research.data.management.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling = 0.75) pARM dev.off() # make When do you employ research data management tools and services in your research plot temp = data %>% select( 'Response ID',EmployTools) %>% mutate_if(is.factor,replace_factor_na) %>% unique() temp = temp %>% filter(EmployTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) pWOT = ggplot(data=temp) + geom_histogram(mapping=aes(x=reorder(EmployTools,percent),y=percent), stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) + ylim(0,max(temp$percent)+5) + geom_text(aes(x=EmployTools,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size) + geom_text(aes(x=EmployTools,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('When do you employ research data management tools and services in your research? ','n = ',sum(temp$n)),width = 70)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size)) + scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) ragg::agg_tiff('Q17.When.do.you.employ.research.data.management.tools.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pWOT dev.off() # make Do you have dedicated personnel with research data management or data curation expertise plot temp = data %>% select( 'Response ID',DedicatedPersonal) %>% mutate_if(is.factor,replace_factor_na) %>% unique() temp = temp %>% filter(DedicatedPersonal != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) pDP = ggplot(data=temp) + geom_histogram(mapping=aes(x=reorder(DedicatedPersonal,percent),y=percent), stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) + ylim(0,max(temp$percent)+5) + geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size) + geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Do you have dedicated personnel with research data management or data curation expertise? ','n = ',sum(temp$n)),width = 70)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size)) + scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) ragg::agg_tiff('Q18.Do.you.have.dedicated.personnel.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pDP dev.off() # make How much time do you currently need to ready a dataset from your lab for publication and re-use plot temp = data %>% select( 'Response ID',TimeConsum) %>% mutate_if(is.factor,replace_factor_na) %>% unique() temp = temp %>% filter(TimeConsum != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) pHMT = ggplot(data=temp) + geom_histogram(mapping=aes(x=reorder(TimeConsum,percent),y=percent), stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) + ylim(0,max(temp$percent)+5) + geom_text(aes(x=TimeConsum,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size) + geom_text(aes(x=TimeConsum,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('How much time do you currently need to ready a dataset from your lab for publication and re-use? ','n = ',sum(temp$n)),width = 70)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size)) + scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) ragg::agg_tiff('Q19.How.much.time.do.you.currently.need.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pHMT dev.off() # make a Top Sharing Data Problems plot # hier würde ich die nicht angegebenen werte wirklich entfernen alles andere sieht # dämlich aus und macht unnötig Probleme bzw ist schwer zu sehen was nun die wichtigen Dinge sind temp = data %>% select( 'Response ID',TopSharingProblemsCat,TopSharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() # change level order temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 2') temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 3') temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 4') temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 5') temp = temp %>% filter(TopSharingProblems != 'Not answered') %>% group_by_at(vars('TopSharingProblems','TopSharingProblemsCat')) %>% summarise(n = n()) %>% droplevels() %>% mutate(percent = round(n / sum(n) * 100,2)) temp_abs = temp %>% group_by(TopSharingProblems) %>% summarise(n_abs = sum(n)) pTSP = ggplot(data=temp) + geom_histogram(aes(x=reorder(TopSharingProblems,percent,function(x) max(x[TopSharingProblemsCat == 'Rank 1'],na.rm = TRUE)), y = percent, group=TopSharingProblemsCat,fill=TopSharingProblemsCat), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) + geom_text(aes(x=TopSharingProblems, y = percent,group=TopSharingProblemsCat,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size,) + geom_text(data = temp_abs,aes(x=TopSharingProblems,y=100,label = paste0('n=',n_abs)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Please rank the top 5 most pressing issues: '),width = 100)) + theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size), legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) + scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) + scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels ragg::agg_tiff('Q20.Please.rank.the.top.most.pressing.issues.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling = 0.75) pTSP dev.off() #### Membership #### # more elegant data0 = neuro_data %>% dplyr::select( 'Response ID', starts_with('Are you member of')) comb_string_vec = c('Are you member of') comb_col_names = c('MemberOf') data = wide_to_long_as(data0,comb_string_vec,comb_col_names) # make a Are you member of, or otherwise involved in, other NFDI consortia or initiatives plot temp = data %>% select( 'Response ID',MemberOf) %>% mutate_if(is.factor,replace_factor_na) %>% unique() temp = temp %>% filter(MemberOf != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2)) pMO = ggplot(data=temp) + geom_histogram(mapping=aes(x=reorder(MemberOf,percent),y=percent), stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) + ylim(0,max(temp$percent)+5) + geom_text(aes(x=MemberOf,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5), colour = "white", size = glob_insideBar_text_size) + geom_text(aes(x=MemberOf,y=percent,label = paste0('n=',n)),hjust = -0.25, colour = "black", size = glob_insideBar_text_size) + xlab('') + ylab('percent (%)') + coord_flip() + ggtitle(str_wrap(paste0('Are you member of, or otherwise involved in, other NFDI consortia or initiatives? ','n = ',sum(temp$n)),width = 100)) + theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(), axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size), axis.title = element_text(size = glob_text_size), plot.title = element_text(size = glob_title_text_size)) + scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) ragg::agg_tiff('Q21.Are.you.member.of.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600) pMO dev.off()