123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833 |
- #### clean workspace ####
- rm(list=ls())
- # The csv File has to be in the same directory
- setwd(dirname(rstudioapi::getSourceEditorContext()$path))
- # clear the console
- cat("\014")
- color_pal_vec = c('#070778','#816c93','#808080','#0e6966','#5a2c40','#1b1b1b','#935430','#f43661','#171511')
- #### functions ####
- replace_factor_na <- function(x){
- x <- as.character(x)
- x <- if_else(is.na(x), "Not answered", x)
- x <- as.factor(x)
- }
- wide_to_long_as <- function(data0,comb_string_vec,comb_col_names){
- # Diese Schleife ist mit Vorsicht zu genießen. Entstehende long format Datensatz kann sehr groß werden
- library(data.table)
- for(i in seq(1,length(comb_string_vec),1)){
- data0 = data.table::melt(as.data.table(data0),
- id= c(which(!grepl(comb_string_vec[i],colnames(data0),fixed = TRUE))),
- measure=list(grep(comb_string_vec[i],colnames(data0),fixed = TRUE)),
- variable.name = paste0(comb_col_names[i],'Cat'),
- value.name = comb_col_names[i],value.factor=TRUE)
-
- # make some nicer labels
- data0 = as.data.frame(data0)
- level_strings = levels(data0[,ncol(data0)-1])
-
- # iterate over the level strings and update them
- for(s in seq(1,length(level_strings),1)){
- level_string = level_strings[s]
- temp_start = unlist(gregexpr('\\[', level_string))[1]
- temp_end = tail(unlist(gregexpr('\\]', level_string)), n=1)
- if(length(temp_end) != -1 & length(temp_start) != -1){
- level_string = substr(level_string,temp_start[1]+1,max(temp_end)-1)
- }
- level_strings[s] = level_string
- }
- # reset the labels
- levels(data0[,ncol(data0)-1]) = level_strings
- }
- data = data0
-
- return(data)
- }
- #### load libraries ####
- library(ggplot2)
- library(dplyr)
- library(forcats)
- library(ggpubr)
- library(reshape2)
- library(stringr)
- library(ragg) # scaling text size for different picture sizes
- #### Load the data ####
- # catch NA strings
- #neuro_data <- read.csv("results-survey197421_nurkomplett.csv",row.names=NULL,na.strings=c("","N/A"),sep=',')
- neuro_data <- read.csv("results-survey197421_alledaten.csv",row.names=NULL,na.strings=c("","N/A"),sep=',',check.names = FALSE)
- neuro_data <- neuro_data[!is.na(neuro_data$'My current (primary) position is:'),]
- # Es geht um die Frage was die Leute die Daten teilen von den anderen Unterscheidet
- # und insgesamt um die Frage was es fuer Probleme gibt in unserer Dateninfrastruktur
- # set up some global values
- glob_insideBar_text_size = 2.75
- glob_text_size = 8
- glob_title_text_size = 8
- glob_fig_width = 17.5
- glob_dec_round = 0
- #### About myself ####
- # more elegant
- data0 = neuro_data %>% dplyr::select('Response ID',
- starts_with('I work at'),
- starts_with('My current '),
- starts_with('Which neuroscience discipline(s) '),
- starts_with('Please state if your ')
- )
- comb_string_vec = c('I work at',
- 'My current ',
- 'Which neuroscience discipline(s) ',
- 'Please state if your ')
- comb_col_names = c('WorkPlaces',
- 'CurrentPosition',
- 'NeuroDiscipline',
- 'FimilarDataTypes')
- data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
- # make a WorkPlaces plot filter out the 'Other' answers
- temp = data %>% select('Response ID',WorkPlaces) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>% group_by(WorkPlaces) %>%
- filter(n() >= 3) %>% mutate(WorkPlaces = str_replace(WorkPlaces,", Helmholtz Center", "")) %>%
- mutate(WorkPlaces = str_replace(WorkPlaces,", Fraunhofer Institute", ""))
- temp = temp[1:218,] # remove [Other] answers
- temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2))
- pWP = ggplot(data=temp) +
- geom_histogram(mapping=aes(x=reorder(WorkPlaces,percent),y=percent),
- stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
- ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=WorkPlaces,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size) +
- geom_text(aes(x=WorkPlaces,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + ggtitle(paste0('I work at / I am affiliated with: ','n = ',sum(temp$n))) + coord_flip() +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size)) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 30)})
- ragg::agg_tiff('Q1.I.work.at.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pWP
- dev.off()
- # make a Current Position plot and replace NaN with 'None'
- temp = data %>% select('Response ID',CurrentPosition) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>%
- filter(CurrentPosition != 'child neurologist') %>%
- filter(CurrentPosition != 'coodinator ') %>%
- filter(CurrentPosition != 'Clinician') %>%
- filter(CurrentPosition != 'Doctor') %>%
- filter(CurrentPosition != 'Emeritus Professor') %>%
- filter(CurrentPosition != 'medical doctor') %>%
- filter(CurrentPosition != 'Oberarzt') %>%
- filter(CurrentPosition != 'Therapist')
- temp = temp[1:218,] # remove [Other] answers
- temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2))
- pCP = ggplot(data=temp) +
- geom_histogram(mapping=aes(x=reorder(CurrentPosition,percent),y = percent),
- stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
- ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=CurrentPosition,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size) +
- geom_text(aes(x=CurrentPosition,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + ggtitle(paste0('My current (primary) position is: ','n = ',sum(temp$n))) + coord_flip() +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size)) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q2.My.current.position.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pCP
- dev.off()
- # make a Neuro Discipline plot
- # Neuro Discipline questions are Yes/No questions ==> just need the ones who answered with Yes
- temp = data %>% select('Response ID',NeuroDisciplineCat,NeuroDiscipline) %>%
- unique() %>% mutate_if(is.factor, replace_factor_na)
- # change other colum to Yes/No column
- idx = grepl('Other',temp$NeuroDisciplineCat) & !grepl('Not answered',temp$NeuroDiscipline)
- temp$NeuroDiscipline[idx] = c('Yes')
- idx = grepl('Other',temp$NeuroDisciplineCat) & grepl('Not answered',temp$NeuroDiscipline)
- temp$NeuroDiscipline[idx] = c('No')
- temp = temp %>% filter(NeuroDiscipline != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(NeuroDisciplineCat) %>% summarise(n_abs = sum(n))
- temp = temp %>% filter(NeuroDiscipline == 'Yes')
- pND = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(NeuroDisciplineCat,percent,function(x) max(x[NeuroDiscipline == 'Yes'],na.rm = TRUE)), y = percent,
- group=NeuroDiscipline,fill=NeuroDiscipline), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=NeuroDisciplineCat, y = percent,group=NeuroDiscipline,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(aes(x=NeuroDisciplineCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Which neuroscience discipline(s) describe(s) your work or research best? ','n = ',unique(temp_abs$n_abs)),width = 70)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) +
- scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q3.Which.neuroscience.discipline.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
- pND
- dev.off()
- # make a Fimiliar Data Types plot
- # Fimiliar Data Types are Yes/No questions ==> just need the ones who answered with Yes ==> or?
- temp = data %>% select('Response ID',FimilarDataTypesCat,FimilarDataTypes) %>% unique() %>% mutate_if(is.factor, replace_factor_na)
- # change other colum to Yes/No column
- idx = grepl('Other',temp$FimilarDataTypesCat) & !grepl('Not answered',temp$FimilarDataTypes)
- temp$FimilarDataTypes[idx] = c('Yes')
- idx = grepl('Other',temp$FimilarDataTypesCat) & grepl('Not answered',temp$FimilarDataTypes)
- temp$FimilarDataTypes[idx] = c('No')
- temp = temp %>% filter(FimilarDataTypes != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(FimilarDataTypesCat) %>% summarise(n_abs = sum(n))
- temp = temp %>% filter(FimilarDataTypes == 'Yes')
- pFD = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(FimilarDataTypesCat,percent,function(x) max(x[FimilarDataTypes == 'Yes'],na.rm = TRUE)), y = percent,
- group=FimilarDataTypes,fill=FimilarDataTypes), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=FimilarDataTypesCat, y = percent,group=FimilarDataTypes,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(aes(x=FimilarDataTypesCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Please state if your work includes one or several of the following recording methods or data types: ','n = ',unique(temp_abs$n_abs)),width = 100)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) +
- scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q4.Please.state.if.your.work.includes.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
- pFD
- dev.off()
- #### Tools ####
- # question Q6 For which tasks would you wish to have a tool or standard? is not considered do to the large number of different answer posibilities
- # more elegant
- data0 = neuro_data %>% dplyr::select('Response ID',
- starts_with('For which of these tasks '),
- starts_with('To what degree do you ')
- )
- comb_string_vec = c('For which of these tasks ',
- 'To what degree do you ')
- comb_col_names = c('TaskStandardTools',
- 'TaskStandardToolsDegree')
- data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
- # make a Task Standard Tools plot
- # Standard Task Tools are Yes/No questions ==> just need the ones who answered with Yes ==> or?
- # remove Comment columes
- temp = data %>% select('Response ID',TaskStandardToolsCat,TaskStandardTools) %>% filter(!grepl('Comment',TaskStandardToolsCat)) %>%
- mutate_if(is.factor, replace_factor_na) %>% unique() %>% droplevels()
- temp = temp %>% filter(TaskStandardTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(TaskStandardToolsCat) %>% summarise(n_abs = sum(n))
- temp = temp %>% filter(TaskStandardTools == 'Yes')
- pTST = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(TaskStandardToolsCat,percent,function(x) max(x[TaskStandardTools == 'Yes'],na.rm = TRUE)), y = percent,
- group=TaskStandardTools,fill=TaskStandardTools), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=TaskStandardToolsCat, y = percent,group=TaskStandardTools,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(aes(x=TaskStandardToolsCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('For which of these tasks do you use available tools or standards? ','n = ',unique(temp_abs$n_abs)),width = 100)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) +
- scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q5.For.which.of.these.tasks.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
- pTST
- dev.off()
- # make a Task Standard Tools Degree plot
- temp = data %>% select( 'Response ID',TaskStandardToolsDegreeCat,TaskStandardToolsDegree) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
- # change some long level names
- temp$TaskStandardToolsDegree = recode_factor(temp$TaskStandardToolsDegree, 'Not at all - I use my own costum solutions' = 'Use my own solutions',
- 'This is not relevant for my scientific work' = 'Not relevant for my work')
- # change level order
- levels(temp$TaskStandardToolsDegree)
- temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'As much as possible')
- temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Mostly')
- temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Occasionally')
- temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Use my own solutions')
- temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not relevant for my work')
- temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not answered')
- temp = temp %>% filter(TaskStandardToolsDegree != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(TaskStandardToolsDegreeCat) %>% summarise(n_abs = sum(n))
- pTSD = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(TaskStandardToolsDegreeCat,percent,function(x) max(x[TaskStandardToolsDegree == 'As much as possible'],na.rm = TRUE)), y = percent,
- group=TaskStandardToolsDegree,fill=TaskStandardToolsDegree), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
- geom_text(aes(x=TaskStandardToolsDegreeCat, y = percent,group=TaskStandardToolsDegree,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(data = temp_abs,aes(x=TaskStandardToolsDegreeCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('To what degree do you use available tools or standards?'),width = 100)) +
- theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
- scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q7.To.what.degree.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,units = "cm",
- res = 600,scaling=0.75)
- pTSD
- dev.off()
- #### sharing and reusing data ####
- # recreate different datasets
- # more elegant
- data0 = neuro_data %>% dplyr::select( 'Response ID',
- starts_with('Have you shared data with'),
- starts_with('Do you have existing data'),
- starts_with('Think of re-using data'),
- starts_with('Think of data sharing')
- )
- comb_string_vec = c('Have you shared data with',
- 'Do you have existing data',
- 'Think of re-using data',
- 'Think of data sharing')
- comb_col_names = c('SharingData',
- 'ExistingData',
- 'ThinkReusingData',
- 'ThinkSharingData')
- data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
- # Data sharing plot
- temp = data %>% select( 'Response ID',SharingDataCat,SharingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
- temp = temp %>% filter(SharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(SharingDataCat) %>% summarise(n_abs = sum(n))
- temp = temp %>% filter(SharingData == 'Yes')
- pSD = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(SharingDataCat,percent,function(x) max(x[SharingData == 'Yes'],na.rm = TRUE)), y = percent,
- group=SharingData,fill=SharingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=SharingDataCat, y = percent,group=SharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(aes(x=SharingDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Have you shared data with... ','n = ',unique(temp_abs$n_abs)),width = 60)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) +
- scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q8.Have.you.shared.data.with.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pSD
- dev.off()
- # Existing Data plot
- temp = data %>% select( 'Response ID',ExistingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
- temp = temp %>% filter(ExistingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- pED = ggplot(data=temp) +
- geom_histogram(mapping=aes(x=reorder(ExistingData,percent),y=percent),
- stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
- ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=ExistingData,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size) +
- geom_text(aes(x=ExistingData,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Do you have existing data sets (experiments) that should be kept alive by making them available for reuse? ','n = ',sum(temp$n)),width = 70)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size)) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 40)})
- ragg::agg_tiff('Q9.Do.you.have.existing.data.sets.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pED
- dev.off()
- # make a Think of Reusing Data plot
- temp = data %>% select( 'Response ID',ThinkReusingDataCat,ThinkReusingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
- # change level order
- temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Undecided')
- temp$ThinkReusingData = relevel(temp$ThinkReusingData,'No')
- temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Not answered')
- temp = temp %>% filter(ThinkReusingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(ThinkReusingDataCat) %>% summarise(n_abs = sum(n))
- pTRD = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(ThinkReusingDataCat,percent,function(x) max(x[ThinkReusingData == 'Yes'],na.rm = TRUE)), y = percent,
- group=ThinkReusingData,fill=ThinkReusingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
- geom_text(aes(x=ThinkReusingDataCat, y = percent,group=ThinkReusingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(data = temp_abs,aes(x=ThinkReusingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Think of re-using data from repositories.'),width = 100)) +
- theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
- scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q10.Think.of.re.using.data.from.repositories.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
- res = 600,scaling = 0.75)
- pTRD
- dev.off()
- # make a Think of Sharing Data plot
- temp = data %>% select( 'Response ID',ThinkSharingDataCat,ThinkSharingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()
- # change level order
- temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Undecided')
- temp$ThinkSharingData = relevel(temp$ThinkSharingData,'No')
- temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Not answered')
- temp = temp %>% filter(ThinkSharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(ThinkSharingDataCat) %>% summarise(n_abs = sum(n))
- pTSD = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(ThinkSharingDataCat,percent,function(x) max(x[ThinkSharingData == 'Yes'],na.rm = TRUE)), y = percent,
- group=ThinkSharingData,fill=ThinkSharingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
- geom_text(aes(x=ThinkSharingDataCat, y = percent,group=ThinkSharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(data = temp_abs,aes(x=ThinkSharingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Think of data sharing with researchers who are NOT direct collaborators.'),width = 100)) +
- theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
- scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 40)})
- ragg::agg_tiff('Q11.Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,
- units = "cm", res = 600,scaling = 0.75)
- pTSD
- dev.off()
- #### Data repositories and analyses ####
- # more elegant
- data0 = neuro_data %>% dplyr::select( 'Response ID',
- starts_with('Please indicate:'),
- starts_with('How do you process and analyze your data')
- )
- comb_string_vec = c('Please indicate:',
- 'How do you process and analyze your data')
- comb_col_names = c('SharingProblems',
- 'HowAnalyzeData')
- data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
- # make a Sharing Data Problems plot
- temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>%
- filter(as.numeric(SharingProblemsCat) != 6 & as.numeric(SharingProblemsCat) != 7) %>% droplevels()
- # change level order
- levels(temp$SharingProblemsCat)
- temp$SharingProblems = relevel(temp$SharingProblems,'Rather agree')
- temp$SharingProblems = relevel(temp$SharingProblems,'Undecided')
- temp$SharingProblems = relevel(temp$SharingProblems,'Rather disagree')
- temp$SharingProblems = relevel(temp$SharingProblems,'Fully disagree')
- temp$SharingProblems = relevel(temp$SharingProblems,'Not answered')
- temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n))
- pSP = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Fully agree'],na.rm = TRUE)), y = percent,
- group=SharingProblems,fill=SharingProblems), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
- geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Sharing problems. Please indicate: '),width = 100)) +
- theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
- scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q12.Sharing.problems.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling =0.75)
- pSP
- dev.off()
- # make a How to exercise plot
- temp = data %>% select( 'Response ID',HowAnalyzeDataCat,HowAnalyzeData) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
- # change the specific answers to yes
- idx = grepl('Other',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)
- temp$HowAnalyzeData[idx] = c('Yes')
- tempID = temp$'Response ID'[grepl('Manual inspection and analysis',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)]
- idx = grepl('Other',temp$HowAnalyzeDataCat) & grepl('Not answered',temp$HowAnalyzeData) & (temp$'Response ID' %in% tempID)
- temp$HowAnalyzeData[idx] = c('No')
- temp = temp %>% filter(HowAnalyzeData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(HowAnalyzeDataCat) %>% summarise(n_abs = sum(n))
- temp = temp %>% filter(HowAnalyzeData == 'Yes')
- pHAD = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(HowAnalyzeDataCat,percent,function(x) max(x[HowAnalyzeData == 'Yes'],na.rm = TRUE)), y = percent,
- group=HowAnalyzeData,fill=HowAnalyzeData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=HowAnalyzeDataCat, y = percent,group=HowAnalyzeData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(aes(x=HowAnalyzeDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('How do you process and analyze your data? ','n = ',unique(temp_abs$n_abs)),width = 60)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) +
- scale_color_manual(values=color_pal_vec) + scale_fill_manual(values = color_pal_vec) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q13.How.do.you.process.and.analyze.your.data.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pHAD
- dev.off()
- # make publishing plot
- temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>%
- filter(as.numeric(SharingProblemsCat) == 6 | as.numeric(SharingProblemsCat) == 7)
- # change level order
- temp$SharingProblems = relevel(temp$SharingProblems,'Undecided')
- temp$SharingProblems = relevel(temp$SharingProblems,'No')
- temp$SharingProblems = relevel(temp$SharingProblems,'Not answered')
- temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n))
- pSO = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Yes'],na.rm = TRUE)), y = percent,
- group=SharingProblems,fill=SharingProblems), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
- geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Sharing opinions. Please indicate: ','n = ',unique(temp_abs$n_abs)),width = 60)) +
- theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
- scale_color_manual(values=rev(color_pal_vec[1:3])) + scale_fill_manual(values = rev(color_pal_vec[1:3])) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q14.Sharing.opinions.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+3.5, units = "cm", res = 600)
- pSO
- dev.off()
- #### Research data management in general ####
- # more elegant
- data0 = neuro_data %>% dplyr::select( 'Response ID',
- starts_with('What is your opinion'),
- starts_with('Applying research data management'),
- starts_with('When do you employ'),
- starts_with('Do you have dedicated'),
- starts_with('How much time do you currently'),
- starts_with('Please rank the top'))
- comb_string_vec = c('What is your opinion',
- 'Applying research data management',
- 'When do you employ',
- 'Do you have dedicated',
- 'How much time do you currently',
- 'Please rank the top')
- comb_col_names = c('StatementsOpinion',
- 'ApplyDataManagement',
- 'EmployTools',
- 'DedicatedPersonal',
- 'TimeConsum',
- 'TopSharingProblems')
- data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
- # make a What is your opinion plot
- temp = data %>% select( 'Response ID',StatementsOpinionCat,StatementsOpinion) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
- # change level order
- levels(droplevels(temp$StatementsOpinion))
- temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather agree')
- temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Undecided')
- temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather disagree')
- temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Fully disagree')
- temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Not answered')
- temp = temp %>% filter(StatementsOpinion != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(StatementsOpinionCat) %>% summarise(n_abs = sum(n))
- pOS = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(StatementsOpinionCat,percent,function(x) max(x[StatementsOpinion == 'Fully agree'],na.rm = TRUE)), y = percent,
- group=StatementsOpinion,fill=StatementsOpinion), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
- geom_text(aes(x=StatementsOpinionCat, y = percent,group=StatementsOpinion,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(data = temp_abs,aes(x=StatementsOpinionCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('What is your opinion on the following statements?'),width = 100)) +
- theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
- scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 35)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q15.What.is.your.opinion.on.the.following.statements.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,
- units = "cm", res = 600,scaling = 0.75)
- pOS
- dev.off()
- # make a Apply Data Management plot
- temp = data %>% select( 'Response ID',ApplyDataManagementCat,ApplyDataManagement) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
- # change level order
- temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather agree')
- temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Undecided')
- temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather disagree')
- temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Fully disagree')
- temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Not answered')
- temp = temp %>% filter(ApplyDataManagement != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(ApplyDataManagementCat) %>% summarise(n_abs = sum(n))
- pARM = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(ApplyDataManagementCat,percent,function(x) max(x[ApplyDataManagement == 'Fully agree'],na.rm = TRUE)), y = percent,
- group=ApplyDataManagement,fill=ApplyDataManagement), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
- geom_text(aes(x=ApplyDataManagementCat, y = percent,group=ApplyDataManagement,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(data = temp_abs,aes(x=ApplyDataManagementCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Applying research data management...'),width = 100)) +
- theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
- scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q16.Applying.research.data.management.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
- res = 600,scaling = 0.75)
- pARM
- dev.off()
- # make When do you employ research data management tools and services in your research plot
- temp = data %>% select( 'Response ID',EmployTools) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
- temp = temp %>% filter(EmployTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- pWOT = ggplot(data=temp) +
- geom_histogram(mapping=aes(x=reorder(EmployTools,percent),y=percent),
- stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
- ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=EmployTools,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size) +
- geom_text(aes(x=EmployTools,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('When do you employ research data management tools and services in your research? ','n = ',sum(temp$n)),width = 70)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size)) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})
- ragg::agg_tiff('Q17.When.do.you.employ.research.data.management.tools.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pWOT
- dev.off()
- # make Do you have dedicated personnel with research data management or data curation expertise plot
- temp = data %>% select( 'Response ID',DedicatedPersonal) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
- temp = temp %>% filter(DedicatedPersonal != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- pDP = ggplot(data=temp) +
- geom_histogram(mapping=aes(x=reorder(DedicatedPersonal,percent),y=percent),
- stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
- ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size) +
- geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Do you have dedicated personnel with research data management or data curation expertise? ','n = ',sum(temp$n)),width = 70)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size)) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})
- ragg::agg_tiff('Q18.Do.you.have.dedicated.personnel.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pDP
- dev.off()
- # make How much time do you currently need to ready a dataset from your lab for publication and re-use plot
- temp = data %>% select( 'Response ID',TimeConsum) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
- temp = temp %>% filter(TimeConsum != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- pHMT = ggplot(data=temp) +
- geom_histogram(mapping=aes(x=reorder(TimeConsum,percent),y=percent),
- stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
- ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=TimeConsum,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size) +
- geom_text(aes(x=TimeConsum,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('How much time do you currently need to ready a dataset from your lab for publication and re-use? ','n = ',sum(temp$n)),width = 70)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size)) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})
- ragg::agg_tiff('Q19.How.much.time.do.you.currently.need.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pHMT
- dev.off()
- # make a Top Sharing Data Problems plot
- # hier würde ich die nicht angegebenen werte wirklich entfernen alles andere sieht
- # dämlich aus und macht unnötig Probleme bzw ist schwer zu sehen was nun die wichtigen Dinge sind
- temp = data %>% select( 'Response ID',TopSharingProblemsCat,TopSharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
- # change level order
- temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 2')
- temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 3')
- temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 4')
- temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 5')
- temp = temp %>% filter(TopSharingProblems != 'Not answered') %>% group_by_at(vars('TopSharingProblems','TopSharingProblemsCat')) %>% summarise(n = n()) %>% droplevels() %>%
- mutate(percent = round(n / sum(n) * 100,2))
- temp_abs = temp %>% group_by(TopSharingProblems) %>% summarise(n_abs = sum(n))
- pTSP = ggplot(data=temp) +
- geom_histogram(aes(x=reorder(TopSharingProblems,percent,function(x) max(x[TopSharingProblemsCat == 'Rank 1'],na.rm = TRUE)), y = percent,
- group=TopSharingProblemsCat,fill=TopSharingProblemsCat), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
- geom_text(aes(x=TopSharingProblems, y = percent,group=TopSharingProblemsCat,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size,) +
- geom_text(data = temp_abs,aes(x=TopSharingProblems,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Please rank the top 5 most pressing issues: '),width = 100)) +
- theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size),
- legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
- scale_color_manual(values=rev(color_pal_vec[1:5])) + scale_fill_manual(values = rev(color_pal_vec[1:5])) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels
- ragg::agg_tiff('Q20.Please.rank.the.top.most.pressing.issues.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
- res = 600,scaling = 0.75)
- pTSP
- dev.off()
- #### Membership ####
- # more elegant
- data0 = neuro_data %>% dplyr::select( 'Response ID',
- starts_with('Are you member of'))
- comb_string_vec = c('Are you member of')
- comb_col_names = c('MemberOf')
- data = wide_to_long_as(data0,comb_string_vec,comb_col_names)
- # make a Are you member of, or otherwise involved in, other NFDI consortia or initiatives plot
- temp = data %>% select( 'Response ID',MemberOf) %>% mutate_if(is.factor,replace_factor_na) %>% unique()
- temp = temp %>% filter(MemberOf != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>%
- mutate(percent = round(n / sum(n) * 100,2))
- pMO = ggplot(data=temp) +
- geom_histogram(mapping=aes(x=reorder(MemberOf,percent),y=percent),
- stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
- ylim(0,max(temp$percent)+5) +
- geom_text(aes(x=MemberOf,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
- colour = "white", size = glob_insideBar_text_size) +
- geom_text(aes(x=MemberOf,y=percent,label = paste0('n=',n)),hjust = -0.25,
- colour = "black", size = glob_insideBar_text_size) +
- xlab('') + ylab('percent (%)') + coord_flip() +
- ggtitle(str_wrap(paste0('Are you member of, or otherwise involved in, other NFDI consortia or initiatives? ','n = ',sum(temp$n)),width = 100)) +
- theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
- axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
- axis.title = element_text(size = glob_text_size),
- plot.title = element_text(size = glob_title_text_size)) +
- scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})
- ragg::agg_tiff('Q21.Are.you.member.of.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
- pMO
- dev.off()
|