#### clean workspace ####
rm(list=ls())
# The csv File has to be in the same directory
setwd(dirname(rstudioapi::getSourceEditorContext()$path))

# clear the console
cat("\014") 

color_pal_vec = c('#070778','#816c93','#808080','#0e6966','#5a2c40','#1b1b1b','#935430','#f43661','#171511')

#### functions ####
replace_factor_na <- function(x){
  x <- as.character(x)
  x <- if_else(is.na(x), "Not answered", x)
  x <- as.factor(x)
}

wide_to_long_as <- function(data0,comb_string_vec,comb_col_names){
  # Diese Schleife ist mit Vorsicht zu genießen. Entstehende long format Datensatz kann sehr groß werden
  library(data.table)
  for(i in seq(1,length(comb_string_vec),1)){
    data0 = data.table::melt(as.data.table(data0),
                             id= c(which(!grepl(comb_string_vec[i],colnames(data0),fixed = TRUE))),
                             measure=list(grep(comb_string_vec[i],colnames(data0),fixed = TRUE)),
                             variable.name = paste0(comb_col_names[i],'Cat'),
                             value.name = comb_col_names[i],value.factor=TRUE)
    
    # make some nicer labels
    data0 = as.data.frame(data0)
    level_strings = levels(data0[,ncol(data0)-1])
    
    # iterate over the level strings and update them
    for(s in seq(1,length(level_strings),1)){
      level_string = level_strings[s]
      temp_start = unlist(gregexpr('\\[', level_string))[1]
      temp_end = tail(unlist(gregexpr('\\]', level_string)), n=1)
      if(length(temp_end) != -1 & length(temp_start) != -1){
        level_string = substr(level_string,temp_start[1]+1,max(temp_end)-1)
      }
      level_strings[s] = level_string
    }
    # reset the labels
    levels(data0[,ncol(data0)-1]) = level_strings
  }
  data = data0
  
  return(data)
}

#### load libraries ####
library(ggplot2)
library(dplyr)
library(forcats)
library(ggpubr)
library(reshape2)
library(stringr)
library(ragg) # scaling text size for different picture sizes

#### Load the data ####
# catch NA strings
#neuro_data <- read.csv("results-survey197421_nurkomplett.csv",row.names=NULL,na.strings=c("","N/A"),sep=',')
neuro_data <- read.csv("results-survey197421_alledaten.csv",row.names=NULL,na.strings=c("","N/A"),sep=',',check.names = FALSE)
neuro_data <- neuro_data[!is.na(neuro_data$'My current (primary) position is:'),]

# Es geht um die Frage was die Leute die Daten teilen von den anderen Unterscheidet 
# und insgesamt um die Frage was es fuer Probleme gibt in unserer Dateninfrastruktur

# set up some global values
glob_insideBar_text_size = 2.75
glob_text_size = 8
glob_title_text_size = 8
glob_fig_width = 17.5
glob_dec_round = 0

#### About myself ####
# more elegant
data0 = neuro_data %>% dplyr::select('Response ID',
                                     starts_with('I work at'),
                                     starts_with('My current '),
                                     starts_with('Which neuroscience discipline(s) '),
                                     starts_with('Please state if your ')
)

comb_string_vec = c('I work at',
                    'My current ',
                    'Which neuroscience discipline(s) ',
                    'Please state if your ')
comb_col_names = c('WorkPlaces',
                   'CurrentPosition',
                   'NeuroDiscipline',
                   'FimilarDataTypes')

data = wide_to_long_as(data0,comb_string_vec,comb_col_names)

# make a WorkPlaces plot filter out the 'Other' answers
temp = data %>% select('Response ID',WorkPlaces) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>% group_by(WorkPlaces) %>% 
  filter(n() >= 3) %>% mutate(WorkPlaces = str_replace(WorkPlaces,", Helmholtz Center", "")) %>%
                       mutate(WorkPlaces = str_replace(WorkPlaces,", Fraunhofer Institute", ""))
temp = temp[1:218,] # remove [Other] answers
temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2))

pWP = ggplot(data=temp) + 
  geom_histogram(mapping=aes(x=reorder(WorkPlaces,percent),y=percent),
                 stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=WorkPlaces,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size) +
  geom_text(aes(x=WorkPlaces,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + ggtitle(paste0('I work at / I am affiliated with: ','n = ',sum(temp$n))) + coord_flip() +
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size)) +
  scale_x_discrete(labels=function(x){str_wrap(x,width = 30)})

ragg::agg_tiff('Q1.I.work.at.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pWP
dev.off()

# make a Current Position plot and replace NaN with 'None'
temp = data %>% select('Response ID',CurrentPosition) %>% unique() %>% mutate_if(is.factor, replace_factor_na) %>%
  filter(CurrentPosition != 'child neurologist') %>%
  filter(CurrentPosition != 'coodinator ') %>%
  filter(CurrentPosition != 'Clinician') %>%
  filter(CurrentPosition != 'Doctor') %>%
  filter(CurrentPosition != 'Emeritus Professor') %>%
  filter(CurrentPosition != 'medical doctor') %>%
  filter(CurrentPosition != 'Oberarzt') %>%
  filter(CurrentPosition != 'Therapist')
temp = temp[1:218,] # remove [Other] answers
temp = temp %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% mutate(percent = round(n / sum(n) * 100,2))

pCP = ggplot(data=temp) + 
  geom_histogram(mapping=aes(x=reorder(CurrentPosition,percent),y = percent),
                 stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=CurrentPosition,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size) +
  geom_text(aes(x=CurrentPosition,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) + 
  xlab('') + ylab('percent (%)') + ggtitle(paste0('My current (primary) position is: ','n = ',sum(temp$n))) + coord_flip() +
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size)) +
  scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q2.My.current.position.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pCP
dev.off()

# make a Neuro Discipline plot
# Neuro Discipline questions are Yes/No questions ==> just need the ones who answered with Yes
temp = data %>% select('Response ID',NeuroDisciplineCat,NeuroDiscipline) %>% 
  unique() %>% mutate_if(is.factor, replace_factor_na)

# change other colum to Yes/No column
idx = grepl('Other',temp$NeuroDisciplineCat) & !grepl('Not answered',temp$NeuroDiscipline)
temp$NeuroDiscipline[idx] = c('Yes')
idx = grepl('Other',temp$NeuroDisciplineCat) & grepl('Not answered',temp$NeuroDiscipline)
temp$NeuroDiscipline[idx] = c('No')

temp = temp %>% filter(NeuroDiscipline != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
       mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(NeuroDisciplineCat) %>% summarise(n_abs = sum(n))
temp = temp %>% filter(NeuroDiscipline == 'Yes')

pND = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(NeuroDisciplineCat,percent,function(x) max(x[NeuroDiscipline == 'Yes'],na.rm = TRUE)), y = percent,
                     group=NeuroDiscipline,fill=NeuroDiscipline), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=NeuroDisciplineCat, y = percent,group=NeuroDiscipline,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(aes(x=NeuroDisciplineCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Which neuroscience discipline(s) describe(s) your work or research best? ','n = ',unique(temp_abs$n_abs)),width = 70)) +
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) +
  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q3.Which.neuroscience.discipline.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
pND
dev.off()


# make a Fimiliar Data Types plot
# Fimiliar Data Types are Yes/No questions ==> just need the ones who answered with Yes ==> or?
temp = data %>% select('Response ID',FimilarDataTypesCat,FimilarDataTypes)  %>% unique() %>% mutate_if(is.factor, replace_factor_na)

# change other colum to Yes/No column
idx = grepl('Other',temp$FimilarDataTypesCat) & !grepl('Not answered',temp$FimilarDataTypes)
temp$FimilarDataTypes[idx] = c('Yes')
idx = grepl('Other',temp$FimilarDataTypesCat) & grepl('Not answered',temp$FimilarDataTypes)
temp$FimilarDataTypes[idx] = c('No')

temp = temp %>% filter(FimilarDataTypes != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(FimilarDataTypesCat) %>% summarise(n_abs = sum(n))
temp = temp %>% filter(FimilarDataTypes == 'Yes')

pFD = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(FimilarDataTypesCat,percent,function(x) max(x[FimilarDataTypes == 'Yes'],na.rm = TRUE)), y = percent,
                     group=FimilarDataTypes,fill=FimilarDataTypes), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=FimilarDataTypesCat, y = percent,group=FimilarDataTypes,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(aes(x=FimilarDataTypesCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Please state if your work includes one or several of the following recording methods or data types: ','n = ',unique(temp_abs$n_abs)),width = 100)) +
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) +
  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q4.Please.state.if.your.work.includes.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
pFD
dev.off()


#### Tools ####
# question Q6 For which tasks would you wish to have a tool or standard? is not considered do to the large number of different answer posibilities

# more elegant
data0 = neuro_data %>% dplyr::select('Response ID',
                                     starts_with('For which of these tasks '),
                                     starts_with('To what degree do you ')
)

comb_string_vec = c('For which of these tasks ',
                    'To what degree do you ')

comb_col_names = c('TaskStandardTools',
                   'TaskStandardToolsDegree')

data = wide_to_long_as(data0,comb_string_vec,comb_col_names)

# make a Task Standard Tools plot
# Standard Task Tools are Yes/No questions ==> just need the ones who answered with Yes ==> or?
# remove Comment columes
temp = data %>% select('Response ID',TaskStandardToolsCat,TaskStandardTools) %>% filter(!grepl('Comment',TaskStandardToolsCat)) %>% 
  mutate_if(is.factor, replace_factor_na) %>% unique() %>% droplevels()

temp = temp %>% filter(TaskStandardTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(TaskStandardToolsCat) %>% summarise(n_abs = sum(n))
temp = temp %>% filter(TaskStandardTools == 'Yes')

pTST = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(TaskStandardToolsCat,percent,function(x) max(x[TaskStandardTools == 'Yes'],na.rm = TRUE)), y = percent,
                     group=TaskStandardTools,fill=TaskStandardTools), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=TaskStandardToolsCat, y = percent,group=TaskStandardTools,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(aes(x=TaskStandardToolsCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('For which of these tasks do you use available tools or standards? ','n = ',unique(temp_abs$n_abs)),width = 100)) +
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) +
  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q5.For.which.of.these.tasks.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp)+4, units = "cm", res = 600)
pTST
dev.off()

# make a Task Standard Tools Degree plot 
temp = data %>% select( 'Response ID',TaskStandardToolsDegreeCat,TaskStandardToolsDegree) %>% mutate_if(is.factor, replace_factor_na) %>% unique()

# change some long level names
temp$TaskStandardToolsDegree = recode_factor(temp$TaskStandardToolsDegree, 'Not at all - I use my own costum solutions' = 'Use my own solutions', 
                                             'This is not relevant for my scientific work' = 'Not relevant for my work')

# change level order
levels(temp$TaskStandardToolsDegree)
temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'As much as possible')
temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Mostly')
temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Occasionally')
temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Use my own solutions')
temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not relevant for my work')
temp$TaskStandardToolsDegree = relevel(temp$TaskStandardToolsDegree,'Not answered')

temp = temp %>% filter(TaskStandardToolsDegree != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(TaskStandardToolsDegreeCat) %>% summarise(n_abs = sum(n))

pTSD = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(TaskStandardToolsDegreeCat,percent,function(x) max(x[TaskStandardToolsDegree == 'As much as possible'],na.rm = TRUE)), y = percent,
                     group=TaskStandardToolsDegree,fill=TaskStandardToolsDegree), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  geom_text(aes(x=TaskStandardToolsDegreeCat, y = percent,group=TaskStandardToolsDegree,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(data = temp_abs,aes(x=TaskStandardToolsDegreeCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('To what degree do you use available tools or standards?'),width = 100)) +
  theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 20)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q7.To.what.degree.do.you.use.available.tools.or.standards.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,units = "cm",
              res = 600,scaling=0.75)
pTSD
dev.off()

#### sharing and reusing data ####
# recreate different datasets
# more elegant
data0 = neuro_data %>% dplyr::select( 'Response ID',
                                      starts_with('Have you shared data with'),
                                      starts_with('Do you have existing data'),
                                      starts_with('Think of re-using data'),
                                      starts_with('Think of data sharing')
)

comb_string_vec = c('Have you shared data with',
                    'Do you have existing data',
                    'Think of re-using data',
                    'Think of data sharing')
comb_col_names = c('SharingData',
                   'ExistingData',
                   'ThinkReusingData',
                   'ThinkSharingData')

data = wide_to_long_as(data0,comb_string_vec,comb_col_names)

# Data sharing plot
temp = data %>% select( 'Response ID',SharingDataCat,SharingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()

temp = temp %>% filter(SharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(SharingDataCat) %>% summarise(n_abs = sum(n))
temp = temp %>% filter(SharingData == 'Yes')

pSD = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(SharingDataCat,percent,function(x) max(x[SharingData == 'Yes'],na.rm = TRUE)), y = percent,
                     group=SharingData,fill=SharingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=SharingDataCat, y = percent,group=SharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(aes(x=SharingDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Have you shared data with... ','n = ',unique(temp_abs$n_abs)),width = 60)) +
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) +
  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q8.Have.you.shared.data.with.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pSD
dev.off()

# Existing Data plot
temp = data %>% select( 'Response ID',ExistingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()

temp = temp %>% filter(ExistingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 

pED = ggplot(data=temp) + 
  geom_histogram(mapping=aes(x=reorder(ExistingData,percent),y=percent),
                 stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=ExistingData,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size) +
  geom_text(aes(x=ExistingData,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() +
  ggtitle(str_wrap(paste0('Do you have existing data sets (experiments) that should be kept alive by making them available for reuse? ','n = ',sum(temp$n)),width = 70)) +
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size)) +
  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)})

ragg::agg_tiff('Q9.Do.you.have.existing.data.sets.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pED
dev.off()

# make a Think of Reusing Data plot 
temp = data %>% select( 'Response ID',ThinkReusingDataCat,ThinkReusingData) %>% mutate_if(is.factor, replace_factor_na) %>% unique()

# change level order
temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Undecided')
temp$ThinkReusingData = relevel(temp$ThinkReusingData,'No')
temp$ThinkReusingData = relevel(temp$ThinkReusingData,'Not answered')

temp = temp %>% filter(ThinkReusingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(ThinkReusingDataCat) %>% summarise(n_abs = sum(n))

pTRD = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(ThinkReusingDataCat,percent,function(x) max(x[ThinkReusingData == 'Yes'],na.rm = TRUE)), y = percent,
                     group=ThinkReusingData,fill=ThinkReusingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  geom_text(aes(x=ThinkReusingDataCat, y = percent,group=ThinkReusingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(data = temp_abs,aes(x=ThinkReusingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Think of re-using data from repositories.'),width = 100)) +
  theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
  scale_color_manual(values=rev(color_pal_vec[1:3]))  + scale_fill_manual(values = rev(color_pal_vec[1:3])) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q10.Think.of.re.using.data.from.repositories.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", 
              res = 600,scaling = 0.75)
pTRD
dev.off()

# make a Think of Sharing Data plot 
temp = data %>% select( 'Response ID',ThinkSharingDataCat,ThinkSharingData) %>% mutate_if(is.factor, replace_factor_na) %>%  unique()

# change level order
temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Undecided')
temp$ThinkSharingData = relevel(temp$ThinkSharingData,'No')
temp$ThinkSharingData = relevel(temp$ThinkSharingData,'Not answered')

temp = temp %>% filter(ThinkSharingData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(ThinkSharingDataCat) %>% summarise(n_abs = sum(n))

pTSD = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(ThinkSharingDataCat,percent,function(x) max(x[ThinkSharingData == 'Yes'],na.rm = TRUE)), y = percent,
                     group=ThinkSharingData,fill=ThinkSharingData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  geom_text(aes(x=ThinkSharingDataCat, y = percent,group=ThinkSharingData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(data = temp_abs,aes(x=ThinkSharingDataCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Think of data sharing with researchers who are NOT direct collaborators.'),width = 100)) +
  theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
  scale_color_manual(values=rev(color_pal_vec[1:3]))  + scale_fill_manual(values = rev(color_pal_vec[1:3])) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)})


ragg::agg_tiff('Q11.Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.tiff', width = glob_fig_width, height = nrow(temp_abs)+2,
              units = "cm", res = 600,scaling = 0.75)
pTSD
dev.off()

#### Data repositories and analyses ####
# more elegant
data0 = neuro_data %>% dplyr::select( 'Response ID',
                                      starts_with('Please indicate:'),
                                      starts_with('How do you process and analyze your data')
)

comb_string_vec = c('Please indicate:',
                    'How do you process and analyze your data')
comb_col_names = c('SharingProblems',
                   'HowAnalyzeData')

data = wide_to_long_as(data0,comb_string_vec,comb_col_names)

# make a Sharing Data Problems plot 
temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>%
  filter(as.numeric(SharingProblemsCat) != 6 & as.numeric(SharingProblemsCat) != 7) %>% droplevels() 

# change level order
levels(temp$SharingProblemsCat)
temp$SharingProblems = relevel(temp$SharingProblems,'Rather agree')
temp$SharingProblems = relevel(temp$SharingProblems,'Undecided')
temp$SharingProblems = relevel(temp$SharingProblems,'Rather disagree')
temp$SharingProblems = relevel(temp$SharingProblems,'Fully disagree')
temp$SharingProblems = relevel(temp$SharingProblems,'Not answered')

temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n))

pSP = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Fully agree'],na.rm = TRUE)), y = percent,
                     group=SharingProblems,fill=SharingProblems), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Sharing problems. Please indicate: '),width = 100)) +
  theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q12.Sharing.problems.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", res = 600,scaling =0.75)
pSP
dev.off()

# make a How to exercise plot 
temp = data %>% select( 'Response ID',HowAnalyzeDataCat,HowAnalyzeData) %>% mutate_if(is.factor,replace_factor_na) %>% unique()

# change the specific answers to yes
idx = grepl('Other',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)
temp$HowAnalyzeData[idx] = c('Yes')
tempID = temp$'Response ID'[grepl('Manual inspection and analysis',temp$HowAnalyzeDataCat) & !grepl('Not answered',temp$HowAnalyzeData)]
idx = grepl('Other',temp$HowAnalyzeDataCat) & grepl('Not answered',temp$HowAnalyzeData) & (temp$'Response ID' %in% tempID)
temp$HowAnalyzeData[idx] = c('No')

temp = temp %>% filter(HowAnalyzeData != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(HowAnalyzeDataCat) %>% summarise(n_abs = sum(n))
temp = temp %>% filter(HowAnalyzeData == 'Yes')

pHAD = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(HowAnalyzeDataCat,percent,function(x) max(x[HowAnalyzeData == 'Yes'],na.rm = TRUE)), y = percent,
                     group=HowAnalyzeData,fill=HowAnalyzeData), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=HowAnalyzeDataCat, y = percent,group=HowAnalyzeData,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(aes(x=HowAnalyzeDataCat,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('How do you process and analyze your data? ','n = ',unique(temp_abs$n_abs)),width = 60)) +
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) +
  scale_color_manual(values=color_pal_vec)  + scale_fill_manual(values = color_pal_vec) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q13.How.do.you.process.and.analyze.your.data.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pHAD
dev.off()

# make publishing plot
temp = data %>% select( 'Response ID',SharingProblemsCat,SharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>% unique() %>% 
  filter(as.numeric(SharingProblemsCat) == 6 | as.numeric(SharingProblemsCat) == 7)

# change level order
temp$SharingProblems = relevel(temp$SharingProblems,'Undecided')
temp$SharingProblems = relevel(temp$SharingProblems,'No')
temp$SharingProblems = relevel(temp$SharingProblems,'Not answered')

temp = temp %>% filter(SharingProblems != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(SharingProblemsCat) %>% summarise(n_abs = sum(n))

pSO = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(SharingProblemsCat,percent,function(x) max(x[SharingProblems == 'Yes'],na.rm = TRUE)), y = percent,
                     group=SharingProblems,fill=SharingProblems), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  geom_text(aes(x=SharingProblemsCat, y = percent,group=SharingProblems,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(data = temp_abs,aes(x=SharingProblemsCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Sharing opinions. Please indicate: ','n = ',unique(temp_abs$n_abs)),width = 60)) +
  theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE)) +
  scale_color_manual(values=rev(color_pal_vec[1:3]))  + scale_fill_manual(values = rev(color_pal_vec[1:3])) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q14.Sharing.opinions.please.indicate.tiff', width = glob_fig_width, height = nrow(temp_abs)+3.5, units = "cm", res = 600)
pSO
dev.off()

#### Research data management in general ####
# more elegant
data0 = neuro_data %>% dplyr::select( 'Response ID',
                                      starts_with('What is your opinion'),
                                      starts_with('Applying research data management'),
                                      starts_with('When do you employ'),
                                      starts_with('Do you have dedicated'),
                                      starts_with('How much time do you currently'),
                                      starts_with('Please rank the top'))

comb_string_vec = c('What is your opinion',
                    'Applying research data management',
                    'When do you employ',
                    'Do you have dedicated',
                    'How much time do you currently',
                    'Please rank the top')
comb_col_names = c('StatementsOpinion',
                   'ApplyDataManagement',
                   'EmployTools',
                   'DedicatedPersonal',
                   'TimeConsum',
                   'TopSharingProblems')

data = wide_to_long_as(data0,comb_string_vec,comb_col_names)

# make a What is your opinion plot 
temp = data %>% select( 'Response ID',StatementsOpinionCat,StatementsOpinion) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()

# change level order
levels(droplevels(temp$StatementsOpinion))
temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather agree')
temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Undecided')
temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Rather disagree')
temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Fully disagree')
temp$StatementsOpinion = relevel(temp$StatementsOpinion,'Not answered')

temp = temp %>% filter(StatementsOpinion != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(StatementsOpinionCat) %>% summarise(n_abs = sum(n))

pOS = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(StatementsOpinionCat,percent,function(x) max(x[StatementsOpinion == 'Fully agree'],na.rm = TRUE)), y = percent,
                     group=StatementsOpinion,fill=StatementsOpinion), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  geom_text(aes(x=StatementsOpinionCat, y = percent,group=StatementsOpinion,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(data = temp_abs,aes(x=StatementsOpinionCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('What is your opinion on the following statements?'),width = 100)) +
  theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 35)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q15.What.is.your.opinion.on.the.following.statements.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, 
              units = "cm", res = 600,scaling = 0.75)
pOS
dev.off()

# make a Apply Data Management plot 
temp = data %>% select( 'Response ID',ApplyDataManagementCat,ApplyDataManagement) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()

# change level order
temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather agree')
temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Undecided')
temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Rather disagree')
temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Fully disagree')
temp$ApplyDataManagement = relevel(temp$ApplyDataManagement,'Not answered')

temp = temp %>% filter(ApplyDataManagement != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(ApplyDataManagementCat) %>% summarise(n_abs = sum(n))

pARM = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(ApplyDataManagementCat,percent,function(x) max(x[ApplyDataManagement == 'Fully agree'],na.rm = TRUE)), y = percent,
                     group=ApplyDataManagement,fill=ApplyDataManagement), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  geom_text(aes(x=ApplyDataManagementCat, y = percent,group=ApplyDataManagement,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(data = temp_abs,aes(x=ApplyDataManagementCat,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Applying research data management...'),width = 100)) +
  theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 30)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q16.Applying.research.data.management.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm",
              res = 600,scaling = 0.75)
pARM
dev.off()

# make When do you employ research data management tools and services in your research plot 
temp = data %>% select( 'Response ID',EmployTools) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()

temp = temp %>% filter(EmployTools != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2))

pWOT = ggplot(data=temp) + 
  geom_histogram(mapping=aes(x=reorder(EmployTools,percent),y=percent),
                 stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=EmployTools,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size) +
  geom_text(aes(x=EmployTools,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() +
  ggtitle(str_wrap(paste0('When do you employ research data management tools and services in your research? ','n = ',sum(temp$n)),width = 70)) + 
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size)) +
  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})

ragg::agg_tiff('Q17.When.do.you.employ.research.data.management.tools.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pWOT
dev.off()

# make Do you have dedicated personnel with research data management or data curation expertise plot 
temp = data %>% select( 'Response ID',DedicatedPersonal) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()

temp = temp %>% filter(DedicatedPersonal != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2))

pDP = ggplot(data=temp) + 
  geom_histogram(mapping=aes(x=reorder(DedicatedPersonal,percent),y=percent),
                 stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size) +
  geom_text(aes(x=DedicatedPersonal,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() +
  ggtitle(str_wrap(paste0('Do you have dedicated personnel with research data management or data curation expertise? ','n = ',sum(temp$n)),width = 70)) + 
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size)) +
  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})

ragg::agg_tiff('Q18.Do.you.have.dedicated.personnel.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pDP
dev.off()

# make How much time do you currently need to ready a dataset from your lab for publication and re-use plot 
temp = data %>% select( 'Response ID',TimeConsum) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()

temp = temp %>% filter(TimeConsum != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2))

pHMT = ggplot(data=temp) + 
  geom_histogram(mapping=aes(x=reorder(TimeConsum,percent),y=percent),
                 stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=TimeConsum,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size) +
  geom_text(aes(x=TimeConsum,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() +
  ggtitle(str_wrap(paste0('How much time do you currently need to ready a dataset from your lab for publication and re-use? ','n = ',sum(temp$n)),width = 70)) + 
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size)) +
  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})

ragg::agg_tiff('Q19.How.much.time.do.you.currently.need.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pHMT
dev.off()


# make a Top Sharing Data Problems plot
# hier würde ich die nicht angegebenen werte wirklich entfernen alles andere sieht
# dämlich aus und macht unnötig Probleme bzw ist schwer zu sehen was nun die wichtigen Dinge sind
temp = data %>% select( 'Response ID',TopSharingProblemsCat,TopSharingProblems) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()

# change level order
temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 2')
temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 3')
temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 4')
temp$TopSharingProblemsCat = relevel(temp$TopSharingProblemsCat,'Rank 5')

temp = temp %>% filter(TopSharingProblems != 'Not answered') %>% group_by_at(vars('TopSharingProblems','TopSharingProblemsCat')) %>% summarise(n = n()) %>% droplevels() %>%
  mutate(percent = round(n / sum(n) * 100,2)) 
temp_abs = temp %>% group_by(TopSharingProblems) %>% summarise(n_abs = sum(n))

pTSP = ggplot(data=temp) + 
  geom_histogram(aes(x=reorder(TopSharingProblems,percent,function(x) max(x[TopSharingProblemsCat == 'Rank 1'],na.rm = TRUE)), y = percent,
                     group=TopSharingProblemsCat,fill=TopSharingProblemsCat), stat = 'identity',width = 0.75,position = 'stack') + ylim(0,105) +
  geom_text(aes(x=TopSharingProblems, y = percent,group=TopSharingProblemsCat,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size,) +
  geom_text(data = temp_abs,aes(x=TopSharingProblems,y=100,label = paste0('n=',n_abs)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() + 
  ggtitle(str_wrap(paste0('Please rank the top 5 most pressing issues: '),width = 100)) +
  theme(legend.position = "bottom", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size),
        legend.text=element_text(size=glob_text_size)) + guides(fill = guide_legend(reverse = TRUE,nrow = 1, byrow = TRUE)) +
  scale_color_manual(values=rev(color_pal_vec[1:5]))  + scale_fill_manual(values = rev(color_pal_vec[1:5])) + 
  scale_x_discrete(labels=function(x){str_wrap(x,width = 40)}) # nice regular expression solution for multiple lined labels

ragg::agg_tiff('Q20.Please.rank.the.top.most.pressing.issues.tiff', width = glob_fig_width, height = nrow(temp_abs)+2, units = "cm", 
              res = 600,scaling = 0.75)
pTSP
dev.off()

#### Membership ####
# more elegant
data0 = neuro_data %>% dplyr::select( 'Response ID',
                                      starts_with('Are you member of'))

comb_string_vec = c('Are you member of')
comb_col_names = c('MemberOf')

data = wide_to_long_as(data0,comb_string_vec,comb_col_names)

# make a Are you member of, or otherwise involved in, other NFDI consortia or initiatives plot 
temp = data %>% select( 'Response ID',MemberOf) %>% mutate_if(is.factor,replace_factor_na) %>%  unique()

temp = temp %>% filter(MemberOf != 'Not answered') %>% group_by_at(vars(-'Response ID')) %>% summarise(n = n()) %>% 
  mutate(percent = round(n / sum(n) * 100,2))

pMO = ggplot(data=temp) + 
  geom_histogram(mapping=aes(x=reorder(MemberOf,percent),y=percent),
                 stat = 'identity',width = 0.75,colour = color_pal_vec[1], fill=color_pal_vec[1]) +
  ylim(0,max(temp$percent)+5) +
  geom_text(aes(x=MemberOf,y=percent,label = paste0(round(percent,glob_dec_round),'%')),position=position_stack(vjust=0.5),
            colour = "white", size = glob_insideBar_text_size) +
  geom_text(aes(x=MemberOf,y=percent,label = paste0('n=',n)),hjust = -0.25,
            colour = "black", size = glob_insideBar_text_size) +
  xlab('') + ylab('percent (%)') + coord_flip() +
  ggtitle(str_wrap(paste0('Are you member of, or otherwise involved in, other NFDI consortia or initiatives? ','n = ',sum(temp$n)),width = 100)) + 
  theme(legend.position = "none", legend.box = "horizontel",legend.title = element_blank(),
        axis.text = element_text(angle = 0, vjust = 0.5, hjust=0.5,size = glob_text_size),
        axis.title = element_text(size = glob_text_size),
        plot.title = element_text(size = glob_title_text_size)) +
  scale_x_discrete(labels=function(x){str_wrap(x,width = 25)})

ragg::agg_tiff('Q21.Are.you.member.of.tiff', width = glob_fig_width, height = nrow(temp)+2, units = "cm", res = 600)
pMO
dev.off()