create_Picture4.R 133 KB


  # Start from an empty global environment so objects left over from an
  # earlier run cannot leak into this analysis.
  rm(list = ls())
  # The CSV file has to be in the same directory as this script.
  # Inside RStudio the working directory is moved to the folder of the script
  # that is open in the editor. Outside RStudio (Rscript, batch jobs, ...) the
  # rstudioapi call would abort the script, so the current working directory
  # is kept unchanged in that case.
  if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
    script_path <- rstudioapi::getSourceEditorContext()$path
    if (!is.null(script_path) && nzchar(script_path)) {
      setwd(dirname(script_path))
    }
  }
  # Form feed character: clears the console in RStudio.
  cat("\014")
  7. # load libraries
  8. library(ggplot2)
  9. library(dplyr)
  10. library(ggpubr)
  11. library(ragg)
  12. library(reshape2)
  13. library(data.table)
  14. library(stringr)
  15. library(cowplot)
  16. library(ggrepel)
  17. library(forcats)
  18. library(yarrr)
  # Load the data ----
  # Empty cells and the literal string "N/A" are read as NA.
  #neuro_data <- read.csv("results-survey197421_nurkomplett.csv",row.names=NULL,na.strings=c("","N/A"),sep=',')
  neuro_data2 <- read.csv("results-survey197421_alledaten.csv",
                          row.names = NULL,
                          na.strings = c("", "N/A"),
                          sep = ',')
  # Keep only the rows in which the current (primary) position was answered.
  neuro_data <- neuro_data2[!is.na(neuro_data2$My.current..primary..position.is.), ]
  # The analysis asks what distinguishes people who share data from the others,
  # and more generally which problems exist in our data infrastructure.
  colnames(neuro_data)[1] <- "Response.ID"
  # Remove one outlier ==> empty row.
  # `%in%` never returns NA, so a row whose Response.ID is missing is kept
  # as it is instead of being turned into an all-NA row by `!=`.
  neuro_data <- neuro_data[!(neuro_data$Response.ID %in% 78), ]
  # Replace whitespace and commas in the column names with dots.
  colnames(neuro_data) <- str_replace_all(colnames(neuro_data), " ", ".")
  colnames(neuro_data) <- str_replace_all(colnames(neuro_data), ",", ".")
  #######################################################
  #### Figure 1 #########################################
  ###############
  #### Neuro Disciplines + Current Position #############
  #######################################################
  # Select the question groups needed for Figure 1.
  data0 <- neuro_data %>%
    dplyr::select(Response.ID,
                  starts_with('Have.you.shared.data.with'),
                  starts_with('I.work.at'),
                  starts_with('My.current..'),
                  starts_with('Which.neuroscience.discipline.s.'),
                  starts_with('Please.state.if.your.'))
  # Column-name pattern of every question group ...
  comb_string_vec <- c('I.work.at',
                       'My.current..',
                       'Which.neuroscience.discipline.s.',
                       'Please.state.if.your.',
                       'Have.you.shared.data.with')
  # ... and the name of the value column the group is melted into
  # (the category column gets the same name with a "Cat" suffix).
  comb_col_names <- c('WorkPlaces',
                      'CurrentPosition',
                      'NeuroDiscipline',
                      'FimilarDataTypes',
                      'DataSharing')
  # Handle this loop with care: every pass melts one more question group, so
  # the resulting long-format data set can become very large.
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(as.data.table(data0),
                              id.vars = which(!is_measure),
                              measure.vars = list(which(is_measure)),
                              variable.name = paste0(comb_col_names[i], 'Cat'),
                              value.name = comb_col_names[i],
                              value.factor = TRUE)
    data0 <- as.data.frame(data0)
    # melt() appends the category column and the value column at the end,
    # so the category column is the second to last one.
    cat_col <- ncol(data0) - 1
    # Make some nicer labels: keep only the text from the end of the first
    # "..." onwards, i.e. the sub-question part of the column name.
    level_strings <- levels(data0[, cat_col])
    sep_end <- str_locate(level_strings, '\\.\\.\\.')[, "end"]
    # Names without a "..." separator are kept in full. Cutting them at an NA
    # position would produce an NA label and blank the whole category.
    has_sep <- !is.na(sep_end)
    level_strings[has_sep] <- substr(level_strings[has_sep],
                                     sep_end[has_sep],
                                     nchar(level_strings[has_sep]))
    level_strings <- gsub('.', ' ', level_strings, fixed = TRUE)
    level_strings <- gsub('e g', 'e.g.', level_strings, fixed = TRUE)
    # Reset the labels.
    levels(data0[, cat_col]) <- level_strings
  }
  data <- data0
  ################################
  # Current Position plot: one row per respondent and position, restricted to
  # positions that were named by at least three respondents.
  temp <- data %>%
    select(Response.ID, CurrentPosition) %>%
    na.omit() %>%
    unique() %>%
    group_by(CurrentPosition) %>%
    filter(n() >= 3)
  # Absolute counts and rounded relative frequencies per position. `ordering`
  # is a hand-written display order and assumes exactly seven positions remain.
  temp_relFreq <- temp %>%
    group_by(CurrentPosition) %>%
    summarise(n = n()) %>%
    mutate(percent = round(n / sum(n) * 100, 0),
           ordering = c(1, 7, 5, 6, 3, 2, 4)) %>%
    arrange(ordering) %>%
    mutate(CurrentPosition = str_replace(CurrentPosition,
                                         "Research data management focused staff",
                                         "RDM staff"))
  #mutate(CurrentPosition = fct_reorder(temp_relFreq$CurrentPosition, temp_relFreq$ordering, min))
  # Wrap long labels: each greedy run of 1 to 20 characters that is followed by
  # whitespace or by the end of the string is written back with a newline in
  # place of that terminator.
  my_label_func <- function(x) {
    wrap_pattern <- '(.{1,20})(\\s|$)'
    gsub(wrap_pattern, '\\1\n', x)
  }
  # Label positions and label texts for the pie chart. The positions are the
  # mid-points of the stacked percentages, taken in descending order of the
  # position name. The labelled table is built once and then split:
  # df_label1 holds the larger groups (n > 10), df_label2 the small ones.
  df_label2 <- temp_relFreq %>%
    arrange(desc(CurrentPosition)) %>%
    mutate(lab_ypos = cumsum(percent) - percent / 2,
           lab_label = my_label_func(paste0(CurrentPosition, ': ', percent, '%')))
  df_label1 <- df_label2 %>%
    filter(n > 10)
  df_label2 <- df_label2 %>%
    filter(n <= 10)
  # Exploratory drafts of the Current Position pie chart. The published
  # version of F1B is built further below and replaces the draft made here.
  mycol <- yarrr::piratepal("xmen", plot.result = TRUE, trans = .1)
  ## No 'x' mapping; bars of constant width; polar coordinates with theta
  ## applied to the Y axis
  F1B <- ggplot(data = temp_relFreq, aes(x = factor(1), fill = CurrentPosition)) +
    geom_bar(width = 1) +
    coord_polar(theta = "y") +
    scale_fill_brewer(palette = "Accent")
  # Explicit print(): a bare object name draws nothing when the script is
  # run through source().
  print(F1B)
  df <- temp_relFreq
  p <- ggplot(df, aes(x = 1, y = percent, fill = CurrentPosition)) +
    #geom_histogram(stats = "identity") +
    geom_bar(stat = "identity") +
    # The closing parenthesis of the title was missing.
    ggtitle(paste0('Current Position (n = ', sum(df$n), ')')) +
    coord_polar(theta = 'y') +
    scale_fill_brewer(palette = "Set3")
  print(p)
  # Second bar layer that adds a black outline around every slice.
  p <- p + geom_bar(stat = "identity", color = 'black')
  p <- p +
    theme(axis.ticks = element_blank(),  # the axis ticks
          axis.title = element_blank(),  # the axis labels
          axis.text = element_blank(),
          panel.grid = element_blank())
  print(p)
  p <- p +
    xlab('') +
    ylab('')
  print(p)
  # Final Figure 1B: pie chart of the current position, with the labels of the
  # large groups placed on the slices and those of the small groups nudged
  # outwards.
  # NOTE(review): the title shows the sum of the rounded percentages as
  # "n = ...%"; the draft above used the respondent count sum(n) instead.
  # Confirm which one is intended before publishing.
  F1B <- ggplot(data = temp_relFreq) +
    # geom_col() draws the values as they are; it replaces
    # geom_histogram(stat = 'identity'), which warns about unused binning
    # parameters.
    geom_col(mapping = aes(x = factor(1), y = percent, fill = CurrentPosition),
             width = 1) +
    #coord_polar(theta = "y",start=0, clip = "on") +
    coord_polar(theta = "y") +
    #scale_x_continuous(limits = c(0,360)) +
    xlab('') + ylab('') +
    ggtitle(paste0('Current Position (n = ', sum(temp_relFreq$percent), '%)')) +
    theme(text = element_text(size = 7),
          axis.ticks = element_blank(),
          axis.text = element_blank(),
          panel.grid = element_blank(),
          legend.position = "none") +
    geom_text_repel(data = df_label1,
                    aes(x = factor(1), y = lab_ypos, label = lab_label),
                    colour = "black", box.padding = 0.5, point.size = NA,
                    nudge_x = 0, size = 5.8/.pt) +
    geom_text_repel(data = df_label2,
                    aes(x = factor(1), y = lab_ypos, label = lab_label),
                    colour = "black", box.padding = 0.5, point.size = NA,
                    nudge_x = 1, size = 5.8/.pt) +
    scale_fill_brewer(palette = "Set3") #mycol) #Set3
  # Explicit print() calls: bare object names are only auto-printed at the
  # interactive prompt, not when the script is run through source().
  print(F1B)
  #F1B <- p
  print(sprintf("Absolute und relative Häufigkeiten der %s", "Current Position"))
  print(temp_relFreq, quote = TRUE, row.names = FALSE)
  #png('CurrentPosition.png', width = 30, height = 20, units = "cm", res = 300)
  ragg::agg_tiff("Fig1B.tiff", width = 8, height = 8, units = "cm", res = 600, scaling = 1)
  # Without print() the device would be closed with nothing drawn on it.
  print(F1B)
  dev.off()
  # Neuro Discipline plot.
  # The discipline questions are Yes/No questions ==> only the respondents who
  # did not answer 'No' are needed.
  temp <- data %>%
    select(Response.ID, NeuroDisciplineCat, NeuroDiscipline) %>%
    filter(NeuroDiscipline != 'No') %>%
    na.omit() %>%
    unique() %>%
    group_by(NeuroDiscipline) %>%
    filter(n() >= 3) %>%
    # Shorten the category labels. The (pattern, replacement) pairs are applied
    # one after the other, each to the first match only.
    # NOTE(review): the "Theoretical" pair appears twice, exactly as in the
    # original chain; the second one is most likely redundant.
    mutate(NeuroDisciplineCat = Reduce(
      function(label, fix) str_replace(label, fix[1], fix[2]),
      list(c("imaging", "Imaging"),
           c("Theoretical", "Theoret."),
           c("Theoretical", "Theoret."),
           c("behavioral", "Behav."),
           c("neuroscience", "Neurosci."),
           c("Neuroscience", "Neurosci."),
           c("e.g. electrophysiological recording behavior tracking ", ""),
           c("e.g. patient involvement clinical trials ", ""),
           c("science", "Science"),
           c("e.g. modeling simulation ", "")),
      NeuroDisciplineCat))
  # Absolute numbers per discipline. The percentage is taken relative to the
  # number of respondents counted in the Current Position table
  # (temp_relFreq), so this section must run before temp_relFreq is reused.
  temp_absNumbers <- temp %>%
    group_by(NeuroDisciplineCat, NeuroDiscipline) %>%
    summarise(n = n()) %>%
    mutate(percent = round(n / sum(temp_relFreq$n) * 100)) %>%
    arrange(desc(percent))
  #temp_relFreq2 = temp %>%
  # group_by_at(vars(-Response.ID, -NeuroDiscipline)) %>%
  # summarise(n = n()) %>%
  # mutate(percent = n / 218) %>%
  # arrange(desc(percent))
  #print(temp_relFreq2, quote = TRUE, row.names = FALSE)
  # Figure 1A: horizontal bar chart of the neuroscience disciplines, bars
  # ordered by percentage and labelled with the percentage in white.
  F1A <- ggplot(data = temp_absNumbers) +
    # geom_col() draws the values as they are; it replaces
    # geom_histogram(stat = 'identity'), which warns about unused binning
    # parameters.
    geom_col(mapping = aes(x = reorder(NeuroDisciplineCat, percent), y = percent),
             colour = 'darkblue', fill = 'darkblue',
             width = 0.5) +
    coord_flip() +
    #xlab('') + ylab('') + ggtitle(paste0('Neuro Disciplines \n(n = ',sum(temp_absNumbers$n),', multiple answers possible)')) +
    xlab('') + ylab('') +
    ggtitle(paste0('Neuro Disciplines (n = ', sum(temp_absNumbers$n), ')')) +
    geom_text(aes(x = NeuroDisciplineCat, y = percent, label = paste0(" ", percent, '%')),
              colour = "white", hjust = 1.2, size = 8/.pt) +
    scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 20)) +
    #scale_x_discrete(labels=function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)}) # nice regular expression solution for multiple lined labels
    theme(text = element_text(size = 8))
  # Explicit print() calls: bare object names are only auto-printed at the
  # interactive prompt, not when the script is run through source().
  print(F1A)
  ragg::agg_tiff("Fig1A.tiff", width = 8, height = 8, units = "cm", res = 600, scaling = 1)
  # Without print() the device would be closed with nothing drawn on it.
  print(F1A)
  #plt
  dev.off()
  # Plot both graphs side by side into one figure.
  F1 <- plot_grid(F1A, F1B, nrow = 1, ncol = 2, align = "h", axis = "lr",
                  scale = 1, rel_widths = c(1, 1))
  # Explicit print() calls: bare object names are only auto-printed at the
  # interactive prompt, not when the script is run through source().
  print(F1)
  ragg::agg_tiff("Fig1.tiff", width = 17.5, height = 8, units = "cm", res = 600, scaling = 1)
  #tiff('Fig1_CurrentntPosition_AND_NeuroDiscipline.tiff', width = 30, height = 20, units = "cm", res = 300)
  #png('CurrentntPositionNeuroDiszipline.png', width = 30, height = 20, units = "cm", res = 300)
  # Without print() the device would be closed with nothing drawn on it.
  print(F1)
  dev.off()
  ####################################################
  #### Figure 2 ######################################
  ###############
  #### General Data Sharing ##########################
  # Select the columns needed for Figure 2.
  data0 <- neuro_data %>%
    dplyr::select(Response.ID,
                  I.work.at...I.am.affiliated.with.,
                  My.current..primary..position.is.,
                  starts_with('Have.you.shared.data.with'),
                  starts_with('Do.you.have.existing.data.sets.'),
                  starts_with('Which.neuroscience.discipline.s.'),
                  starts_with('Please.state.if.your.'))
  # Give the single-answer questions short names.
  names(data0)[names(data0) == 'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.'] <- "Existing_Data"
  names(data0)[names(data0) == 'I.work.at...I.am.affiliated.with.'] <- "Work_Place"
  names(data0)[names(data0) == 'My.current..primary..position.is.'] <- 'CurrentPosition'
  # Column-name pattern of every question group ...
  comb_string_vec <- c('Which.neuroscience.discipline.s.',
                       'Please.state.if.your.',
                       'Have.you.shared.data.with')
  # ... and the name of the value column the group is melted into
  # (the category column gets the same name with a "Cat" suffix).
  comb_col_names <- c('NeuroDiscipline',
                      'FimilarDataTypes',
                      'DataSharing')
  # Handle this loop with care: every pass melts one more question group, so
  # the resulting long-format data set can become very large.
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(as.data.table(data0),
                              id.vars = which(!is_measure),
                              measure.vars = list(which(is_measure)),
                              variable.name = paste0(comb_col_names[i], 'Cat'),
                              value.name = comb_col_names[i],
                              value.factor = TRUE)
    data0 <- as.data.frame(data0)
    # melt() appends the category column and the value column at the end,
    # so the category column is the second to last one.
    cat_col <- ncol(data0) - 1
    # Make some nicer labels: keep only the text from the end of the first
    # "..." onwards, i.e. the sub-question part of the column name.
    level_strings <- levels(data0[, cat_col])
    sep_end <- str_locate(level_strings, '\\.\\.\\.')[, "end"]
    # Names without a "..." separator are kept in full. Cutting them at an NA
    # position would produce an NA label and blank the whole category.
    has_sep <- !is.na(sep_end)
    level_strings[has_sep] <- substr(level_strings[has_sep],
                                     sep_end[has_sep],
                                     nchar(level_strings[has_sep]))
    level_strings <- gsub('.', ' ', level_strings, fixed = TRUE)
    level_strings <- gsub('e g', 'e.g.', level_strings, fixed = TRUE)
    # Reset the labels.
    levels(data0[, cat_col]) <- level_strings
  }
  data <- data0
  # Respondents who answered the "shared with external collaborators" question.
  temp2 <- neuro_data %>%
    select(Response.ID, Have.you.shared.data.with.....External.collaborators.) %>%
    na.omit()
  ###
  # Data sharing: one row per respondent and sharing category that was not
  # answered with 'No'; answers given fewer than three times are dropped.
  #temp = data %>% select(Response.ID,FimilarDataTypesCat,FimilarDataTypes) %>% filter(FimilarDataTypes != 'No') %>%
  # na.omit() %>% unique() %>% group_by(FimilarDataTypes) %>% filter(n() >= 3)
  temp <- data %>%
    select(Response.ID, DataSharingCat, DataSharing) %>%
    filter(DataSharing != 'No') %>%
    na.omit() %>%
    unique() %>%
    group_by(DataSharing) %>%
    filter(n() >= 3)
  #temp = data %>% select(Response.ID,DataSharingCat,DataSharing) %>% filter()
  # na.omit() %>% unique() %>% group_by(DataSharing) %>% filter(n() >= 3)
  # Absolute numbers and rounded percentages per category and answer.
  # NOTE(review): 144 is a hard-coded denominator, presumably the number of
  # respondents who answered the sharing questions -- confirm against the data.
  temp_absNumbers <- temp %>%
    group_by(DataSharingCat, DataSharing) %>%
    summarise(n = n()) %>%
    mutate(percent = round(n/144*100, 0))
  # Relative frequency per category only (answers pooled).
  temp_relFreq <- temp %>%
    group_by(DataSharingCat) %>%
    summarise(n = n()) %>%
    mutate(percent = n / 144)
  # Figure 2: horizontal bar chart of the data-sharing categories, bars ordered
  # by percentage and labelled with the percentage in white.
  p <- ggplot(data = temp_absNumbers) +
    # geom_col() draws the values as they are; it replaces
    # geom_histogram(stat = 'identity'), which warns about unused binning
    # parameters.
    #geom_histogram(mapping=aes(x=reorder(TaskStandardToolsCat, percent),y= percent),
    geom_col(mapping = aes(x = reorder(DataSharingCat, percent), y = percent),
             colour = 'darkblue', fill = 'darkblue',
             width = 0.5) +
    coord_flip() +
    theme(text = element_text(size = 11)) +
    #theme(plot.margin = unit(c(0.5,0.2,0.2,5), "cm")) +
    xlab('') + ylab('') + ggtitle(paste0('Datasharing')) +
    geom_text(aes(x = DataSharingCat, y = percent, label = paste0(percent, '%')),
              colour = "white", hjust = 1.2) +
    scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 20))
  #scale_x_discrete(labels=function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)}) + # nice regular expression solution for multiple lined labels
  #
  # Notes on text sizes:
  # The theme text size does not depend on the height of the graphic; it
  # relates to the resolution (600 per cm).
  # In theme() sizes are given in pt, so 15 means 15 pt. In geom_text() sizes
  # are given in mm, so 15 means 15 mm.
  #   1 pt = 1/72 in
  #   1 pt = 0.35 mm
  # For plot text to match a 15 pt title it needs 15 pt * 0.35 mm/pt = 5.25 mm.
  # ggplot2 provides the conversion constant .pt = 2.845276 (1/.pt = 0.35):
  #   ggplot2::.pt
  #   ## [1] 2.845276
  #   from pt to mm: mm = pt / .pt -> 15 / 2.845276 = 5.27
  #   from mm to pt: pt = mm * .pt -> 5.27 * 2.845276 = 15
  # Example that gives geom_text the same size as the title (size = 15/.pt):
  #   plt <- penguins %>%
  #     ggplot(aes(bill_length_mm, bill_depth_mm, color = species)) +
  #     geom_point() +
  #     geom_text(x = 45, y = 20, label = "Example of font problem", size = 15/.pt, inherit.aes = FALSE) +
  #     labs(title = "Bill length and depth relation by species") +
  #     theme(plot.title = element_text(size = 15))
  # Affinity Designer reports a 2 cm margin ...
  #
  # Explicit print() calls: bare object names are only auto-printed at the
  # interactive prompt, not when the script is run through source().
  print(p)
  #tiff('Fig3_Use_Standard_Tools.tiff', width = 17.5, height = 8, units = "cm", res = 600)
  #pFD
  ragg::agg_tiff("Fig2_DataSharing3.tiff", width = 17.5, height = 6, units = "cm", res = 600, scaling = 1)
  # Without print() the device would be closed with nothing drawn on it.
  print(p)
  #plt
  dev.off()
  #####################################
  ####
  # Reusing data of others
  #####################################
  # Select the three questions of this section and give them short names.
  neuro_data_tmp <- neuro_data %>%
    dplyr::select(
      Response.ID,
      Other_can_answer = Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
      Existing_Data = Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
      Shared_Publicly = Have.you.shared.data.with.....Publicly.
    )
  # All tables below use complete cases only (na.omit on all four columns).
  # NOTE(review): the printed numbers are picked by row position ([3], [1]),
  # which relies on the alphabetical order of the answer options -- confirm
  # that these rows are the intended answers.
  #
  # Among respondents who own at least one data set: distribution of the
  # answers to the "existing data sets" question.
  temp_absNumbers <- neuro_data_tmp %>%
    na.omit() %>%
    filter(!Existing_Data == 'I have no datasets') %>%
    group_by(Existing_Data) %>%
    summarise(n = n()) %>%
    mutate(percent = n / sum(n))
  # Every message is now terminated with a newline; before, consecutive cat()
  # calls ran together on one console line.
  cat("Von den Antwortenden die mindestens einen Datensatz haben ... wieviel Prozent haben diesen verfuegbar gemacht", "\n")
  cat(temp_absNumbers$n[3], '(', round(temp_absNumbers$percent[3],3)*100, '%) haben diese DAten verfuegbar gemacht', "\n")
  print(temp_absNumbers, quote = TRUE, row.names = FALSE)
  # Same subgroup: can other researchers answer their own questions by
  # re-using the respondent's data?
  temp_absNumbers <- neuro_data_tmp %>%
    na.omit() %>%
    filter(!Existing_Data == 'I have no datasets') %>%
    group_by(Other_can_answer) %>%
    summarise(n = n()) %>%
    mutate(percent = n / sum(n))
  cat("Von den Antwortenden die mindestens einen Datensatz haben ... ", "\n")
  cat( temp_absNumbers$n[3], '(', round(temp_absNumbers$percent[3],3)*100, '%) , of all respondents that have at least one dataset are of the opinion that other researchers could answer their own research questions by re-using data from their research.', "\n")
  # Respondents who answered 'Yes' to that question: public sharing behaviour.
  temp_absNumbers <- neuro_data_tmp %>%
    na.omit() %>%
    #filter(!Existing_Data == 'I have no datasets') %>%
    filter(Other_can_answer == 'Yes') %>%
    group_by(Shared_Publicly) %>%
    summarise(n = n()) %>%
    mutate(percent = n / sum(n))
  S1 <- 'However, even for this subgroup, of scientists in possession of data of which they think are valuable to others '
  S2 <- '% have never shared any of their data publicly.'
  cat(S1, round(temp_absNumbers$percent[1],3)*100, S2, "\n")
  ##########################################################
  ##########################################################
  # Research data management skills are essential for preparing, analyzing, and publicly sharing data.
  # Only 18% think that they have proficiency in research data management and only 34% think that they
  # know which research data management methods are available.
  # Interestingly, 58% of all respondents nevertheless think that they can handle their research data
  # according to community standards. This could be due to the availability of data research managers
  # who help in data handling.
  # However, only 25 (20%) of participants have dedicated personnel with research data management
  # or data curation expertise.
  #
  # Select the questions of this section and give them short names.
  neuro_data_tmp <- neuro_data %>%
    dplyr::select(
      Response.ID,
      Other_can_answer = Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
      Existing_Data = Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
      I_have_RDM_personal = Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
      I_can_handle_RD_community_standards = What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
      I_know_how_to_publish_my_data_reproducible = Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
      I_have_proficiency_in_RDM = What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
      Iam_highly_knowledgeable_in_RDM = What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
      I_know_RDM_available_Methods = What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
      Shared_Publicly = Have.you.shared.data.with.....Publicly.
    )
  # The literal "/n" in these sentences was printed as text; it is now a real
  # line break.
  S1 <- 'Research data management skills are essential for preparing, analyzing, and publicly sharing data.\nOnly '
  S2 <- 'think that they have proficiency in research data management and only '
  S3 <- 'think that they know which research data management methods are available.\nInterestingly, '
  S4 <- 'of all respondents nevertheless think that they can handle their research data according to community standards. This could be due to the availability of data research managers who help in data handling. However, only '
  S5 <- 'of participants have dedicated personnel with research data management or data curation expertise.'
  agree <- c('Fully agree', 'Rather agree')
  disagree <- c('Fully disagree', 'Rather disagree')
  not_agree <- c('Fully agree', 'Rather agree', 'Undecided')
  not_disagree <- c('Fully disagree', 'Rather disagree', 'Undecided')

  # Count the answers of one question among the complete cases of `df`
  # (rows without any NA in any column) and add each answer's share.
  #   df              data frame holding the question columns
  #   column          name of the question column, as a string
  #   collapse_likert if TRUE, answers containing " agree" become "Agree" and
  #                   answers containing " disagree" become "Disagree"
  # Returns one row per answer with the columns <column>, n and percent.
  tally_answers <- function(df, column, collapse_likert = TRUE) {
    complete <- na.omit(df)
    if (collapse_likert) {
      answers <- complete[[column]]
      answers <- replace(answers, str_detect(answers, " agree"), "Agree")
      answers <- replace(answers, str_detect(answers, " disagree"), "Disagree")
      complete[[column]] <- answers
    }
    complete %>%
      group_by_at(column) %>%
      summarise(n = n()) %>%
      mutate(percent = n / sum(n))
  }

  # Print "<n> ( <percent> %) " for one answer of a tally. The trailing empty
  # string makes cat() emit a separating blank, so the number no longer runs
  # into the sentence that follows.
  report_share <- function(tally, column, answer) {
    hit <- tally[[column]] == answer
    cat(tally$n[hit], '(', round(tally$percent[hit], 3) * 100, '%)', '')
  }

  cat(S1)
  df_np <- tally_answers(neuro_data_tmp, "I_have_proficiency_in_RDM")
  report_share(df_np, "I_have_proficiency_in_RDM", "Agree")
  cat(S2)
  df_np <- tally_answers(neuro_data_tmp, "I_know_RDM_available_Methods")
  report_share(df_np, "I_know_RDM_available_Methods", "Agree")
  cat(S3)
  df_np <- tally_answers(neuro_data_tmp, "I_can_handle_RD_community_standards")
  report_share(df_np, "I_can_handle_RD_community_standards", "Agree")
  cat(S4)
  # The personnel question is not a Likert item, so its answers stay as given.
  df_np <- tally_answers(neuro_data_tmp, "I_have_RDM_personal", collapse_likert = FALSE)
  report_share(df_np, "I_have_RDM_personal", "Yes, in my lab")
  cat(S5, "\n")
  463. ###########################################################
  464. ###########################################################
  465. # Use of tools and standards
  466. ###########################################################
  467. #
  468. # We inquired about the use of existing tools and standards for different research
  469. # data management activities, if this process step was relevant for the participants.
  470. neuro_data_tmp = 0
  471. neuro_data_tmp = neuro_data %>% dplyr::select(Response.ID,
  472. Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
  473. Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
  474. Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
  475. What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
  476. Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
  477. What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
  478. What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
  479. What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
  480. Have.you.shared.data.with.....Publicly.,
  481. starts_with('For.which.of.these.tasks.'),
  482. starts_with('To.what.degree.do.you.')
  483. )
  484. #Existing_Data
  485. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.')] <- "Existing_Data"
  486. # Other_can_answer
  487. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..')] <- "Other_can_answer"
  488. # Shared_Publicly
  489. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'Have.you.shared.data.with.....Publicly.')] <- "Shared_Publicly"
  490. # I_know_how_to_publish_my_data_reproducible
  491. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.')]<- 'I_know_how_to_publish_my_data_reproducible'
  492. # I_have_RDM_personal
  493. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.')] <- 'I_have_RDM_personal'
  494. # I_can_handle_RD_community_standards
  495. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.')] <- 'I_can_handle_RD_community_standards'
  496. # I_have_proficiency_in_RDM
  497. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.')] <- 'I_have_proficiency_in_RDM'
  498. # Iam_highly_knowledgeable_in_RDM
  499. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.')] <- 'Iam_highly_knowledgeable_in_RDM'
  500. # I_know_RDM_available_Methods
  501. colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == 'What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.')] <- 'I_know_RDM_available_Methods'
  502. #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
  503. #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
  504. #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
  505. #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
  506. #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
  507. #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
  508. #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
  509. # Reshape the two multi-column question groups of neuro_data_tmp into long format.
  510. # comb_string_vec holds the column-name prefix of each group, comb_col_names the
  511. # name of its value column; the label column gets the same name plus 'Cat'.
  512. comb_string_vec <- c('For.which.of.these.tasks.', 'To.what.degree.do.you.')
  513. comb_col_names <- c('TaskStandardTools', 'TaskStandardToolsDegree')
  514. library(data.table)
  515. data0 <- neuro_data_tmp
  516. # seq_along() replaces seq(1, length(x), 1), which stops with an error for an empty vector
  517. for (i in seq_along(comb_string_vec)) {
  518.   in_group <- grepl(comb_string_vec[i], colnames(data0))
  519.   data0 <- data.table::melt(as.data.table(data0),
  520.                             id.vars = which(!in_group),
  521.                             measure.vars = list(which(in_group)),
  522.                             variable.name = paste0(comb_col_names[i], 'Cat'),
  523.                             value.name = comb_col_names[i], value.factor = TRUE)
  524.   data0 <- as.data.frame(data0)
  525.   # the label column is the second-to-last column of the melted table
  526.   label_col <- ncol(data0) - 1
  527.   level_strings <- levels(data0[, label_col])
  528.   # make some nicer labels (all levels at once): keep the text from the last dot
  529.   # of the first '...' onwards, turn every dot into a blank - the labels therefore
  530.   # start with a blank - and restore 'e.g.'
  531.   dots_end <- str_locate(level_strings, '\\.\\.\\.')[, 'end']
  532.   level_strings <- substr(level_strings, dots_end, nchar(level_strings))
  533.   level_strings <- gsub('e g', 'e.g.', gsub('\\.', ' ', level_strings))
  534.   levels(data0[, label_col]) <- level_strings
  535. }
  536. data <- data0
  537. S1 = 'We inquired about the use of existing tools and standards for different research data man-agement activities, if this process step was relevant for the participants. '
  538. cat(S1)
  539. agree = c('Fully agree', 'Rather agree')
  540. disagree = c('Fully disagree', 'Rather disagree')
  541. not_agree = c('Fully agree', 'Rather agree', 'Undecided')
  542. not_disagree = c('Fully disagree', 'Rather disagree', 'Undecided')
  543. # Number and share of respondents who agree that they are proficient in research data management
  544. df_np <- neuro_data_tmp %>%
  545.   filter(!is.na(I_have_proficiency_in_RDM)) %>% # drop only rows without an answer to this item; na.omit() on the whole table also dropped every row with an NA in any other column
  546.   mutate(I_have_proficiency_in_RDM = replace(I_have_proficiency_in_RDM, str_detect(I_have_proficiency_in_RDM, " agree"), "Agree")) %>%
  547.   mutate(I_have_proficiency_in_RDM = replace(I_have_proficiency_in_RDM, str_detect(I_have_proficiency_in_RDM, " disagree"), "Disagree")) %>%
  548.   group_by(I_have_proficiency_in_RDM) %>%
  549.   summarise(n = n()) %>%
  550.   mutate(percent = n / sum(n))
  551. cat(df_np$n[df_np$I_have_proficiency_in_RDM=='Agree'], '(', round(df_np$percent[df_np$I_have_proficiency_in_RDM=='Agree'],3)*100, '%)')
  552. cat(S2) # NOTE(review): S2 ... S5 are not assigned in this section - presumably defined earlier; verify
  553. # Number and share of respondents who agree that they know the available RDM methods
  554. df_np <- neuro_data_tmp %>%
  555.   filter(!is.na(I_know_RDM_available_Methods)) %>% # item-wise removal of missing answers
  556.   mutate(I_know_RDM_available_Methods = replace(I_know_RDM_available_Methods, str_detect(I_know_RDM_available_Methods, " agree"), "Agree")) %>%
  557.   mutate(I_know_RDM_available_Methods = replace(I_know_RDM_available_Methods, str_detect(I_know_RDM_available_Methods, " disagree"), "Disagree")) %>%
  558.   group_by(I_know_RDM_available_Methods) %>%
  559.   summarise(n = n()) %>%
  560.   mutate(percent = n / sum(n))
  561. cat(df_np$n[df_np$I_know_RDM_available_Methods=='Agree'], '(', round(df_np$percent[df_np$I_know_RDM_available_Methods=='Agree'],3)*100, '%)')
  562. cat(S3)
  563. # Number and share of respondents who agree that they can handle data according to community standards
  564. df_np <- neuro_data_tmp %>%
  565.   filter(!is.na(I_can_handle_RD_community_standards)) %>% # item-wise removal of missing answers
  566.   mutate(I_can_handle_RD_community_standards = replace(I_can_handle_RD_community_standards, str_detect(I_can_handle_RD_community_standards, " agree"), "Agree")) %>%
  567.   mutate(I_can_handle_RD_community_standards = replace(I_can_handle_RD_community_standards, str_detect(I_can_handle_RD_community_standards, " disagree"), "Disagree")) %>%
  568.   group_by(I_can_handle_RD_community_standards) %>%
  569.   summarise(n = n()) %>%
  570.   mutate(percent = n / sum(n))
  571. cat(df_np$n[df_np$I_can_handle_RD_community_standards=='Agree'], '(', round(df_np$percent[df_np$I_can_handle_RD_community_standards=='Agree'],3)*100, '%)')
  572. cat(S4)
  573. # Number and share of respondents with dedicated RDM personnel in their own lab (answers are used as given)
  574. df_np <- neuro_data_tmp %>%
  575.   filter(!is.na(I_have_RDM_personal)) %>% # item-wise removal of missing answers
  576.   group_by(I_have_RDM_personal) %>%
  577.   summarise(n = n()) %>%
  578.   mutate(percent = n / sum(n))
  579. cat(df_np$n[df_np$I_have_RDM_personal=='Yes, in my lab'], '(', round(df_np$percent[df_np$I_have_RDM_personal=='Yes, in my lab'],3)*100, '%)')
  580. cat(S5)
  581. ######################################################################
  582. ####################################################
  583. #### Figure 3 ######################################
  584. ###############
  585. #### Tools ##########################
  586. # Figure 3: per task, the percentage of respondents who use standard tools.
  587. # One row per respondent and task; the free-text 'Comment'/'Other' items and
  588. # rows with missing values are removed.
  589. temp <- data %>%
  590.   select(Response.ID, TaskStandardToolsCat, TaskStandardTools) %>%
  591.   filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
  592.   na.omit() %>%
  593.   unique() %>%
  594.   droplevels()
  595. # Frequency of every task/answer combination.
  596. # NOTE(review): 159 is presumably the total number of respondents - confirm.
  597. temp_relFreq <- temp %>%
  598.   group_by_at(vars(-Response.ID)) %>%
  599.   summarise(n = n()) %>%
  600.   mutate(share = n / 159)
  601. # Percentage of each answer within a task; only the answers other than 'No' are
  602. # kept and the ' Simulation ' task is left out of the figure.
  603. temp_absNumbers <- temp %>%
  604.   group_by_at(vars(TaskStandardToolsCat, TaskStandardTools)) %>%
  605.   summarise(n = n()) %>%
  606.   mutate(percent = round(n / sum(n) * 100, 0)) %>%
  607.   filter(TaskStandardTools != 'No') %>%
  608.   filter(TaskStandardToolsCat != ' Simulation ') %>%
  609.   arrange(percent)
  610. # Horizontal bar chart, tasks ordered by percentage. geom_col() is the ggplot2
  611. # shorthand for bars whose height is taken from the data (stat = 'identity').
  612. pFD <- ggplot(data = temp_absNumbers) +
  613.   geom_col(mapping = aes(x = reorder(TaskStandardToolsCat, percent), y = percent),
  614.            colour = 'darkblue', fill = 'darkblue', width = 0.5) +
  615.   coord_flip() +
  616.   theme(text = element_text(size = 11)) +
  617.   xlab('') + ylab('') + ggtitle(paste0('Use of Standard Tools for ...')) +
  618.   # percentage written in white at the end of each bar
  619.   geom_text(aes(x = TaskStandardToolsCat, y = percent, label = paste0(percent, "%")),
  620.             colour = "white", hjust = 1.5) +
  621.   # wrap long task labels
  622.   scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 20))
  623. # Show the figure. print() is required because a bare object name is not
  624. # auto-printed when the script is run through source().
  625. print(pFD)
  626. # write the figure to Fig3.tiff
  627. ragg::agg_tiff("Fig3.tiff", width = 17.5, height = 10, units = "cm", res = 600, scaling = 1)
  628. print(pFD)
  629. # close the graphics device opened by agg_tiff()
  630. dev.off()
  631. ######################################################################
  632. ######################################################################
  633. ####################################################
  634. #### Figure 4 ######################################
  635. ###############
  636. #### Tools vs. DataSharing #########################
  637. # Build the working table for Figure 4 from neuro_data: the respondent id,
  638. # nine single questions (selected by their full column name) and four
  639. # multi-column question groups (selected by their column-name prefix).
  640. neuro_data_tmp <- dplyr::select(neuro_data, Response.ID,
  641.   Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
  642.   Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
  643.   Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
  644.   What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
  645.   Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
  646.   What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
  647.   What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
  648.   What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
  649.   Have.you.shared.data.with.....Publicly.,
  650.   starts_with('For.which.of.these.tasks.'),
  651.   starts_with('To.what.degree.do.you.'),
  652.   starts_with('Think.of.re.using.data.'),
  653.   starts_with('Think.of.data.sharing.')
  654. )
  655. # Short names for the single questions (original column name = short name), applied in this order.
  656. rename_map <- c(
  657.   # existing data sets that should be kept alive by making them available for reuse
  658.   'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.' = "Existing_Data",
  659.   # other researchers could answer their own questions by re-using my data
  660.   'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..' = "Other_can_answer",
  661.   # data shared publicly
  662.   'Have.you.shared.data.with.....Publicly.' = "Shared_Publicly",
  663.   # knows how to publish data analysis workflows in a reproducible manner
  664.   'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.' = 'I_know_how_to_publish_my_data_reproducible',
  665.   # dedicated personnel with research data management or data curation expertise
  666.   'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.' = 'I_have_RDM_personal',
  667.   # can handle research data according to community standards
  668.   'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.' = 'I_can_handle_RD_community_standards',
  669.   # has proficiency in research data management
  670.   'What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.' = 'I_have_proficiency_in_RDM',
  671.   # is highly knowledgeable about research data management in the own field
  672.   'What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.' = 'Iam_highly_knowledgeable_in_RDM',
  673.   # knows which research data management methods are available
  674.   'What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.' = 'I_know_RDM_available_Methods'
  675. )
  676. for (old_name in names(rename_map)) {
  677.   is_old <- names(neuro_data_tmp) == old_name
  678.   names(neuro_data_tmp)[is_old] <- rename_map[[old_name]]
  679. }
  680. # Reshape the four multi-column question groups of neuro_data_tmp into long format.
  681. # comb_string_vec holds the column-name prefix of each group, comb_col_names the
  682. # name of its value column; the label column gets the same name plus 'Cat'.
  683. comb_string_vec <- c('For.which.of.these.tasks.', 'To.what.degree.do.you.',
  684.                      'Think.of.re.using.data.', 'Think.of.data.sharing.')
  685. comb_col_names <- c('TaskStandardTools', 'TaskStandardToolsDegree',
  686.                     'ThinkReusingData', 'ThinkSharingData')
  687. library(data.table)
  688. data0 <- neuro_data_tmp
  689. # seq_along() replaces seq(1, length(x), 1), which stops with an error for an
  690. # empty vector
  691. for (i in seq_along(comb_string_vec)) {
  692.   in_group <- grepl(comb_string_vec[i], colnames(data0))
  693.   data0 <- data.table::melt(as.data.table(data0),
  694.                             id.vars = which(!in_group),
  695.                             measure.vars = list(which(in_group)),
  696.                             variable.name = paste0(comb_col_names[i], 'Cat'),
  697.                             value.name = comb_col_names[i], value.factor = TRUE)
  698.   data0 <- as.data.frame(data0)
  699.   # the label column is the second-to-last column of the melted table
  700.   label_col <- ncol(data0) - 1
  701.   level_strings <- levels(data0[, label_col])
  702.   # make some nicer labels (all levels at once): keep the text from the last dot
  703.   # of the first '...' onwards, turn every dot into a blank - the labels therefore
  704.   # start with a blank - and restore 'e.g.'
  705.   dots_end <- str_locate(level_strings, '\\.\\.\\.')[, 'end']
  706.   level_strings <- substr(level_strings, dots_end, nchar(level_strings))
  707.   level_strings <- gsub('e g', 'e.g.', gsub('\\.', ' ', level_strings))
  708.   levels(data0[, label_col]) <- level_strings
  709. }
  710. # long-format table used by the following statistics
  711. data <- data0
  712. # Standard-tool usage of respondents who did / did not share data publicly.
  713. # One row per respondent and task; the free-text 'Comment'/'Other' items and
  714. # rows with missing values are removed.
  715. temp <- data %>%
  716.   select(Response.ID, Shared_Publicly, TaskStandardToolsCat, TaskStandardTools) %>%
  717.   filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
  718.   # NOTE(review): a filter TaskStandardToolsCat != "No" stood here; it compared the
  719.   # task labels (which all start with a blank) with an answer value and removed nothing
  720.   na.omit() %>%
  721.   unique() %>%
  722.   droplevels()
  723. # Number of answers per sharing group and answer value. summarise() keeps the
  724. # grouping by Shared_Publicly. NOTE(review): 159 is presumably the number of respondents.
  725. temp_relFreq <- temp %>%
  726.   group_by_at(vars(-Response.ID, Shared_Publicly, -TaskStandardToolsCat)) %>%
  727.   summarise(n = n()) %>%
  728.   mutate(share = n / 159)
  729. no <- temp_relFreq %>%
  730.   filter(Shared_Publicly == "No") %>%
  731.   mutate(procent = n / sum(n) * 100)
  732. yes <- temp_relFreq %>%
  733.   filter(Shared_Publicly == "Yes") %>%
  734.   mutate(procent = n / sum(n) * 100)
  735. # the share of 'Yes' answers is looked up by value, not by row position
  736. cat("Althoug the use of standard tools are in very different areas there is")
  737. cat("the trend that those how generally use more standard tools are more likely to share their data.")
  738. cat("In the group that did not share their data publicly only")
  739. cat(no$procent[no$TaskStandardTools == "Yes"], "% use standard Tools. While in the group who shares data ")
  740. cat(yes$procent[yes$TaskStandardTools == "Yes"], "% use standard Tools. ")
  741. cat("A possible explanation could be that scientists who work a lot with standard tools find it easier to overcome the heavily standardized rules of public sharing of data. ")
  742. cat("Formally, of course, it cannot be excluded that the dominant causality is opposite. However, we consider it unlikely that the motivation to share data is the main driver for a general affinity to use standard methods. ")
  743. ###
  744. # Degree of standard-tool use by data-sharing behaviour.
  745. # One row per respondent and task; the free-text 'Comment'/'Other' items and
  746. # rows with missing values are removed.
  747. temp <- data %>%
  748.   select(Response.ID, Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree) %>%
  749.   filter(!grepl('Comment|Other', TaskStandardToolsDegreeCat)) %>%
  750.   na.omit() %>%
  751.   unique() %>%
  752.   droplevels()
  753. # Counts of the answer 'Mostly' per sharing group and task ('percent' = share of the
  754. # answer within one sharing group and task); the ' Simulation ' task is left out.
  755. temp_absNumbers <- temp %>%
  756.   group_by_at(vars(Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree)) %>%
  757.   summarise(n = n()) %>%
  758.   mutate(percent = round(n / sum(n) * 100, 0)) %>%
  759.   filter(TaskStandardToolsDegree == 'Mostly') %>%
  760.   filter(TaskStandardToolsDegreeCat != ' Simulation ') %>%
  761.   group_by_at(vars(Shared_Publicly))
  762. # the same counts for all answers (denominator of the rates below)
  763. temp_absNumbers_all <- temp %>%
  764.   group_by_at(vars(Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree)) %>%
  765.   summarise(n = n()) %>%
  766.   mutate(percent = round(n / sum(n) * 100, 0)) %>%
  767.   filter(TaskStandardToolsDegreeCat != ' Simulation ') %>%
  768.   group_by_at(vars(Shared_Publicly))
  769. # Percentage of 'Mostly' among all answers, separately for both sharing groups.
  770. # Each table is subset with its own Shared_Publicly column: the two tables have
  771. # different numbers of rows, so a mask taken from temp_absNumbers would be
  772. # recycled and select the wrong rows of temp_absNumbers_all.
  773. shared_mostly <- temp_absNumbers$Shared_Publicly
  774. shared_all <- temp_absNumbers_all$Shared_Publicly
  775. yes <- sum(temp_absNumbers$n[shared_mostly == "Yes"]) /
  776.   sum(temp_absNumbers_all$n[shared_all == "Yes"]) * 100
  777. no <- sum(temp_absNumbers$n[shared_mostly == "No"]) /
  778.   sum(temp_absNumbers_all$n[shared_all == "No"]) * 100
  779. # report both rates and their relative difference
  780. cat(yes, " % answer mostly if they share their data while only ")
  781. cat(no, " % using mostly standard methods for their work if they did not share their data openly")
  782. cat("Respondents who share their data publicly have a " , (yes-no)/no*100, "% higher rate to using 'mostly' standard tools in their daily work")
  783. ####################################################
  784. #### ######################################
  785. ###############
  786. #### Obstacles for Data sharing DataSharing #########################
  787. # Long-format table for the data-sharing obstacles: the respondent id plus the
  788. # sharing-target, 'Please.indicate.' and data-analysis question groups.
  789. data0 <- dplyr::select(neuro_data, Response.ID,
  790.                        starts_with('Have.you.shared.data.with'),
  791.                        starts_with('Please.indicate.'),
  792.                        starts_with('How.do.you.process.and.analyze.your.data.'))
  793. # comb_string_vec holds the column-name prefix of each group, comb_col_names the
  794. # name of its value column; the label column gets the same name plus 'Cat'.
  795. comb_string_vec <- c('Please.indicate.', 'How.do.you.process.and.analyze.your.data.',
  796.                      'Have.you.shared.data.with')
  797. comb_col_names <- c('SharingProblems', 'HowAnalyzeData', 'DataSharing')
  798. library(data.table)
  799. # seq_along() replaces seq(1, length(x), 1), which stops with an error for an empty vector
  800. for (i in seq_along(comb_string_vec)) {
  801.   in_group <- grepl(comb_string_vec[i], colnames(data0))
  802.   data0 <- data.table::melt(as.data.table(data0),
  803.                             id.vars = which(!in_group),
  804.                             measure.vars = list(which(in_group)),
  805.                             variable.name = paste0(comb_col_names[i], 'Cat'),
  806.                             value.name = comb_col_names[i], value.factor = TRUE)
  807.   data0 <- as.data.frame(data0)
  808.   # the label column is the second-to-last column of the melted table
  809.   label_col <- ncol(data0) - 1
  810.   level_strings <- levels(data0[, label_col])
  811.   # make some nicer labels (all levels at once): keep the text from the last dot
  812.   # of the first '...' onwards, turn every dot into a blank - the labels therefore
  813.   # start with a blank - and restore 'e.g.'
  814.   dots_end <- str_locate(level_strings, '\\.\\.\\.')[, 'end']
  815.   level_strings <- substr(level_strings, dots_end, nchar(level_strings))
  816.   level_strings <- gsub('e g', 'e.g.', gsub('\\.', ' ', level_strings))
  817.   levels(data0[, label_col]) <- level_strings
  818. }
  819. # long-format table used by the following statistics
  820. data <- data0
  821. S1 = 'We inquired about the use of existing tools and standards for different research data man-agement activities, if this process step was relevant for the participants. '
  822. cat(S1)
  823. agree = c('Fully agree', 'Rather agree')
  824. disagree = c('Fully disagree', 'Rather disagree')
  825. not_agree = c('Fully agree', 'Rather agree', 'Undecided')
  826. not_disagree = c('Fully disagree', 'Rather disagree', 'Undecided')
  827. # Percentage of 'Agree' / 'Disagree' / other answers per sharing statement.
  828. # The answers are a factor (melt(..., value.factor = TRUE)); they are converted to character first, because assigning the new labels to a factor would yield NA.
  829. temp <- data %>%
  830.   select(Response.ID, SharingProblemsCat, SharingProblems) %>%
  831.   mutate(SharingProblems = as.character(SharingProblems)) %>%
  832.   mutate(SharingProblems = replace(SharingProblems, str_detect(SharingProblems, " agree"), "Agree")) %>%
  833.   mutate(SharingProblems = replace(SharingProblems, str_detect(SharingProblems, " disagree"), "Disagree")) %>%
  834.   na.omit() %>%
  835.   group_by_at(vars(SharingProblemsCat, SharingProblems)) %>%
  836.   summarise(n = n()) %>%
  837.   mutate(percent = round(n / sum(n) * 100, 0))
  838. # one table per statement; the labels must match the cleaned column labels exactly
  839. ownership <- temp %>%
  840.   filter(SharingProblemsCat == " I do not want to use a public repository because my data ownership intellectual property might be violated ")
  841. institution <- temp %>%
  842.   filter(SharingProblemsCat == " My institutional policy allows to upload data to a public repository ")
  843. legal <- temp %>%
  844.   filter(SharingProblemsCat == ' Legal aspects licensing national laws are significant hurdles for public repository usage ')
  845. rights <- temp %>%
  846.   filter(SharingProblemsCat == ' For my research project s I am unsure if I own the rights to upload the data to a public repository ')
  847. sufficient_guidance <- temp %>%
  848.   filter(SharingProblemsCat == ' There is sufficient guidance towards choosing an appropriate repository for my data ')
  849. time <- temp %>%
  850.   filter(SharingProblemsCat == ' There is a lack of time to deposit data in a repository ')
  851. expertise <- temp %>%
  852.   filter(SharingProblemsCat == ' There is a lack of expertise and human resources to deposit data in a repository ')
  853. technic <- temp %>%
  854.   filter(SharingProblemsCat == ' Technical hurdles are too high to upload to a repository large data transfer lack of requested metadata ')
  855. # Text output: every percentage is looked up by its answer label
  856. # ('Agree', 'Disagree', 'Undecided'); a misspelled label selects nothing and prints an empty value.
  857. #First of all, we did not find major general opposition to public data sharing.
  858. cat( ownership$percent[ownership$SharingProblems=="Agree"] ,'% are reluctant to share data publicly because the data ownership or intellectual property might be violated (vs. ', ownership$percent[ownership$SharingProblems=="Disagree"] , ').')
  859. cat('Interestingly, ', institution$percent[institution$SharingProblems=="Undecided"], '% participants did not know whether their institutional policy allow to up-load data to a public repository')
  860. cat(' while further ', institution$percent[institution$SharingProblems=="Disagree"],'% are sure that they did not' )
  861. cat(institution$percent[institution$SharingProblems=="Disagree"])
  862. cat('Further ', 100-rights$percent[rights$SharingProblems=="Disagree"], ' are not sure whether they own the rights to upload data from their own research project')
  863. cat(legal$percent[legal$SharingProblems=="Agree"], ' %) see legal aspects as significant hurdles for public repository usage.')
  864. cat('These answers indicate major uncertainties with regard to legal issues.')
  865. cat('Only ', sufficient_guidance$percent[sufficient_guidance$SharingProblems=="Agree"], '% think that there is sufficient guidance towards choosing an appropriate repository for my data')
  866. cat(time$percent[time$SharingProblems=="Agree"], '% think that there is a lack of time to deposit data in a repository.')
  867. cat('while only ',time$percent[time$SharingProblems=="Disagree"], '% disagree on this point')
  868. cat(expertise$percent[expertise$SharingProblems=="Agree"], "% think that there is a lack of expertise and human resources to deposit data in a repository")
  869. cat(technic$percent[technic$SharingProblems=="Agree"], "% think that the technical hurdles are too high to uplad tdat ato a repository")
  870. cat('83% of respondents did not think that their research data must be handled in their very own, individual way. The lack of professional data management was reported as problem. 70 (54%) participants think that they would share more of their data if they had better data man-agement while only 32% think that a better data management would not increase the amount of own data to share. Due to the lack of professional data management, the preparation of an dataset for public use is a time-consuming process. 70% of those respondents how have previously prepared data for publication and re-use say that the time that they need to ready a dataset requires more than a day while 39% need even more than a week. Accordingly, 76 (60%) think that there is a lack of time to deposit data in a repository while only 31 (24%) did not think that time is a problem for the deposition of data in a public repository.')
  871. ######################################################################
  872. ######################################################################
  873. # Build the working table for the position / discipline figures from neuro_data:
  874. # the respondent id, ten single questions (selected by their full column name)
  875. # and five multi-column question groups (selected by their column-name prefix).
  876. neuro_data_tmp <- dplyr::select(neuro_data, Response.ID,
  877.   Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
  878.   Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
  879.   Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
  880.   What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
  881.   Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
  882.   What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
  883.   What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
  884.   What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
  885.   Have.you.shared.data.with.....Publicly.,
  886.   My.current..primary..position.is.,
  887.   starts_with('For.which.of.these.tasks.'),
  888.   starts_with('To.what.degree.do.you.'),
  889.   starts_with('Think.of.re.using.data.'),
  890.   starts_with('Which.neuroscience.discipline.s.'),
  891.   starts_with('Think.of.data.sharing.')
  892. )
  893. # Short names for the single questions (original column name = short name), applied in this order.
  894. rename_map <- c(
  895.   # current (primary) position
  896.   'My.current..primary..position.is.' = "CurrentPosition",
  897.   # existing data sets that should be kept alive by making them available for reuse
  898.   'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.' = "Existing_Data",
  899.   # other researchers could answer their own questions by re-using my data
  900.   'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..' = "Other_can_answer",
  901.   # data shared publicly
  902.   'Have.you.shared.data.with.....Publicly.' = "Shared_Publicly",
  903.   # knows how to publish data analysis workflows in a reproducible manner
  904.   'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.' = 'I_know_how_to_publish_my_data_reproducible',
  905.   # dedicated personnel with research data management or data curation expertise
  906.   'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.' = 'I_have_RDM_personal',
  907.   # can handle research data according to community standards
  908.   'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.' = 'I_can_handle_RD_community_standards',
  909.   # has proficiency in research data management
  910.   'What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.' = 'I_have_proficiency_in_RDM',
  911.   # is highly knowledgeable about research data management in the own field
  912.   'What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.' = 'Iam_highly_knowledgeable_in_RDM',
  913.   # knows which research data management methods are available
  914.   'What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.' = 'I_know_RDM_available_Methods'
  915. )
  916. for (old_name in names(rename_map)) {
  917.   is_old <- names(neuro_data_tmp) == old_name
  918.   names(neuro_data_tmp)[is_old] <- rename_map[[old_name]]
  919. }
  920. # Reshape the five multi-column question groups of neuro_data_tmp into long format.
  921. # comb_string_vec holds the column-name prefix of each group, comb_col_names the
  922. # name of its value column; the label column gets the same name plus 'Cat'.
  923. comb_string_vec <- c('For.which.of.these.tasks.', 'To.what.degree.do.you.',
  924.                      'Think.of.re.using.data.', 'Which.neuroscience.discipline.s.',
  925.                      'Think.of.data.sharing.')
  926. comb_col_names <- c('TaskStandardTools', 'TaskStandardToolsDegree',
  927.                     'ThinkReusingData', 'NeuroDiscipline',
  928.                     'ThinkSharingData')
  929. library(data.table)
  930. data0 <- neuro_data_tmp
  931. # seq_along() replaces seq(1, length(x), 1), which stops with an error for an
  932. # empty vector
  933. for (i in seq_along(comb_string_vec)) {
  934.   in_group <- grepl(comb_string_vec[i], colnames(data0))
  935.   data0 <- data.table::melt(as.data.table(data0),
  936.                             id.vars = which(!in_group),
  937.                             measure.vars = list(which(in_group)),
  938.                             variable.name = paste0(comb_col_names[i], 'Cat'),
  939.                             value.name = comb_col_names[i], value.factor = TRUE)
  940.   data0 <- as.data.frame(data0)
  941.   # the label column is the second-to-last column of the melted table
  942.   label_col <- ncol(data0) - 1
  943.   level_strings <- levels(data0[, label_col])
  944.   # make some nicer labels (all levels at once): keep the text from the last dot
  945.   # of the first '...' onwards, turn every dot into a blank - the labels therefore
  946.   # start with a blank - and restore 'e.g.'
  947.   dots_end <- str_locate(level_strings, '\\.\\.\\.')[, 'end']
  948.   level_strings <- substr(level_strings, dots_end, nchar(level_strings))
  949.   level_strings <- gsub('e g', 'e.g.', gsub('\\.', ' ', level_strings))
  950.   levels(data0[, label_col]) <- level_strings
  951. }
  952. # long-format table used by the following figures
  953. data <- data0
  954. # Figure 5: percentage of respondents per position who shared data publicly.
  955. # unique() reduces the long table to distinct respondent / answer / position rows.
  956. temp <- data %>%
  957.   select(Response.ID, Shared_Publicly, CurrentPosition) %>%
  958.   unique()
  959. # Number of respondents per sharing answer and position; combinations with fewer
  960. # than three respondents and rows with missing values are dropped.
  961. temp_relFreq <- temp %>%
  962.   group_by_at(vars(-Response.ID, Shared_Publicly)) %>%
  963.   summarise(n = n()) %>%
  964.   filter(n >= 3) %>%
  965.   na.omit()
  966. library(reshape2)
  967. # Open points (translated from the German notes in the script):
  968. # - a plot with two colours belongs here
  969. # - continue here: scale every current position to 100 and show the share of
  970. #   respondents who share data as a percentage
  971. #data2 <- reshape(temp_relFreq, idvar = "CurrentPosition", timevar = "Shared_Publicly", direction = "wide")
  972. # One row per position with the counts of the sharing answers in separate columns.
  973. # reshape2::dcast() is named explicitly because data.table, attached after reshape2
  974. # at the top of the script, masks dcast(); value.var names the count column
  975. # instead of letting dcast() guess it. percent is the share of 'Yes' among
  976. # 'Yes' + 'No'; positions lacking one of the two counts are removed by na.omit().
  977. temp_absNumbers <- reshape2::dcast(temp_relFreq, CurrentPosition ~ Shared_Publicly, value.var = "n") %>%
  978.   mutate(percent = round(Yes / (No + Yes) * 100, 0)) %>%
  979.   na.omit() %>%
  980.   arrange(desc(percent)) %>%
  981.   mutate(CurrentPosition = str_replace(CurrentPosition, "Research data management focused staff", "RDM staff"))
  982. # Horizontal bar chart, positions ordered by percentage. geom_col() is the ggplot2
  983. # shorthand for bars whose height is taken from the data (stat = 'identity').
  984. pFD <- ggplot(data = temp_absNumbers) +
  985.   geom_col(mapping = aes(x = reorder(CurrentPosition, percent), y = percent),
  986.            colour = 'darkblue', fill = 'darkblue', width = 0.5) +
  987.   coord_flip() +
  988.   #theme(axis.text.x = element_text(color = "grey20", size = 10, angle = 90, hjust = .5, vjust = .5, face = "plain"),
  989.   # axis.text.y = element_text(color = "grey20", size = 10, angle = 0, hjust = 1, vjust = 0, face = "plain"),
  990.   # axis.title.x = element_text(color = "grey20", size = 12, angle = 0, hjust = .5, vjust = 0, face = "plain"),
  991.   # axis.title.y = element_text(color = "grey20", size = 12, angle = 0, hjust = .5, vjust = .5, face = "plain")) +
  992.   theme(text = element_text(size = 11)) +
  993.   xlab('') + ylab('') + ggtitle(paste0('Datasharing for different scientific positions')) +
  994.   # percentage written in white at the end of each bar
  995.   geom_text(aes(x = CurrentPosition, y = percent, label = paste0(percent, "%")),
  996.             colour = "white", hjust = 1.5) +
  997.   # wrap long position labels
  998.   scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 20))
  999. #scale_x_discrete(labels=function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)}) # nice regular expression solution for multiple lined labels
  1000. # Show the figure. print() is required because a bare object name is not
  1001. # auto-printed when the script is run through source().
  1002. print(pFD)
  1003. # write the figure to Fig5_Position_vs_Sharing.tiff
  1004. ragg::agg_tiff("Fig5_Position_vs_Sharing.tiff", width = 17.5, height = 6, units = "cm", res = 600, scaling = 1)
  1005. print(pFD)
  1006. # close the graphics device opened by agg_tiff()
  1007. dev.off()
  # ---- Figure 6: public data sharing by neuroscience discipline ----
  # Keep only the disciplines a respondent ticked ("Yes"), then reduce to
  # unique (respondent, sharing answer, discipline) rows without missing values.
  temp <- data %>%
    select(Response.ID, Shared_Publicly, NeuroDisciplineCat, NeuroDiscipline) %>%
    filter(NeuroDiscipline == "Yes") %>%
    select(-NeuroDiscipline) %>%
    na.omit() %>%
    unique()

  # Respondents per (Shared_Publicly, NeuroDisciplineCat) cell; cells with
  # fewer than 3 respondents are dropped. This table is reused further below.
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID, Shared_Publicly)) %>%
    summarise(n = n()) %>%
    filter(n >= 3) %>%
    na.omit()

  # One row per discipline with "No"/"Yes" counts and the percentage of
  # sharers. reshape2::dcast is called explicitly (data.table also exports a
  # `dcast`) and value.var is named instead of being guessed. Disciplines
  # lacking one of the two cells get NA and are removed by na.omit().
  temp_absNumbers <- reshape2::dcast(as.data.frame(temp_relFreq),
                                     NeuroDisciplineCat ~ Shared_Publicly,
                                     value.var = "n") %>%
    mutate(percent = round(Yes / (No + Yes) * 100, 0)) %>%
    na.omit() %>%
    arrange(desc(percent))

  # Shorten the discipline labels for the axis. Each row is (pattern,
  # replacement); they are applied in this order and str_replace() only
  # touches the first match, which is why "Theoretical" is listed twice.
  label_fixes <- rbind(
    c("imaging", "Imaging"),
    c("Theoretical", "Theoret."),
    c("Theoretical", "Theoret."),
    c("behavioral", "Behav."),
    c("neuroscience", "Neurosci."),
    c("Neuroscience", "Neurosci."),
    c("e.g. electrophysiological recording behavior tracking ", ""),
    c("e.g. patient involvement clinical trials ", ""),
    c("science", "Science"),
    c("e.g. modeling simulation ", "")
  )
  for (k in seq_len(nrow(label_fixes))) {
    temp_absNumbers$NeuroDisciplineCat <- str_replace(temp_absNumbers$NeuroDisciplineCat,
                                                      label_fixes[k, 1],
                                                      label_fixes[k, 2])
  }

  # Horizontal bar chart, bars ordered by percentage, value printed in the bar.
  # NOTE(review): str_replace_all(x, " ", " ") is a no-op as written; possibly
  # one argument was a non-breaking space originally - confirm.
  pFD <- ggplot(data = temp_absNumbers) +
    geom_col(mapping = aes(x = reorder(NeuroDisciplineCat, percent), y = percent),
             colour = 'darkblue', fill = 'darkblue',
             width = 0.5) +
    coord_flip() +
    theme(text = element_text(size = 11)) +
    xlab('') + ylab('') +
    ggtitle('Datasharing for different neuroscientific subdisciplines') +
    geom_text(aes(x = NeuroDisciplineCat, y = percent, label = paste0(percent, "%")),
              colour = "white", hjust = 1.5) +
    scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 40))
  # Interactive preview.
  pFD

  # Write the figure. print() is explicit so the plot is also drawn when the
  # script is run through source(), where top-level values are not auto-printed.
  ragg::agg_tiff("Fig6_Discipline_vs_Sharing.tiff", width = 17.5, height = 10, units = "cm", res = 600, scaling = 1)
  print(pFD)
  dev.off()
  # Within-group shares of the counts in temp_relFreq, separately for
  # respondents who did not ("No") and who did ("Yes") share data publicly.
  # temp_relFreq is still grouped by Shared_Publicly, so sum(n) is per group.
  no <- temp_relFreq %>%
    filter(Shared_Publicly == "No") %>%
    mutate(procent = n / sum(n) * 100)
  yes <- temp_relFreq %>%
    filter(Shared_Publicly == "Yes") %>%
    mutate(procent = n / sum(n) * 100)

  # Print the paragraph for the paper. Each fragment ends with a space so the
  # consecutive cat() calls no longer run words together.
  # NOTE(review): the text talks about standard tools, but at this point
  # temp_relFreq holds the counts of the preceding code (whatever was assigned
  # to it last), and row 2 is picked by position - verify that this is the
  # intended table and row.
  cat("Although the use of standard tools are in very different areas there is ")
  cat("the trend that those who generally use more standard tools are more likely to share their data. ")
  cat("In the group that did not share their data publicly only ")
  cat(no$procent[2], "% use standard Tools. While in the group who shares data ")
  cat(yes$procent[2], "% use standard Tools. ")
  cat("A possible explanation could be that scientists who work a lot with standard tools find it easier to overcome the heavily standardized rules of public sharing of data. ")
  cat("Formally, of course, it cannot be excluded that the dominant causality is opposite. However, we consider it unlikely that the motivation to share data is the main driver for a general affinity to use standard methods. ")
  cat("\n")
  ###
  # Degree of standard-tool use, compared between sharers and non-sharers.
  # Drop the "Comment"/"Other" categories and the category literally named
  # "No", remove incomplete rows and keep one row per respondent and task.
  temp <- data %>%
    select(Response.ID, Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree) %>%
    filter(!grepl('Comment|Other', TaskStandardToolsDegreeCat)) %>%
    filter(TaskStandardToolsDegreeCat != "No") %>%
    na.omit() %>%
    unique() %>%
    droplevels()

  # Answer counts per (sharing, task, degree). After summarise() the table is
  # still grouped by (sharing, task), so percent is the share of each degree
  # within one task and sharing group. The ' Simulation ' task is excluded.
  temp_absNumbers_all <- temp %>%
    group_by_at(vars(Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree)) %>%
    summarise(n = n()) %>%
    mutate(percent = round(n / sum(n) * 100, 0)) %>%
    filter(TaskStandardToolsDegreeCat != ' Simulation ') %>%
    group_by_at(vars(Shared_Publicly))

  # Same table restricted to the answer 'Mostly'.
  temp_absNumbers <- temp_absNumbers_all %>%
    filter(TaskStandardToolsDegree == 'Mostly')

  # Share of 'Mostly' answers among all answers, per sharing group. Each table
  # is indexed with a mask built from its OWN Shared_Publicly column; the
  # previous code applied the mask of the shorter 'Mostly' table to the full
  # table, which selected the wrong rows for the denominator.
  mostly_yes <- sum(temp_absNumbers$n[temp_absNumbers$Shared_Publicly == "Yes"])
  mostly_no <- sum(temp_absNumbers$n[temp_absNumbers$Shared_Publicly == "No"])
  all_yes <- sum(temp_absNumbers_all$n[temp_absNumbers_all$Shared_Publicly == "Yes"])
  all_no <- sum(temp_absNumbers_all$n[temp_absNumbers_all$Shared_Publicly == "No"])
  yes <- mostly_yes / all_yes * 100
  no <- mostly_no / all_no * 100

  cat(yes, " % answer mostly if they share their data while only ")
  cat(no, " % using mostly standard methods for their work if they did not share their data openly")
  cat("Respondents who share their data publicly have a " , (yes-no)/no*100, "% higher rate to using 'mostly' standard tools in their daily work", "\n")
  ######################################################################
  ######################################################################
  # Association analysis for data sharing: what influences data sharing?
  # Rebuild a working data set from the raw survey columns.
  neuro_data_tmp <- neuro_data %>% dplyr::select(Response.ID,
    Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
    Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
    Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
    What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
    Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
    How.much.time.do.you.currently.need.to.ready.a.dataset.from.your.lab.for.publication.and.re.use.,
    Have.you.shared.data.with.....Publicly.,
    starts_with('For.which.of.these.tasks.'),
    starts_with('To.what.degree.do.you.'),
    starts_with('Think.of.re.using.data.'),
    starts_with('Think.of.data.sharing.')
  )

  # Short names for the single-answer questions (new name = survey column).
  # Only columns selected above are listed; names that are not present in
  # neuro_data_tmp are ignored by the lookup below.
  rename_map <- c(
    Existing_Data = 'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.',
    Other_can_answer = 'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..',
    Shared_Publicly = 'Have.you.shared.data.with.....Publicly.',
    I_know_how_to_publish_my_data_reproducible = 'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.',
    I_have_RDM_personal = 'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.',
    I_can_handle_RD_community_standards = 'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.',
    how_much_time = 'How.much.time.do.you.currently.need.to.ready.a.dataset.from.your.lab.for.publication.and.re.use.'
  )
  hit <- match(names(neuro_data_tmp), rename_map)
  names(neuro_data_tmp)[!is.na(hit)] <- names(rename_map)[hit[!is.na(hit)]]

  # Question groups spread over several columns: column-name pattern and the
  # name of the value column each group is melted into.
  comb_string_vec <- c('For.which.of.these.tasks.',
                       'To.what.degree.do.you.',
                       'Think.of.re.using.data.',
                       'Think.of.data.sharing.')
  comb_col_names <- c('TaskStandardTools',
                      'TaskStandardToolsDegree',
                      'ThinkReusingData',
                      'ThinkSharingData')

  # Melt one question group per iteration into a pair of columns
  # (<name>Cat = sub-question, <name> = answer), turning the table long.
  data0 <- neuro_data_tmp
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(data.table::as.data.table(data0),
                              id.vars = which(!is_measure),
                              measure.vars = list(which(is_measure)),
                              variable.name = paste0(comb_col_names[i], 'Cat'),
                              value.name = comb_col_names[i],
                              value.factor = TRUE)
    data0 <- as.data.frame(data0)

    # Nicer labels for the new category column (second to last column):
    # keep the text from the last dot of the first '...' onwards, turn dots
    # into spaces and restore 'e.g.'. The leading dot becomes a leading space.
    cat_col <- ncol(data0) - 1
    level_strings <- levels(data0[, cat_col])
    sep_end <- str_locate(level_strings, '\\.\\.\\.')[, 2]
    level_strings <- substr(level_strings, sep_end, nchar(level_strings))
    level_strings <- gsub('\\.|\\.\\.', ' ', level_strings)
    level_strings <- gsub('e g', 'e.g.', level_strings)
    levels(data0[, cat_col]) <- level_strings
  }
  data <- data0
  # Answer groupings for the agreement-scale questions.
  agree <- c('Fully agree', 'Rather agree')
  disagree <- c('Fully disagree', 'Rather disagree')
  not_agree <- c('Fully agree', 'Rather agree', 'Undecided')
  not_disagree <- c('Fully disagree', 'Rather disagree', 'Undecided')

  # ---- Figure 4: time needed to prepare a data set for publication ----
  # One row per respondent with a non-missing answer, then the number and the
  # rounded percentage of respondents per answer, sorted ascending.
  temp_absNumbers <- data %>%
    select(Response.ID, how_much_time) %>%
    group_by_at(vars(how_much_time)) %>%
    na.omit() %>%
    unique() %>%
    summarise(n = n()) %>%
    mutate(percent = round(n / sum(n) * 100, 0)) %>%
    arrange(percent)

  # Horizontal bar chart, bars ordered by percentage, value printed in the bar.
  # NOTE(review): str_replace_all(x, " ", " ") is a no-op as written; possibly
  # one argument was a non-breaking space originally - confirm.
  pFD <- ggplot(data = temp_absNumbers) +
    geom_col(mapping = aes(x = reorder(how_much_time, percent), y = percent),
             colour = 'darkblue', fill = 'darkblue',
             width = 0.5) +
    coord_flip() +
    theme(text = element_text(size = 11)) +
    xlab('') + ylab('') +
    ggtitle('Time needed to ready a dataset for publication and reuse') +
    geom_text(aes(x = how_much_time, y = percent, label = paste0(percent, "%")),
              colour = "white", hjust = 1.5) +
    scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 20))
  # Interactive preview.
  pFD

  # Write the figure. print() is explicit so the plot is also drawn when the
  # script is run through source(), where top-level values are not auto-printed.
  ragg::agg_tiff("Fig4_Time.tiff", width = 17.5, height = 7, units = "cm", res = 600, scaling = 1)
  print(pFD)
  dev.off()
  ######################################################################
  ######################################################################
  # Which factor separates sharers from non-sharers most strongly?
  # Rebuild a working data set from the raw survey columns.
  neuro_data_tmp <- neuro_data %>% dplyr::select(Response.ID,
    Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
    Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
    Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
    What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
    Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
    What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
    What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
    What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
    Have.you.shared.data.with.....Publicly.,
    My.current..primary..position.is.,
    starts_with('For.which.of.these.tasks.'),
    starts_with('To.what.degree.do.you.'),
    starts_with('Think.of.re.using.data.'),
    starts_with('Which.neuroscience.discipline.s.'),
    starts_with('Applying.research.data.management..'),
    starts_with('Think.of.data.sharing.')
  )

  # Short names for the single-answer questions (new name = survey column).
  # The RDM-personnel column is called I_have_RDM_personal; a later second
  # rename of the same survey column to 'RDM_personal' could never match
  # (the column had already been renamed) and has been removed.
  rename_map <- c(
    CurrentPosition = 'My.current..primary..position.is.',
    Existing_Data = 'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.',
    Other_can_answer = 'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..',
    Shared_Publicly = 'Have.you.shared.data.with.....Publicly.',
    I_know_how_to_publish_my_data_reproducible = 'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.',
    I_have_RDM_personal = 'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.',
    I_can_handle_RD_community_standards = 'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.',
    I_have_proficiency_in_RDM = 'What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.',
    Iam_highly_knowledgeable_in_RDM = 'What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.',
    I_know_RDM_available_Methods = 'What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.'
  )
  hit <- match(names(neuro_data_tmp), rename_map)
  names(neuro_data_tmp)[!is.na(hit)] <- names(rename_map)[hit[!is.na(hit)]]

  # Question groups spread over several columns: column-name pattern and the
  # name of the value column each group is melted into. The
  # 'Applying.research.data.management..' columns are selected but not melted.
  comb_string_vec <- c('For.which.of.these.tasks.',
                       'To.what.degree.do.you.',
                       'Think.of.re.using.data.',
                       'Which.neuroscience.discipline.s.',
                       'Think.of.data.sharing.')
  comb_col_names <- c('TaskStandardTools',
                      'TaskStandardToolsDegree',
                      'ThinkReusingData',
                      'NeuroDiscipline',
                      'ThinkSharingData')

  # Melt one question group per iteration into a pair of columns
  # (<name>Cat = sub-question, <name> = answer), turning the table long.
  data0 <- neuro_data_tmp
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(data.table::as.data.table(data0),
                              id.vars = which(!is_measure),
                              measure.vars = list(which(is_measure)),
                              variable.name = paste0(comb_col_names[i], 'Cat'),
                              value.name = comb_col_names[i],
                              value.factor = TRUE)
    data0 <- as.data.frame(data0)

    # Nicer labels for the new category column (second to last column):
    # keep the text from the last dot of the first '...' onwards, turn dots
    # into spaces and restore 'e.g.'. The leading dot becomes a leading space.
    cat_col <- ncol(data0) - 1
    level_strings <- levels(data0[, cat_col])
    sep_end <- str_locate(level_strings, '\\.\\.\\.')[, 2]
    level_strings <- substr(level_strings, sep_end, nchar(level_strings))
    level_strings <- gsub('\\.|\\.\\.', ' ', level_strings)
    level_strings <- gsub('e g', 'e.g.', level_strings)
    levels(data0[, cat_col]) <- level_strings
  }
  data <- data0
  # Self-assessment items compared between sharers and non-sharers:
  #   I_can_handle_RD_community_standards, I_have_proficiency_in_RDM,
  #   Iam_highly_knowledgeable_in_RDM, I_know_RDM_available_Methods
  # (plus CurrentPosition and I_have_RDM_personal, which are in the table too).

  # One row per respondent with the sharing answer and the items of interest.
  temp <- unique(
    select(data,
           Response.ID,
           Shared_Publicly,
           CurrentPosition,
           I_can_handle_RD_community_standards,
           I_have_proficiency_in_RDM,
           Iam_highly_knowledgeable_in_RDM,
           I_know_RDM_available_Methods,
           I_have_RDM_personal)
  )

  # Collapse the four-step agreement scale to Agree / Disagree. Every pattern
  # is applied to every column, one pass per pattern, in this order.
  answer_map <- c("Fully agree" = "Agree",
                  "Rather agree" = "Agree",
                  "Rather disagree" = "Disagree",
                  "Fully disagree" = "Disagree")
  df <- temp
  for (pattern in names(answer_map)) {
    df <- data.frame(lapply(df, gsub, pattern = pattern, replacement = answer_map[[pattern]]))
  }

  # For every column from the third onwards: count respondents per
  # (Shared_Publicly, answer), keep cells with at least 3 respondents, print
  # the table and the ratio of "Agree" to "Disagree" counts among sharers.
  # NOTE(review): percent is computed as n / sum(n) * 50, not * 100 - confirm
  # that the factor 50 is intended.
  for (s in seq(3, length(df), 1)) {
    print(colnames(df)[s])
    df_tmp <- df[, c(1, 2, s)]
    temp_relFreq <- df_tmp %>%
      group_by_at(vars(-Response.ID, Shared_Publicly)) %>%
      summarise(n = n()) %>%
      filter(n >= 3) %>%
      na.omit() %>%
      mutate(percent = round(n / sum(n) * 50, 0))
    print(temp_relFreq)

    shared_yes <- temp_relFreq$Shared_Publicly == "Yes"
    answers <- temp_relFreq[[2]]
    n_agree <- temp_relFreq$n[shared_yes & answers == "Agree"]
    n_disagree <- temp_relFreq$n[shared_yes & answers == "Disagree"]
    cat("Agree and Shared = ", n_agree / n_disagree)
  }
  # The printed tables were then inspected by hand; the ratio between
  # "Yes / Agree" and "Yes / Disagree" was computed from them and reported
  # in the paper.
  ######################################################################
  ######################################################################
  # Rebuild the working data set, this time keeping every
  # 'Have.you.shared.data.with' column so that sharing itself can be melted.
  data0 <- neuro_data %>% dplyr::select(Response.ID,
    starts_with('Have.you.shared.data.with'),
    starts_with('For.which.of.these.tasks.'),
    starts_with('To.what.degree.do.you.'),
    starts_with('Think.of.re.using.data.'),
    starts_with('Think.of.data.sharing.')
  )

  # Question groups spread over several columns: column-name pattern and the
  # name of the value column each group is melted into.
  comb_string_vec <- c('For.which.of.these.tasks.',
                       'To.what.degree.do.you.',
                       'Think.of.re.using.data.',
                       'Think.of.data.sharing.',
                       'Have.you.shared.data.with')
  comb_col_names <- c('TaskStandardTools',
                      'TaskStandardToolsDegree',
                      'ThinkReusingData',
                      'ThinkSharingData',
                      'DataSharing')

  # Melt one question group per iteration into a pair of columns
  # (<name>Cat = sub-question, <name> = answer), turning the table long.
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(data.table::as.data.table(data0),
                              id.vars = which(!is_measure),
                              measure.vars = list(which(is_measure)),
                              variable.name = paste0(comb_col_names[i], 'Cat'),
                              value.name = comb_col_names[i],
                              value.factor = TRUE)
    data0 <- as.data.frame(data0)

    # Nicer labels for the new category column (second to last column):
    # keep the text from the last dot of the first '...' onwards, turn dots
    # into spaces and restore 'e.g.'. The leading dot becomes a leading space.
    cat_col <- ncol(data0) - 1
    level_strings <- levels(data0[, cat_col])
    sep_end <- str_locate(level_strings, '\\.\\.\\.')[, 2]
    level_strings <- substr(level_strings, sep_end, nchar(level_strings))
    level_strings <- gsub('\\.|\\.\\.', ' ', level_strings)
    level_strings <- gsub('e g', 'e.g.', level_strings)
    levels(data0[, cat_col]) <- level_strings
  }
  data <- data0
  # ---- Standard tools per task vs. data sharing ----
  # Drop the "Comment"/"Other" task categories, remove incomplete rows and
  # keep one row per respondent, task and sharing channel.
  temp <- data %>%
    select(Response.ID, TaskStandardToolsCat, TaskStandardTools, DataSharingCat, DataSharing) %>%
    filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
    na.omit() %>%
    unique() %>%
    droplevels()

  # Counts per combination of all four answer columns; `share` is the fraction
  # of DataSharing answers within (task, tool answer, sharing channel).
  # This table feeds the pTST plot below and must keep `share`/`DataSharing`.
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))

  # Absolute counts per combination, used for the Figure 2 bar chart.
  temp_absNumbers <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n())

  # Counts ignoring the DataSharing answer, scaled by a fixed 144.
  # This used to overwrite temp_relFreq, which broke pTST (it has neither
  # `share` nor `DataSharing`); it now lives under its own name.
  # NOTE(review): nothing visible here uses this table and 144 is hard-coded -
  # confirm whether it is still needed.
  temp_relFreq_noSharing <- temp %>%
    group_by_at(vars(-Response.ID, -DataSharing)) %>%
    summarise(n = n()) %>%
    mutate(percent = n / 144)

  # Figure 2: stacked counts per sharing channel.
  pFD <- ggplot(data = temp_absNumbers) +
    geom_col(mapping = aes(x = DataSharingCat, y = n),
             colour = 'darkblue', fill = 'darkblue',
             width = 0.5) +
    coord_flip() +
    xlab('') + ylab('') +
    ggtitle(paste0('Datasharing (n = ', sum(temp_absNumbers$n), ')')) +
    geom_text(aes(x = DataSharingCat, y = n, label = n), colour = "white", hjust = 1.5) +
    # break long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,30})(\\s|$)', '\\1\n', x)})
  # Interactive preview.
  pFD

  # print() is explicit so the plot is also drawn when the script is run
  # through source(), where top-level values are not auto-printed.
  tiff('Fig2_DataSharing.tiff', width = 17.5, height = 7, units = "cm", res = 600)
  print(pFD)
  dev.off()

  # Tool use per task, facetted by tool answer (rows) and sharing channel
  # (columns), coloured by the DataSharing answer.
  # NOTE(review): the y axis is labelled 'percent (%)' but `share` is a
  # fraction between 0 and 1 - confirm the intended scale.
  pTST <- ggplot(data = temp_relFreq) +
    geom_col(mapping = aes(x = TaskStandardToolsCat, y = share, color = DataSharing, fill = DataSharing),
             width = 0.5) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(TaskStandardTools ~ DataSharingCat, scales = 'fixed', margins = FALSE, labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # break long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

  png('For.which.of.these.tasks.do.you.use.available.tools.or.standards.png', width = 30, height = 20, units = "cm", res = 300)
  print(pTST)
  dev.off()
  # ---- Degree of standard-tool use per task vs. data sharing ----
  # Drop the "Other" task category, remove incomplete rows and keep one row
  # per respondent, task and sharing channel.
  temp <- data %>%
    select(Response.ID, TaskStandardToolsDegreeCat, TaskStandardToolsDegree, DataSharingCat, DataSharing) %>%
    filter(!grepl('Other', TaskStandardToolsDegreeCat)) %>%
    na.omit() %>%
    unique()

  # Combine the answer levels into two classes; other answers stay unchanged.
  # NOTE(review): 'Offten' looks like a typo for 'Often'; it is left as is
  # because it appears as a facet label and may be referenced elsewhere.
  temp$TaskStandardToolsDegree <- as.character(temp$TaskStandardToolsDegree)
  temp$TaskStandardToolsDegree[temp$TaskStandardToolsDegree == 'As much as possible' | temp$TaskStandardToolsDegree == 'Mostly'] <- 'Offten'
  temp$TaskStandardToolsDegree[temp$TaskStandardToolsDegree == 'Occasionally' | temp$TaskStandardToolsDegree == 'This is not relevant for my scientific work'] <- 'Rare'
  temp$TaskStandardToolsDegree <- as.factor(temp$TaskStandardToolsDegree)

  # Counts per combination of all four answer columns; `share` is the fraction
  # of DataSharing answers within (task, degree class, sharing channel).
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))

  # Facets: degree class (rows) by sharing channel (columns).
  pTSD <- ggplot(data = temp_relFreq) +
    geom_col(mapping = aes(x = TaskStandardToolsDegreeCat, y = share, color = DataSharing, fill = DataSharing),
             width = 0.5) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(TaskStandardToolsDegree ~ DataSharingCat, scales = 'fixed', margins = FALSE, labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # break long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

  # print() is explicit so the plot is also drawn when the script is run
  # through source(), where top-level values are not auto-printed.
  png('To.what.degree.do.you.use.available.tools.or.standards.png', width = 30, height = 20, units = "cm", res = 300)
  print(pTSD)
  dev.off()
  # make a Think of Reusing Data plot
  temp <- data %>%
    select(Response.ID, ThinkReusingDataCat, ThinkReusingData, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique()
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  pTRD <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = ThinkReusingDataCat, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(ThinkReusingData ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('Think.of.re.using.data.from.repositories.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pTRD)
  dev.off()
  # make a Think of Sharing Data plot
  temp <- data %>%
    select(Response.ID, ThinkSharingDataCat, ThinkSharingData, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique()
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  # NOTE: the name pTSD is also used for the tools-degree plot and is
  # overwritten here
  pTSD <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = ThinkSharingDataCat, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(ThinkSharingData ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pTSD)
  dev.off()
  ####################################################################
  # make a Familiar Data Types plot
  # Familiar Data Types are Yes/No questions ==> only the respondents who
  # answered with Yes are needed; answers given fewer than 3 times are dropped
  temp <- data %>%
    select(Response.ID, FimilarDataTypesCat, FimilarDataTypes) %>%
    filter(FimilarDataTypes != 'No') %>%
    na.omit() %>%
    unique() %>%
    group_by(FimilarDataTypes) %>%
    filter(n() >= 3)
  # absolute numbers per data type and answer
  temp_absNumbers <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n())
  pFD <- ggplot(data = temp_absNumbers) +
    # geom_col() draws the pre-computed counts (identity stat)
    geom_col(
      mapping = aes(x = FimilarDataTypesCat, y = n),
      colour = 'darkblue', fill = 'darkblue',
      width = 0.5
    ) +
    coord_flip() +
    xlab('') + ylab('') +
    ggtitle(paste0('Familiar Datatypes \n(n = ', sum(temp_absNumbers$n), ', multiple answers possible)')) +
    # the count is written in white inside the end of each bar
    geom_text(aes(x = FimilarDataTypesCat, y = n, label = n), colour = "white", hjust = 1.5) +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,30})(\\s|$)', '\\1\n', x)})
  #png('Please.state.if.your.work.includes.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pFD)
  #dev.off()
  ###
  # Data availability
  # TODO: this section previously held a verbatim copy of the code above
  # (same temp, temp_absNumbers and pFD); the duplicate was removed because it
  # recomputed identical objects. Add the data-availability plot here.
  # Build the long-format data set for the tools / re-use / sharing questions.
  data0 <- neuro_data %>%
    dplyr::select(
      Response.ID,
      starts_with('Have.you.shared.data.with'),
      starts_with('For.which.of.these.tasks.'),
      starts_with('To.what.degree.do.you.'),
      starts_with('Think.of.re.using.data.'),
      starts_with('Think.of.data.sharing.')
    )
  # column-name patterns of the question groups and the names of the value
  # columns they are melted into (the item column gets the suffix 'Cat')
  comb_string_vec <- c('For.which.of.these.tasks.',
                       'To.what.degree.do.you.',
                       'Think.of.re.using.data.',
                       'Think.of.data.sharing.',
                       'Have.you.shared.data.with')
  comb_col_names <- c('TaskStandardTools',
                      'TaskStandardToolsDegree',
                      'ThinkReusingData',
                      'ThinkSharingData',
                      'DataSharing')
  # Melt one question group per iteration; every pass multiplies the number of
  # rows, so the result can become large.
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(
      data.table::as.data.table(data0),
      id.vars = which(!is_measure),
      measure.vars = list(which(is_measure)),
      variable.name = paste0(comb_col_names[i], 'Cat'),
      value.name = comb_col_names[i],
      value.factor = TRUE
    )
    data0 <- as.data.frame(data0)
    # make some nicer labels: the item column is the second to last column
    cat_col <- ncol(data0) - 1
    raw_labels <- levels(data0[, cat_col])
    # keep the text from the end of the first '...' onwards; a name without
    # '...' is kept completely instead of turning into an NA level
    label_start <- str_locate(raw_labels, '\\.\\.\\.')[, 2]
    label_start[is.na(label_start)] <- 1L
    nice_labels <- substr(raw_labels, label_start, nchar(raw_labels))
    nice_labels <- gsub('.', ' ', nice_labels, fixed = TRUE)
    nice_labels <- gsub('e g', 'e.g.', nice_labels)
    # reset the labels
    levels(data0[, cat_col]) <- nice_labels
  }
  data <- data0
  # Standard Tools
  # make a Task Standard Tools plot
  # Standard Task Tools are Yes/No questions ==> only the respondents who
  # answered with Yes are kept; Comment/Other items are removed and answers
  # given fewer than 3 times are dropped.
  # Alternative selection that keeps the data-sharing columns:
  #temp = data %>% select(Response.ID,TaskStandardToolsCat,TaskStandardTools,DataSharingCat,DataSharing) %>% filter(!grepl('Comment|Other',TaskStandardToolsCat)) %>%
  #  #filter(TaskStandardTools != 'No') %>%
  #  na.omit() %>% unique() %>% droplevels()
  temp <- data %>%
    select(Response.ID, TaskStandardToolsCat, TaskStandardTools) %>%
    filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
    filter(TaskStandardTools != 'No') %>%
    na.omit() %>%
    unique() %>%
    group_by(TaskStandardTools) %>%
    filter(n() >= 3)
  # absolute numbers per task and answer, largest count first
  temp_absNumbers <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    arrange(desc(n))
  pST <- ggplot(data = temp_absNumbers) +
    # geom_col() draws the pre-computed counts (identity stat)
    geom_col(
      mapping = aes(x = TaskStandardToolsCat, y = n),
      colour = 'darkblue', fill = 'darkblue',
      width = 0.5
    ) +
    coord_flip() +
    xlab('') + ylab('') +
    ggtitle(paste0('Use of Standard Tools \n(n = ', sum(temp_absNumbers$n), ', multiple answers possible)')) +
    # the count is written in white inside the end of each bar
    geom_text(aes(x = TaskStandardToolsCat, y = n, label = n), colour = "white", hjust = 1.5) +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,30})(\\s|$)', '\\1\n', x)})
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pST)
  tiff('UseOfStandardTools.tiff', width = 30, height = 20, units = "cm", res = 300)
  print(pST)
  dev.off()
  ######################################################################
  ######################################################################
  # Numbers quoted in the text.
  # Respondents with at least one dataset, counted per answer to
  # Other_can_answer, with the fraction each answer makes up.
  temp_absNumbers <- neuro_data_tmp %>%
    na.omit() %>%
    filter(Existing_Data != 'I have no datasets') %>%
    group_by(Other_can_answer) %>%
    summarise(n = n()) %>%
    mutate(percent = n / sum(n))
  cat("Von den Antwortenden die mindestens einen Datensatz haben ... ")
  # NOTE(review): row 3 is picked by position; presumably that is the 'Yes'
  # answer - confirm against the row order of temp_absNumbers
  cat(
    temp_absNumbers$n[3],
    '(',
    round(temp_absNumbers$percent[3], 3) * 100,
    '%) , of all respondents that have at least one dataset are of the opinion that other researchers could answer their own research questions by re-using data from their research.'
  )
  # Respondents who answered 'Yes' to Other_can_answer, counted per answer to
  # Shared_Publicly.
  temp_absNumbers <- neuro_data_tmp %>%
    na.omit() %>%
    #filter(!Existing_Data == 'I have no datasets') %>%
    filter(Other_can_answer == 'Yes') %>%
    group_by(Shared_Publicly) %>%
    summarise(n = n()) %>%
    mutate(percent = n / sum(n))
  S1 <- 'However, even for this subgroup, of scientists in possession of data of which they think are valuable to others '
  S2 <- '% have never shared any of their data publicly.'
  # NOTE(review): row 1 is picked by position; presumably that is the
  # 'never shared' answer - confirm against the row order
  cat(S1, round(temp_absNumbers$percent[1], 3) * 100, S2)
  #############################################################
  # Of those who have data, how many think that these data are also useful
  # for others?
  # Factors promoting public data sharing
  # To identify factors that promote public data sharing, answers of participants
  # were filtered on whether they have already shared their data in
  # public repositories or not. All subjects who did not have any data are excluded.
  # 1. delete subjects which did not have any data
  data1 <- neuro_data %>%
    filter(!Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse. == "I have no datasets")
  x <- colnames(neuro_data)
  data0 <- data1 %>%
    dplyr::select(
      Response.ID,
      starts_with('Have.you.shared.data.with.....Publicly.'),
      starts_with('My.current..primary..position.is'),
      starts_with('Think.of.re.using.data.'),
      starts_with('How.much.time.do.you.currently.need')
    )
  # Question groups to melt. The time-needed and data-sharing groups are melted
  # too, because the plot below reads the TimeNeeded* and DataSharing* columns;
  # previously only the re-use group was melted and those columns did not exist.
  comb_string_vec <- c('Think.of.re.using.data.',
                       'How.much.time.do.you.currently.need',
                       'Have.you.shared.data.with')
  comb_col_names <- c('ThinkREusingData',
                      'TimeNeeded',
                      'DataSharing')
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(
      data.table::as.data.table(data0),
      id.vars = which(!is_measure),
      measure.vars = list(which(is_measure)),
      variable.name = paste0(comb_col_names[i], 'Cat'),
      value.name = comb_col_names[i],
      value.factor = TRUE
    )
    data0 <- as.data.frame(data0)
    # make some nicer labels: the item column is the second to last column
    cat_col <- ncol(data0) - 1
    raw_labels <- levels(data0[, cat_col])
    # keep the text from the end of the first '...' onwards; a name without
    # '...' is kept completely instead of turning into an NA level
    label_start <- str_locate(raw_labels, '\\.\\.\\.')[, 2]
    label_start[is.na(label_start)] <- 1L
    nice_labels <- substr(raw_labels, label_start, nchar(raw_labels))
    nice_labels <- gsub('.', ' ', nice_labels, fixed = TRUE)
    nice_labels <- gsub('e g', 'e.g.', nice_labels)
    # reset the labels
    levels(data0[, cat_col]) <- nice_labels
  }
  data <- data0
  datax <- melt(data, id = c())
  ####################################
  # Time needed for ready dataset
  # remove Comment/Other items
  temp <- data %>%
    select(Response.ID, TimeNeededCat, TimeNeeded, DataSharingCat, DataSharing) %>%
    filter(!grepl('Comment|Other', TimeNeededCat)) %>%
    na.omit() %>%
    unique() %>%
    droplevels()
  # the data viewer is only opened in interactive sessions
  if (interactive()) {
    View(temp)
  }
  # absolute numbers per item, answer and data-sharing answer
  temp_absNumbers <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n())
  # NOTE(review): this plot used to map TaskStandardToolsCat, which is not a
  # column of temp_absNumbers, and was written to 'UseOfStandardTools.tiff',
  # overwriting the standard-tools figure. The mapping, title and file name
  # below are a best guess at the intended time-needed figure - please confirm.
  pTN <- ggplot(data = temp_absNumbers) +
    geom_col(
      mapping = aes(x = TimeNeeded, y = n, fill = DataSharing),
      width = 0.5
    ) +
    coord_flip() +
    xlab('') + ylab('') +
    ggtitle(paste0('Time needed for ready dataset \n(n = ', sum(temp_absNumbers$n), ')')) +
    facet_wrap(~TimeNeededCat, labeller = label_wrap_gen(width = 30)) +
    scale_fill_brewer(palette = 'Accent') +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,30})(\\s|$)', '\\1\n', x)})
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pTN)
  tiff('TimeNeeded.tiff', width = 17.5, height = 7, units = "cm", res = 600)
  print(pTN)
  dev.off()
  ################old##########################
  #############################################
  # Build the long-format data set for the demographic questions.
  data0 <- neuro_data %>%
    dplyr::select(
      Response.ID,
      starts_with('Have.you.shared.data.with'),
      starts_with('Do.you.have.existing.data'),
      starts_with('I.work.at'),
      starts_with('My.current..'),
      starts_with('Which.neuroscience.discipline.s.'),
      starts_with('Please.state.if.your.')
    )
  # column-name patterns of the question groups and the names of the value
  # columns they are melted into (the item column gets the suffix 'Cat')
  comb_string_vec <- c('I.work.at',
                       'My.current..',
                       'Which.neuroscience.discipline.s.',
                       'Please.state.if.your.',
                       'Have.you.shared.data.with',
                       'Do.you.have.existing.data')
  comb_col_names <- c('WorkPlaces',
                      'CurrentPosition',
                      'NeuroDiscipline',
                      'FimilarDataTypes',
                      'DataSharing',
                      'ExistingData')
  # Use this loop with care: the resulting long-format data set can become
  # very large, because every pass multiplies the number of rows.
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(
      data.table::as.data.table(data0),
      id.vars = which(!is_measure),
      measure.vars = list(which(is_measure)),
      variable.name = paste0(comb_col_names[i], 'Cat'),
      value.name = comb_col_names[i],
      value.factor = TRUE
    )
    data0 <- as.data.frame(data0)
    # make some nicer labels: the item column is the second to last column
    cat_col <- ncol(data0) - 1
    raw_labels <- levels(data0[, cat_col])
    # keep the text from the end of the first '...' onwards; a name without
    # '...' is kept completely instead of turning into an NA level
    label_start <- str_locate(raw_labels, '\\.\\.\\.')[, 2]
    label_start[is.na(label_start)] <- 1L
    nice_labels <- substr(raw_labels, label_start, nchar(raw_labels))
    nice_labels <- gsub('.', ' ', nice_labels, fixed = TRUE)
    nice_labels <- gsub('e g', 'e.g.', nice_labels)
    # reset the labels
    levels(data0[, cat_col]) <- nice_labels
  }
  data <- data0
  # make a WorkPlaces plot; answers given fewer than 3 times are dropped
  temp <- data %>%
    select(Response.ID, WorkPlacesCat, WorkPlaces) %>%
    na.omit() %>%
    unique() %>%
    group_by(WorkPlaces) %>%
    filter(n() >= 3)
  pWP <- ggplot(data = temp) +
    # geom_bar() counts the rows per x value by default, so the deprecated
    # `y = ..count..` mapping is not needed
    geom_bar(mapping = aes(x = WorkPlaces), position = position_dodge()) +
    xlab('') + ylab('count') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
          plot.margin = unit(c(0, 0, 0, 0), 'cm')) +
    #facet_grid(.~DataSharingCat,scales = 'fixed',margins = FALSE) +
    scale_fill_brewer(palette = 'Accent') +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('I.work.at.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pWP)
  dev.off()
  # make a Current Position plot; positions given fewer than 3 times are dropped
  temp <- data %>%
    select(Response.ID, CurrentPosition, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique() %>%
    group_by(CurrentPosition) %>%
    filter(n() >= 3)
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  pCP <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = CurrentPosition, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(. ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('My.current.position.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pCP)
  dev.off()
  # make a Neuro Discipline plot
  # Neuro Discipline questions are Yes/No questions ==> only the respondents
  # who answered with Yes are kept; answers given fewer than 5 times are dropped
  temp <- data %>%
    select(Response.ID, NeuroDisciplineCat, NeuroDiscipline, DataSharingCat, DataSharing) %>%
    filter(NeuroDiscipline != 'No') %>%
    na.omit() %>%
    unique() %>%
    group_by(NeuroDiscipline) %>%
    filter(n() >= 5)
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  pND <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = NeuroDisciplineCat, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(NeuroDiscipline ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('Which.neuroscience.discipline.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pND)
  dev.off()
  # make a Familiar Data Types plot
  # Familiar Data Types are Yes/No questions; here both answers are kept (the
  # 'No' filter is disabled) and answers given fewer than 5 times are dropped
  temp <- data %>%
    select(Response.ID, FimilarDataTypesCat, FimilarDataTypes, DataSharingCat, DataSharing) %>%
    #filter(FimilarDataTypes != 'No') %>%
    na.omit() %>%
    unique() %>%
    group_by(FimilarDataTypes) %>%
    filter(n() >= 5)
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  pFD <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = FimilarDataTypesCat, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(FimilarDataTypes ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('Please.state.if.your.work.includes.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pFD)
  dev.off()
  # recreate different datasets
  # Build the long-format data set for the tools / re-use / sharing questions.
  data0 <- neuro_data %>%
    dplyr::select(
      Response.ID,
      starts_with('Have.you.shared.data.with'),
      starts_with('For.which.of.these.tasks.'),
      starts_with('To.what.degree.do.you.'),
      starts_with('Think.of.re.using.data.'),
      starts_with('Think.of.data.sharing.')
    )
  # column-name patterns of the question groups and the names of the value
  # columns they are melted into (the item column gets the suffix 'Cat')
  comb_string_vec <- c('For.which.of.these.tasks.',
                       'To.what.degree.do.you.',
                       'Think.of.re.using.data.',
                       'Think.of.data.sharing.',
                       'Have.you.shared.data.with')
  comb_col_names <- c('TaskStandardTools',
                      'TaskStandardToolsDegree',
                      'ThinkReusingData',
                      'ThinkSharingData',
                      'DataSharing')
  # Melt one question group per iteration; every pass multiplies the number of
  # rows, so the result can become large.
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(
      data.table::as.data.table(data0),
      id.vars = which(!is_measure),
      measure.vars = list(which(is_measure)),
      variable.name = paste0(comb_col_names[i], 'Cat'),
      value.name = comb_col_names[i],
      value.factor = TRUE
    )
    data0 <- as.data.frame(data0)
    # make some nicer labels: the item column is the second to last column
    cat_col <- ncol(data0) - 1
    raw_labels <- levels(data0[, cat_col])
    # keep the text from the end of the first '...' onwards; a name without
    # '...' is kept completely instead of turning into an NA level
    label_start <- str_locate(raw_labels, '\\.\\.\\.')[, 2]
    label_start[is.na(label_start)] <- 1L
    nice_labels <- substr(raw_labels, label_start, nchar(raw_labels))
    nice_labels <- gsub('.', ' ', nice_labels, fixed = TRUE)
    nice_labels <- gsub('e g', 'e.g.', nice_labels)
    # reset the labels
    levels(data0[, cat_col]) <- nice_labels
  }
  data <- data0
  # make a Task Standard Tools plot
  # Standard Task Tools are Yes/No questions; here both answers are kept (the
  # 'No' filter is disabled) and Comment/Other items are removed
  temp <- data %>%
    select(Response.ID, TaskStandardToolsCat, TaskStandardTools, DataSharingCat, DataSharing) %>%
    filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
    #filter(TaskStandardTools != 'No') %>%
    na.omit() %>%
    unique() %>%
    droplevels()
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  pTST <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = TaskStandardToolsCat, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(TaskStandardTools ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('For.which.of.these.tasks.do.you.use.available.tools.or.standards.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pTST)
  dev.off()
  # make a Task Standard Tools Degree plot
  temp <- data %>%
    select(Response.ID, TaskStandardToolsDegreeCat, TaskStandardToolsDegree, DataSharingCat, DataSharing) %>%
    filter(!grepl('Other', TaskStandardToolsDegreeCat)) %>%
    na.omit() %>%
    unique()
  # combine some levels into two coarse categories ('Often' / 'Rare')
  degree <- as.character(temp$TaskStandardToolsDegree)
  degree[degree %in% c('As much as possible', 'Mostly')] <- 'Often'
  degree[degree %in% c('Occasionally', 'This is not relevant for my scientific work')] <- 'Rare'
  temp$TaskStandardToolsDegree <- as.factor(degree)
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  pTSD <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = TaskStandardToolsDegreeCat, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(TaskStandardToolsDegree ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('To.what.degree.do.you.use.available.tools.or.standards.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pTSD)
  dev.off()
  # make a Think of Reusing Data plot
  temp <- data %>%
    select(Response.ID, ThinkReusingDataCat, ThinkReusingData, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique()
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  pTRD <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = ThinkReusingDataCat, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(ThinkReusingData ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('Think.of.re.using.data.from.repositories.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pTRD)
  dev.off()
  # make a Think of Sharing Data plot
  temp <- data %>%
    select(Response.ID, ThinkSharingDataCat, ThinkSharingData, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique()
  # relative frequency; share = n / sum(n) is taken within the groups that
  # remain after summarise()
  temp_relFreq <- temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = n / sum(n))
  # NOTE: the name pTSD is also used for the tools-degree plot and is
  # overwritten here
  pTSD <- ggplot(data = temp_relFreq) +
    # share is a 0-1 fraction, so it is scaled by 100 to match the axis label
    geom_col(
      mapping = aes(x = ThinkSharingDataCat, y = 100 * share, color = DataSharing, fill = DataSharing),
      width = 0.5
    ) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(ThinkSharingData ~ DataSharingCat,
               scales = 'fixed', margins = FALSE,
               labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # the regular expression breaks long axis labels into several lines
    scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  png('Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.png', width = 30, height = 20, units = "cm", res = 300)
  # explicit print(): nothing is auto-printed when the script is source()d
  print(pTSD)
  dev.off()
  ### where are the problems ###
  # recreate different datasets
  # Build the long-format data set for the sharing-problem and data-analysis
  # questions.
  data0 <- neuro_data %>%
    dplyr::select(
      Response.ID,
      starts_with('Have.you.shared.data.with'),
      starts_with('Please.indicate.'),
      starts_with('How.do.you.process.and.analyze.your.data.')
    )
  # column-name patterns of the question groups and the names of the value
  # columns they are melted into (the item column gets the suffix 'Cat')
  comb_string_vec <- c('Please.indicate.',
                       'How.do.you.process.and.analyze.your.data.',
                       'Have.you.shared.data.with')
  comb_col_names <- c('SharingProblems',
                      'HowAnalyzeData',
                      'DataSharing')
  # Melt one question group per iteration; every pass multiplies the number of
  # rows, so the result can become large.
  for (i in seq_along(comb_string_vec)) {
    is_measure <- grepl(comb_string_vec[i], colnames(data0))
    data0 <- data.table::melt(
      data.table::as.data.table(data0),
      id.vars = which(!is_measure),
      measure.vars = list(which(is_measure)),
      variable.name = paste0(comb_col_names[i], 'Cat'),
      value.name = comb_col_names[i],
      value.factor = TRUE
    )
    data0 <- as.data.frame(data0)
    # make some nicer labels: the item column is the second to last column
    cat_col <- ncol(data0) - 1
    raw_labels <- levels(data0[, cat_col])
    # keep the text from the end of the first '...' onwards; a name without
    # '...' is kept completely instead of turning into an NA level
    label_start <- str_locate(raw_labels, '\\.\\.\\.')[, 2]
    label_start[is.na(label_start)] <- 1L
    nice_labels <- substr(raw_labels, label_start, nchar(raw_labels))
    nice_labels <- gsub('.', ' ', nice_labels, fixed = TRUE)
    nice_labels <- gsub('e g', 'e.g.', nice_labels)
    # reset the labels
    levels(data0[, cat_col]) <- nice_labels
  }
  data <- data0
  # make a Sharing Data Problems plot
  # one row per respondent / sub-question / answer / data-sharing combination
  temp = data %>%
    dplyr::select(Response.ID, SharingProblemsCat, SharingProblems, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique()
  # Relative frequency: count every combination, then (summarise() drops the
  # last grouping variable, DataSharing) express each count as a share of its
  # SharingProblemsCat / SharingProblems / DataSharingCat group.
  # The share is scaled to 0-100 so that it matches the 'percent (%)' axis label.
  temp_relFreq = temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = 100 * n / sum(n))
  # geom_col() draws the precomputed shares as stacked bars
  pSP = ggplot(data = temp_relFreq) +
    geom_col(mapping = aes(x = SharingProblemsCat, y = share, color = DataSharing, fill = DataSharing),
             width = 0.5) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(SharingProblems~DataSharingCat, scales = 'fixed', margins = FALSE, labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # regular expression that inserts line breaks into long axis labels
    scale_x_discrete(labels = function(x){gsub('(.{1,70})(\\s|$)', '\\1\n', x)})
  # print() explicitly: a bare object name is not drawn when run via source()
  png('Sharing.problems.please.indicate.png', width = 40, height = 30, units = "cm", res = 300)
  print(pSP)
  dev.off()
  # make a How Analyze Data plot
  # one row per respondent / sub-question / answer / data-sharing combination;
  # answers of HowAnalyzeData that occur in fewer than 5 rows are dropped
  temp = data %>%
    dplyr::select(Response.ID, HowAnalyzeDataCat, HowAnalyzeData, DataSharingCat, DataSharing) %>%
    #filter(HowAnalyzeData != 'No') %>%
    na.omit() %>%
    unique() %>%
    group_by(HowAnalyzeData) %>%
    filter(n() >= 5) %>%
    ungroup() # the grouping was only needed for the size filter
  # Relative frequency: count every combination, then (summarise() drops the
  # last grouping variable, DataSharing) express each count as a share of its
  # HowAnalyzeDataCat / HowAnalyzeData / DataSharingCat group.
  # The share is scaled to 0-100 so that it matches the 'percent (%)' axis label.
  temp_relFreq = temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = 100 * n / sum(n))
  # geom_col() draws the precomputed shares as stacked bars
  pHAD = ggplot(data = temp_relFreq) +
    geom_col(mapping = aes(x = HowAnalyzeDataCat, y = share, color = DataSharing, fill = DataSharing),
             width = 0.5) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(HowAnalyzeData~DataSharingCat, scales = 'fixed', margins = FALSE, labeller = label_wrap_gen(width = 30)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # regular expression that inserts line breaks into long axis labels
    scale_x_discrete(labels = function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)})
  # print() explicitly: a bare object name is not drawn when run via source()
  png('How.do.you.process.and.analyze.your.data.png', width = 30, height = 20, units = "cm", res = 300)
  print(pHAD)
  dev.off()
  # recreate different datasets
  # Build a long-format table for the opinion statements, the ranking of the
  # most pressing issues, the research-data-management questions and the
  # data-sharing experience. Each question group is melted into a pair of
  # columns: '<name>Cat' (which sub-question, taken from the original column
  # name) and '<name>' (the answer given).
  data0 = neuro_data %>%
    dplyr::select(Response.ID,
                  starts_with('Have.you.shared.data.with'),
                  starts_with('What.is.your.opinion'),
                  starts_with('Applying.research.data.management.'),
                  starts_with('Please.rank.the.top.'))
  # column-name patterns identifying the question groups ...
  comb_string_vec = c('What.is.your.opinion',
                      'Please.rank.the.top.',
                      'Applying.research.data.management.',
                      'Have.you.shared.data.with')
  # ... and the names of the melted value columns (same order as above)
  comb_col_names = c('StatementsOpinion',
                     'TopSharingProblems',
                     'ApplyDataManagement',
                     'DataSharing')
  library(data.table)
  for (i in seq_along(comb_string_vec)) {
    # columns matching the current pattern are melted, all others are kept as ids
    is_measure = grepl(comb_string_vec[i], colnames(data0))
    data0 = data.table::melt(as.data.table(data0),
                             id.vars = which(!is_measure),
                             measure.vars = list(which(is_measure)),
                             variable.name = paste0(comb_col_names[i], 'Cat'),
                             value.name = comb_col_names[i],
                             value.factor = TRUE)
    data0 = as.data.frame(data0)
    # make some nicer labels for the category column created above
    # (it is the second-to-last column, the value column is the last one)
    cat_col = ncol(data0) - 1
    level_strings = levels(data0[, cat_col])
    # keep only the part of the column name from the end of the first '...' on;
    # names without '...' are kept whole, because substr() with an NA start
    # would turn the level (and thereby all of its rows) into NA
    sep_end = str_locate(level_strings, '\\.\\.\\.')[, 'end']
    has_sep = !is.na(sep_end)
    level_strings[has_sep] = substr(level_strings[has_sep],
                                    sep_end[has_sep],
                                    nchar(level_strings[has_sep]))
    # dots stem from the sanitised column names: turn them back into blanks
    level_strings = gsub('\\.|\\.\\.', ' ', level_strings)
    level_strings = gsub('e g', 'e.g.', level_strings)
    # reset the labels
    levels(data0[, cat_col]) = level_strings
  }
  data = data0
  # make a Top Sharing Data Problems plot
  # one row per respondent / ranking position / chosen issue
  temp = data %>%
    dplyr::select(Response.ID, TopSharingProblemsCat, TopSharingProblems) %>%
    na.omit() %>%
    unique()
  # group = 1 makes ..prop.. the proportion of each issue among all bars of a
  # panel; it is scaled to 0-100 so that it matches the 'percent (%)' axis label
  pTSP = ggplot(data = temp) +
    geom_bar(mapping = aes(x = TopSharingProblems, y = 100 * ..prop.., group = 1),
             width = 0.5) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(.~TopSharingProblemsCat, scales = 'fixed', margins = FALSE, labeller = label_wrap_gen(width = 25)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # regular expression that inserts line breaks into long axis labels
    scale_x_discrete(labels = function(x){gsub('(.{1,70})(\\s|$)', '\\1\n', x)})
  # print() explicitly: a bare object name is not drawn when run via source()
  png('Please.rank.the.top.most.pressing.issues.png', width = 40, height = 20, units = "cm", res = 300)
  print(pTSP)
  dev.off()
  # make an Apply Data Management plot
  # one row per respondent / sub-question / answer / data-sharing combination
  temp = data %>%
    dplyr::select(Response.ID, ApplyDataManagementCat, ApplyDataManagement, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique()
  # Relative frequency: count every combination, then (summarise() drops the
  # last grouping variable, DataSharing) express each count as a share of its
  # ApplyDataManagementCat / ApplyDataManagement / DataSharingCat group.
  # The share is scaled to 0-100 so that it matches the 'percent (%)' axis label.
  temp_relFreq = temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = 100 * n / sum(n))
  # geom_col() draws the precomputed shares as stacked bars
  pARM = ggplot(data = temp_relFreq) +
    geom_col(mapping = aes(x = ApplyDataManagementCat, y = share, color = DataSharing, fill = DataSharing),
             width = 0.5) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(ApplyDataManagement~DataSharingCat, scales = 'fixed', margins = FALSE, labeller = label_wrap_gen(width = 25)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # regular expression that inserts line breaks into long axis labels
    scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  # print() explicitly: a bare object name is not drawn when run via source()
  png('Applying.research.data.management.png', width = 30, height = 20, units = "cm", res = 300)
  print(pARM)
  dev.off()
  # make a What is your opinion plot
  # one row per respondent / statement / answer / data-sharing combination
  temp = data %>%
    dplyr::select(Response.ID, StatementsOpinionCat, StatementsOpinion, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique()
  # Relative frequency: count every combination, then (summarise() drops the
  # last grouping variable, DataSharing) express each count as a share of its
  # StatementsOpinionCat / StatementsOpinion / DataSharingCat group.
  # The share is scaled to 0-100 so that it matches the 'percent (%)' axis label.
  temp_relFreq = temp %>%
    group_by_at(vars(-Response.ID)) %>%
    summarise(n = n()) %>%
    mutate(share = 100 * n / sum(n))
  # geom_col() draws the precomputed shares as stacked bars
  pOS = ggplot(data = temp_relFreq) +
    geom_col(mapping = aes(x = StatementsOpinionCat, y = share, color = DataSharing, fill = DataSharing),
             width = 0.5) +
    xlab('') + ylab('percent (%)') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
    theme(legend.position = "left", legend.box = "vertical") +
    facet_grid(StatementsOpinion~DataSharingCat, scales = 'fixed', margins = FALSE, labeller = label_wrap_gen(width = 25)) +
    scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
    # regular expression that inserts line breaks into long axis labels
    scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})
  # print() explicitly: a bare object name is not drawn when run via source()
  png('What.is.your.opinion.on.the.following.statements.png', width = 30, height = 20, units = "cm", res = 300)
  print(pOS)
  dev.off()
  #### polar plot try ####
  # outline colours for the DataSharing answers
  # NOTE(review): scale_color_manual() below gets exactly two colours - confirm
  # that DataSharing has at most two answer levels
  cbp1 <- c("#000000", "#FFFFFF")
  # `data` as built directly above only holds the opinion / ranking /
  # data-management / data-sharing columns and has no CurrentPosition column,
  # so selecting it from `data` fails. Rebuild the long table needed here from
  # the survey data instead.
  # NOTE(review): assumes CurrentPosition is meant to be the melted
  # 'My.current..' answer, as in the Figure 1 setup - confirm
  polar_data = neuro_data %>%
    dplyr::select(Response.ID,
                  starts_with('Have.you.shared.data.with'),
                  starts_with('My.current..'))
  polar_patterns = c('My.current..', 'Have.you.shared.data.with')
  polar_col_names = c('CurrentPosition', 'DataSharing')
  for (k in seq_along(polar_patterns)) {
    # columns matching the current pattern are melted, all others are kept as ids
    polar_is_measure = grepl(polar_patterns[k], colnames(polar_data))
    polar_data = data.table::melt(as.data.table(polar_data),
                                  id.vars = which(!polar_is_measure),
                                  measure.vars = list(which(polar_is_measure)),
                                  variable.name = paste0(polar_col_names[k], 'Cat'),
                                  value.name = polar_col_names[k],
                                  value.factor = TRUE)
    polar_data = as.data.frame(polar_data)
    # nicer labels for the category column (second-to-last column): keep the
    # part of the column name from the end of the first '...' on, or the whole
    # name when there is no '...', and turn the dots back into blanks
    polar_cat_col = ncol(polar_data) - 1
    polar_levels = levels(polar_data[, polar_cat_col])
    polar_sep_end = str_locate(polar_levels, '\\.\\.\\.')[, 'end']
    polar_has_sep = !is.na(polar_sep_end)
    polar_levels[polar_has_sep] = substr(polar_levels[polar_has_sep],
                                         polar_sep_end[polar_has_sep],
                                         nchar(polar_levels[polar_has_sep]))
    polar_levels = gsub('\\.|\\.\\.', ' ', polar_levels)
    polar_levels = gsub('e g', 'e.g.', polar_levels)
    levels(polar_data[, polar_cat_col]) = polar_levels
  }
  # one row per respondent / position / data-sharing combination
  temp = polar_data %>%
    dplyr::select(Response.ID, CurrentPosition, DataSharingCat, DataSharing) %>%
    na.omit() %>%
    unique()
  # stacked counts per position, drawn in polar coordinates
  pCP = ggplot(data = temp) +
    geom_bar(mapping = aes(x = CurrentPosition, color = DataSharing, fill = DataSharingCat), width = 0.75) +
    xlab('') + ylab('counts') +
    theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1), plot.margin = unit(c(0, 0, 0, 0), 'cm')) +
    #facet_grid(.~DataSharingCat,scales = 'fixed',margins = FALSE) +
    scale_color_manual(values = cbp1) + scale_fill_brewer(palette = "Dark2") + coord_polar(theta = 'y', clip = 'off') +
    # regular expression that inserts line breaks into long axis labels
    scale_x_discrete(labels = function(x){gsub('(.{1,20})(\\s|$)', '\\1\n', x)})
  # print() explicitly: a bare object name is not drawn when run via source()
  print(pCP)