12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747 |
# Session setup: clear the workspace, switch to the script's directory and
# clear the console.

# clean workspace
rm(list = ls())

# The csv file has to be in the same directory as this script.
# The editor path is only available inside RStudio, and it is empty for an
# unsaved document. In every other case (e.g. Rscript) the current working
# directory is kept instead of aborting the whole script.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getSourceEditorContext()$path
  if (length(script_path) == 1 && nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}

# clear the console (form feed is interpreted by the RStudio console)
cat("\014")
- # load libraries
- library(ggplot2)
- library(dplyr)
- library(ggpubr)
- library(ragg)
- library(reshape2)
- library(data.table)
- library(stringr)
- library(cowplot)
- library(ggrepel)
- library(forcats)
- library(yarrr)
# Load the survey export. Empty strings and "N/A" are read as NA.
# neuro_data <- read.csv("results-survey197421_nurkomplett.csv",row.names=NULL,na.strings=c("","N/A"),sep=',')
neuro_data2 <- read.csv(
  "results-survey197421_alledaten.csv",
  row.names = NULL,
  na.strings = c("", "N/A"),
  sep = ","
)

# Keep only respondents who stated their current (primary) position.
neuro_data <- neuro_data2[!is.na(neuro_data2$My.current..primary..position.is.), ]

# The analysis asks what distinguishes people who share data from those who
# do not, and more generally which problems exist in our data infrastructure.

# The first column holds the response id; give it a fixed name.
colnames(neuro_data)[1] <- "Response.ID"

# Remove one outlier (response 78 is an empty row).
# `%in%` never yields NA, so a row with a missing id is kept unchanged instead
# of being turned into an all-NA row as `!=` would do.
neuro_data <- neuro_data[!(neuro_data$Response.ID %in% 78), ]

# Replace whitespace and commas in the column names with dots.
colnames(neuro_data) <- str_replace_all(colnames(neuro_data), " ", ".")
colnames(neuro_data) <- str_replace_all(colnames(neuro_data), ",", ".")
#######################################################
#### Figure 1 #########################################
###############
#### Neuro Disciplines + Current Position #############
#######################################################

# Keep the respondent id plus every column that belongs to one of the five
# question groups reshaped below.
data0 <- neuro_data %>%
  dplyr::select(
    Response.ID,
    starts_with("Have.you.shared.data.with"),
    starts_with("I.work.at"),
    starts_with("My.current.."),
    starts_with("Which.neuroscience.discipline.s."),
    starts_with("Please.state.if.your.")
  )

# Column-name prefix of each question group ...
comb_string_vec <- c(
  "I.work.at",
  "My.current..",
  "Which.neuroscience.discipline.s.",
  "Please.state.if.your.",
  "Have.you.shared.data.with"
)
# ... and the name of the long-format value column created for it
# (the matching category column gets the suffix "Cat").
comb_col_names <- c(
  "WorkPlaces",
  "CurrentPosition",
  "NeuroDiscipline",
  "FimilarDataTypes",
  "DataSharing"
)

# Turn raw column names into readable category labels:
# keep the part from the last dot of the first "..." separator onwards,
# replace dots by spaces and restore the abbreviation "e.g.".
# Names without a "..." separator are kept in full instead of becoming NA.
clean_level_label <- function(label) {
  sep_end <- str_locate(label, "\\.\\.\\.")[, "end"]
  sep_end[is.na(sep_end)] <- 1L
  label <- substr(label, sep_end, nchar(label))
  label <- gsub("\\.", " ", label)
  gsub("e g", "e.g.", label)
}

# Use this loop with care: every pass melts one question group into long
# format, so the resulting data set can become very large.
library(data.table)
for (i in seq_along(comb_string_vec)) {
  # Same literal prefix match as the starts_with() selection above.
  is_measure <- startsWith(colnames(data0), comb_string_vec[i])

  data0 <- data.table::melt(
    as.data.table(data0),
    id.vars = which(!is_measure),
    measure.vars = list(which(is_measure)),
    variable.name = paste0(comb_col_names[i], "Cat"),
    value.name = comb_col_names[i],
    value.factor = TRUE
  )

  # make some nicer labels for the category column (second to last column)
  data0 <- as.data.frame(data0)
  cat_col <- ncol(data0) - 1
  levels(data0[, cat_col]) <- clean_level_label(levels(data0[, cat_col]))
}
data <- data0
################################
# Current Position plot (Figure 1B)

# One row per respondent and position; positions chosen by fewer than three
# respondents are dropped.
temp <- data %>%
  select(Response.ID, CurrentPosition) %>%
  na.omit() %>%
  unique() %>%
  group_by(CurrentPosition) %>%
  filter(n() >= 3)

# Absolute (n) and relative (percent) frequency per position.
# NOTE(review): `ordering` is hard-coded for exactly seven remaining
# categories; mutate() fails if the data yields a different number.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(percent = round(n / sum(n) * 100, 0)) %>%
  mutate(ordering = c(1, 7, 5, 6, 3, 2, 4)) %>%
  arrange(ordering) %>%
  mutate(CurrentPosition = str_replace(CurrentPosition, "Research data management focused staff", "RDM staff"))
# mutate(CurrentPosition = fct_reorder(temp_relFreq$CurrentPosition, temp_relFreq$ordering, min))

# function to wrap around long labels (line break after at most 20 characters)
my_label_func <- function(x) {
  gsub("(.{1,20})(\\s|$)", "\\1\n", x)
}

# Label positions (middle of each slice) and label texts for the pie chart.
# Large slices (n > 10) and small slices (n <= 10) are kept apart so that the
# small ones can be nudged outwards in the plot.
df_label <- temp_relFreq %>%
  arrange(desc(CurrentPosition)) %>%
  mutate(
    lab_ypos = cumsum(percent) - 0.5 * percent,
    lab_label = my_label_func(paste0(CurrentPosition, ": ", percent, "%"))
  )
df_label1 <- df_label %>% filter(n > 10)
df_label2 <- df_label %>% filter(n <= 10)

mycol <- yarrr::piratepal("xmen", plot.result = TRUE, trans = .1)

## No 'x' mapping; bars of constant width; polar coordinates with theta
## applied to the Y axis
F1B <- ggplot(data = temp_relFreq, aes(x = factor(1), fill = CurrentPosition)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Accent")
# Plots are printed explicitly: a bare object is not drawn when the script is
# run through source().
print(F1B)

# Exploratory variants of the pie chart.
df <- temp_relFreq
p <- ggplot(df, aes(x = 1, y = percent, fill = CurrentPosition)) +
  geom_bar(stat = "identity") +
  ggtitle(paste0("Current Position (n = ", sum(df$n), ")")) +
  coord_polar(theta = "y") +
  scale_fill_brewer(palette = "Set3")
print(p)

p <- p + geom_bar(stat = "identity", color = "black")
p <- p +
  theme(
    axis.ticks = element_blank(), # the axis ticks
    axis.title = element_blank(), # the axis labels
    axis.text = element_blank(),
    panel.grid = element_blank()
  )
print(p)

p <- p +
  xlab("") +
  ylab("")
print(p)

# Final version of Figure 1B: pie chart with repelled in-plot labels.
# The title reports the number of respondents (sum of n), not the sum of the
# rounded percentages.
F1B <- ggplot(data = temp_relFreq) +
  geom_col(
    mapping = aes(x = factor(1), y = percent, fill = CurrentPosition),
    width = 1
  ) +
  coord_polar(theta = "y") +
  xlab("") +
  ylab("") +
  ggtitle(paste0("Current Position (n = ", sum(temp_relFreq$n), ")")) +
  theme(
    text = element_text(size = 7),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none"
  ) +
  geom_text_repel(
    data = df_label1,
    aes(x = factor(1), y = lab_ypos, label = lab_label),
    colour = "black", box.padding = 0.5, point.size = NA, nudge_x = 0,
    size = 5.8 / .pt
  ) +
  geom_text_repel(
    data = df_label2,
    aes(x = factor(1), y = lab_ypos, label = lab_label),
    colour = "black", box.padding = 0.5, point.size = NA, nudge_x = 1,
    size = 5.8 / .pt
  ) +
  scale_fill_brewer(palette = "Set3") # mycol) #Set3
print(F1B)
# F1B <- p

# Absolute and relative frequencies of the current position.
print(sprintf("Absolute und relative Häufigkeiten der %s", "Current Position"))
print(temp_relFreq, quote = TRUE, row.names = FALSE)

# Write Figure 1B to disk; print() is required for the plot to reach the
# device when the script is sourced.
# png('CurrentPosition.png', width = 30, height = 20, units = "cm", res = 300)
ragg::agg_tiff("Fig1B.tiff", width = 8, height = 8, units = "cm", res = 600, scaling = 1)
print(F1B)
dev.off()
# Neuro Discipline plot (Figure 1A)
# Neuro Discipline questions are Yes/No questions ==> only the respondents who
# answered with Yes are needed.

# Shorten the discipline labels for plotting. The replacements are applied in
# this order, and each one replaces only the first match within a label
# ("neuroscience" must be handled before the generic "science").
shorten_discipline_label <- function(label) {
  patterns <- c(
    "imaging",
    "Theoretical",
    "behavioral",
    "neuroscience",
    "Neuroscience",
    "e.g. electrophysiological recording behavior tracking ",
    "e.g. patient involvement clinical trials ",
    "science",
    "e.g. modeling simulation "
  )
  replacements <- c(
    "Imaging",
    "Theoret.",
    "Behav.",
    "Neurosci.",
    "Neurosci.",
    "",
    "",
    "Science",
    ""
  )
  for (k in seq_along(patterns)) {
    label <- str_replace(label, patterns[k], replacements[k])
  }
  label
}

# One row per respondent and discipline answered with something other than
# "No"; answer values given by fewer than three respondents are dropped.
temp <- data %>%
  select(Response.ID, NeuroDisciplineCat, NeuroDiscipline) %>%
  filter(NeuroDiscipline != "No") %>%
  na.omit() %>%
  unique() %>%
  group_by(NeuroDiscipline) %>%
  filter(n() >= 3) %>%
  mutate(NeuroDisciplineCat = shorten_discipline_label(NeuroDisciplineCat))

# Absolute numbers per discipline; the percentage is relative to the number
# of respondents counted in temp_relFreq (current position table).
temp_absNumbers <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(percent = round(n / sum(temp_relFreq$n) * 100)) %>%
  arrange(desc(percent))
# temp_relFreq2 = temp %>%
#   group_by_at(vars(-Response.ID, -NeuroDiscipline)) %>%
#   summarise(n = n()) %>%
#   mutate(percent = n / 218) %>%
#   arrange(desc(percent))
# print(temp_relFreq2, quote = TRUE, row.names = FALSE)

# Horizontal bar chart, bars ordered by percentage, value printed in the bar.
F1A <- ggplot(data = temp_absNumbers) +
  geom_col(
    mapping = aes(x = reorder(NeuroDisciplineCat, percent), y = percent),
    colour = "darkblue", fill = "darkblue",
    width = 0.5
  ) +
  coord_flip() +
  xlab("") +
  ylab("") +
  ggtitle(paste0("Neuro Disciplines (n = ", sum(temp_absNumbers$n), ")")) +
  geom_text(
    aes(x = NeuroDisciplineCat, y = percent, label = paste0(" ", percent, "%")),
    colour = "white", hjust = 1.2, size = 8 / .pt
  ) +
  scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " ", " "), width = 20)) +
  # scale_x_discrete(labels=function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)}) # regular expression solution for multi-line labels
  theme(text = element_text(size = 8))
# Plots are printed explicitly: a bare object is not drawn when the script is
# run through source().
print(F1A)

# Write Figure 1A to disk.
ragg::agg_tiff("Fig1A.tiff", width = 8, height = 8, units = "cm", res = 600, scaling = 1)
print(F1A)
dev.off()
# Combine both graphs (disciplines left, current position right) into Figure 1.
F1 <- plot_grid(
  F1A, F1B,
  nrow = 1, ncol = 2,
  align = "h", axis = "lr",
  scale = 1,
  rel_widths = c(1, 1)
)
# Plots are printed explicitly: a bare object is not drawn when the script is
# run through source(), which would leave the TIFF below empty.
print(F1)

ragg::agg_tiff("Fig1.tiff", width = 17.5, height = 8, units = "cm", res = 600, scaling = 1)
# tiff('Fig1_CurrentntPosition_AND_NeuroDiscipline.tiff', width = 30, height = 20, units = "cm", res = 300)
# png('CurrentntPositionNeuroDiszipline.png', width = 30, height = 20, units = "cm", res = 300)
print(F1)
dev.off()
####################################################
#### Figure 2 ######################################
###############
#### General Data Sharing ##########################

# Respondent id, work place, position and the question groups needed for the
# data sharing figures.
data0 <- neuro_data %>%
  dplyr::select(
    Response.ID,
    I.work.at...I.am.affiliated.with.,
    My.current..primary..position.is.,
    starts_with("Have.you.shared.data.with"),
    starts_with("Do.you.have.existing.data.sets."),
    starts_with("Which.neuroscience.discipline.s."),
    starts_with("Please.state.if.your.")
  )

# Give the single-column questions short names (a name that is not present is
# simply left alone).
short_names <- c(
  "Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse." = "Existing_Data",
  "I.work.at...I.am.affiliated.with." = "Work_Place",
  "My.current..primary..position.is." = "CurrentPosition"
)
for (old_name in names(short_names)) {
  colnames(data0)[names(data0) == old_name] <- short_names[[old_name]]
}

# Column-name prefix of each multi-column question group and the name of the
# long-format value column created for it (category column: suffix "Cat").
comb_string_vec <- c(
  "Which.neuroscience.discipline.s.",
  "Please.state.if.your.",
  "Have.you.shared.data.with"
)
comb_col_names <- c(
  "NeuroDiscipline",
  "FimilarDataTypes",
  "DataSharing"
)

# Use this loop with care: every pass melts one question group into long
# format, so the resulting data set can become very large.
library(data.table)
for (i in seq_along(comb_string_vec)) {
  # Same literal prefix match as the starts_with() selection above.
  group_cols <- startsWith(colnames(data0), comb_string_vec[i])

  data0 <- as.data.frame(
    data.table::melt(
      as.data.table(data0),
      id.vars = which(!group_cols),
      measure.vars = list(which(group_cols)),
      variable.name = paste0(comb_col_names[i], "Cat"),
      value.name = comb_col_names[i],
      value.factor = TRUE
    )
  )

  # Make nicer labels for the category column (second to last column): keep
  # the text from the last dot of the first "..." separator onwards, turn
  # dots into spaces and restore "e.g.". A name without a "..." separator is
  # kept in full rather than turned into NA.
  cat_idx <- ncol(data0) - 1
  raw_labels <- levels(data0[, cat_idx])
  label_start <- str_locate(raw_labels, "\\.\\.\\.")[, "end"]
  label_start[is.na(label_start)] <- 1L
  nice_labels <- substr(raw_labels, label_start, nchar(raw_labels))
  nice_labels <- gsub("\\.", " ", nice_labels)
  nice_labels <- gsub("e g", "e.g.", nice_labels)

  # reset the labels
  levels(data0[, cat_idx]) <- nice_labels
}
data <- data0
# Respondents who answered the "shared with external collaborators" question.
temp2 <- neuro_data %>%
  select(Response.ID, Have.you.shared.data.with.....External.collaborators.) %>%
  na.omit()

###
# Data Sharing
# temp = data %>% select(Response.ID,FimilarDataTypesCat,FimilarDataTypes) %>% filter(FimilarDataTypes != 'No') %>%
#   na.omit() %>% unique() %>% group_by(FimilarDataTypes) %>% filter(n() >= 3)

# One row per respondent and sharing target answered with something other
# than "No"; answer values given by fewer than three respondents are dropped.
temp <- data %>%
  select(Response.ID, DataSharingCat, DataSharing) %>%
  filter(DataSharing != "No") %>%
  na.omit() %>%
  unique() %>%
  group_by(DataSharing) %>%
  filter(n() >= 3)
# temp = data %>% select(Response.ID,DataSharingCat,DataSharing) %>% filter()
#   na.omit() %>% unique() %>% group_by(DataSharing) %>% filter(n() >= 3)

# NOTE(review): 144 is a hard-coded denominator, presumably the number of
# respondents who answered the data sharing questions - confirm against the
# data before reusing the script with another export.
n_sharing_respondents <- 144

# Absolute numbers and rounded percentages per sharing target and answer.
temp_absNumbers <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(percent = round(n / n_sharing_respondents * 100, 0))

# Relative frequency per sharing target (answers pooled).
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID, -DataSharing)) %>%
  summarise(n = n()) %>%
  mutate(percent = n / n_sharing_respondents)

# Horizontal bar chart, bars ordered by percentage, value printed in the bar.
p <- ggplot(data = temp_absNumbers) +
  geom_col(
    mapping = aes(x = reorder(DataSharingCat, percent), y = percent),
    colour = "darkblue", fill = "darkblue",
    width = 0.5
  ) +
  coord_flip() +
  theme(text = element_text(size = 11)) +
  # theme(plot.margin = unit(c(0.5,0.2,0.2,5), "cm")) +
  xlab("") +
  ylab("") +
  ggtitle(paste0("Datasharing")) +
  geom_text(
    aes(x = DataSharingCat, y = percent, label = paste0(percent, "%")),
    colour = "white", hjust = 1.2
  ) +
  scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " ", " "), width = 20))
# scale_x_discrete(labels=function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)}) + # regular expression solution for multi-line labels

# Notes on text sizes:
# The theme text size is independent of the height of the graphic; it relates
# to the resolution (600 per cm).
# In the theme, the size is defined in pts, so 15 means 15 pts. In geom_text
# the size is defined in mm, so 15 means 15 mm.
#
# Relation between pts and mm:
#   1 pt = 1/72 in
#   1 pt = 0.35 mm
# For text of the same size as a 15 pt title, the size in mm is
# 15 pt * 0.35 mm/pt = 5.25 mm.
#
# ggplot2 defines a constant for the conversion, .pt = 2.845276
# (1/.pt = 0.35):
#   ggplot2::.pt
#   ## [1] 2.845276
# Conversion:
#   from pt to mm : mm = pt / .pt -> 15 / 2.845276 = 5.27
#   from mm to pt : pt = mm * .pt -> 5.27 * 2.845276 = 15
# Example with geom_text at the same size as the title (size = 15/.pt):
#   plt <- penguins %>%
#     ggplot(aes(bill_length_mm, bill_depth_mm, color = species)) +
#     geom_point() +
#     geom_text(x = 45, y = 20, label = "Example of font problem", size = 15/.pt, inherit.aes = FALSE) +
#     labs(title = "Bill length and depth relation by species") +
#     theme(plot.title = element_text(size = 15))
# Affinity Designer reports a 2 cm margin ...

# Plots are printed explicitly: a bare object is not drawn when the script is
# run through source(), which would leave the TIFF below empty.
print(p)
# tiff('Fig3_Use_Standard_Tools.tiff', width = 17.5, height = 8, units = "cm", res = 600)
# pFD
ragg::agg_tiff("Fig2_DataSharing3.tiff", width = 17.5, height = 6, units = "cm", res = 600, scaling = 1)
print(p)
# plt
dev.off()
#####################################
####
# Reusing data of others
#####################################

# Select the three questions used below and give them short names:
#   Other_can_answer - others could answer their questions by re-using my data
#   Existing_Data    - existing data sets that should be kept alive for reuse
#   Shared_Publicly  - data has been shared publicly
neuro_data_tmp <- neuro_data %>%
  dplyr::select(
    Response.ID,
    Other_can_answer = Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
    Existing_Data = Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
    Shared_Publicly = Have.you.shared.data.with.....Publicly.
  )

# Complete answers of respondents who have at least one data set.
has_datasets <- neuro_data_tmp %>%
  na.omit() %>%
  filter(!Existing_Data == 'I have no datasets')

# Among respondents with at least one data set: distribution of the
# "existing data sets" answers.
temp_absNumbers <- has_datasets %>%
  group_by(Existing_Data) %>%
  summarise(n = n()) %>%
  mutate(percent = n / sum(n))

# Every message ends with a newline so that consecutive outputs do not run
# into each other or into the printed table.
# NOTE(review): the reported category is picked by position (third row of the
# sorted table) - confirm that this row is the intended answer category.
cat("Von den Antwortenden die mindestens einen Datensatz haben ... wieviel Prozent haben diesen verfuegbar gemacht", "\n")
cat(temp_absNumbers$n[3], '(', round(temp_absNumbers$percent[3], 3) * 100, '%) haben diese DAten verfuegbar gemacht', "\n")
print(temp_absNumbers, quote = TRUE, row.names = FALSE)

# Among respondents with at least one data set: do they think others could
# re-use their data?
temp_absNumbers <- has_datasets %>%
  group_by(Other_can_answer) %>%
  summarise(n = n()) %>%
  mutate(percent = n / sum(n))

# NOTE(review): row 3 is again selected by position - confirm it is "Yes".
cat("Von den Antwortenden die mindestens einen Datensatz haben ... ")
cat(temp_absNumbers$n[3], '(', round(temp_absNumbers$percent[3], 3) * 100, '%) , of all respondents that have at least one dataset are of the opinion that other researchers could answer their own research questions by re-using data from their research.', "\n")

# Among respondents who think their data is useful to others: public sharing.
temp_absNumbers <- neuro_data_tmp %>%
  na.omit() %>%
  # filter(!Existing_Data == 'I have no datasets') %>%
  filter(Other_can_answer == 'Yes') %>%
  group_by(Shared_Publicly) %>%
  summarise(n = n()) %>%
  mutate(percent = n / sum(n))

S1 <- 'However, even for this subgroup, of scientists in possession of data of which they think are valuable to others '
S2 <- '% have never shared any of their data publicly.'
# NOTE(review): row 1 is selected by position - confirm it is the "No" group.
cat(S1, round(temp_absNumbers$percent[1], 3) * 100, S2, "\n")
- ##########################################################
- ##########################################################
- # Research data management skills are essential for preparing, analyzing, and publicly sharing data.
- # Only 18% think that they have proficiency in research data management and only 34% think that they
- # know which research data management methods are available.
- # Interestingly, 58% of all respondents nevertheless think that they can handle their research data
- # according to community standards. This could be due to the availability of data research managers
- # who help in data handling.
- # However, only 25 (20%) of participants have dedicated personnel with research data management
- # or data curation expertise.
- neuro_data_tmp = 0
- neuro_data_tmp = neuro_data %>% dplyr::select(Response.ID,
- Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
- Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
- Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
- What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
- Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
- What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
- What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
- What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
- Have.you.shared.data.with.....Publicly.
- )
- #Existing_Data
# Replace the verbose survey headers with short analysis names.
# Each entry reads `short name = original header`; headers that do not occur
# in `neuro_data_tmp` are simply skipped.
short_names <- c(
  Existing_Data = 'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.',
  Other_can_answer = 'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..',
  Shared_Publicly = 'Have.you.shared.data.with.....Publicly.',
  I_know_how_to_publish_my_data_reproducible = 'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.',
  I_have_RDM_personal = 'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.',
  I_can_handle_RD_community_standards = 'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.',
  I_have_proficiency_in_RDM = 'What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.',
  Iam_highly_knowledgeable_in_RDM = 'What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.',
  I_know_RDM_available_Methods = 'What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.'
)
# Position of every current column in the lookup (NA = not a renamed header).
header_pos <- match(colnames(neuro_data_tmp), short_names)
is_known <- !is.na(header_pos)
colnames(neuro_data_tmp)[is_known] <- names(short_names)[header_pos[is_known]]
# ---- RDM skills: printed summary paragraph ----
# Sentence fragments; the counts and percentages computed below are printed in
# between them. S2-S5 are printed again by the "Use of tools and standards"
# section further down, so their names must stay as they are.
# Fix: S1 and S3 contained a literal '/n' where a newline escape was meant.
S1 <- 'Research data management skills are essential for preparing, analyzing, and publicly sharing data. \n Only '
S2 <- 'think that they have proficiency in research data management and only '
S3 <- 'think that they know which research data management methods are available. \n Interestingly, '
S4 <- 'of all respondents nevertheless think that they can handle their research data according to community standards. This could be due to the availability of data research managers who help in data handling. However, only '
S5 <- 'of participants have dedicated personnel with research data management or data curation expertise.'

# Answer groupings (not used in this section itself).
agree <- c('Fully agree', 'Rather agree')
disagree <- c('Fully disagree', 'Rather disagree')
not_agree <- c('Fully agree', 'Rather agree', 'Undecided')
not_disagree <- c('Fully disagree', 'Rather disagree', 'Undecided')

# Merge "Fully ..."/"Rather ..." answers into plain "Agree"/"Disagree".
collapse_agreement <- function(answers) {
  answers <- replace(answers, str_detect(answers, " agree"), "Agree")
  replace(answers, str_detect(answers, " disagree"), "Disagree")
}

# Count the rows of an already grouped table and add each group's share.
# NOTE(review): na.omit() looks at every column of the table, so a respondent
# with a missing value in any column is dropped, not only those who skipped
# the question being summarised - confirm this is intended.
count_with_share <- function(grouped) {
  grouped %>%
    na.omit() %>%
    summarise(n = n()) %>%
    mutate(percent = n / sum(n))
}

# Print "<n> ( <percent> %) " for the rows of `tab` whose answer equals
# `answer`. The trailing "" makes cat() end with a space so the next
# fragment does not run into the closing bracket.
print_share <- function(tab, answers, answer) {
  hit <- answers == answer
  cat(tab$n[hit], '(', round(tab$percent[hit], 3) * 100, '%)', '')
}

cat(S1)
df_np <- neuro_data_tmp %>%
  mutate(I_have_proficiency_in_RDM = collapse_agreement(I_have_proficiency_in_RDM)) %>%
  group_by_at(vars(I_have_proficiency_in_RDM)) %>%
  count_with_share()
print_share(df_np, df_np$I_have_proficiency_in_RDM, 'Agree')

cat(S2)
df_np <- neuro_data_tmp %>%
  mutate(I_know_RDM_available_Methods = collapse_agreement(I_know_RDM_available_Methods)) %>%
  group_by_at(vars(I_know_RDM_available_Methods)) %>%
  count_with_share()
print_share(df_np, df_np$I_know_RDM_available_Methods, 'Agree')

cat(S3)
df_np <- neuro_data_tmp %>%
  mutate(I_can_handle_RD_community_standards = collapse_agreement(I_can_handle_RD_community_standards)) %>%
  group_by_at(vars(I_can_handle_RD_community_standards)) %>%
  count_with_share()
print_share(df_np, df_np$I_can_handle_RD_community_standards, 'Agree')

cat(S4)
# This question is not an agreement scale, so the answers are used as given.
df_np <- neuro_data_tmp %>%
  group_by_at(vars(I_have_RDM_personal)) %>%
  count_with_share()
print_share(df_np, df_np$I_have_RDM_personal, 'Yes, in my lab')
cat(S5)
###########################################################
# Use of tools and standards
###########################################################
#
# We inquired about the use of existing tools and standards for different
# research data management activities, if this process step was relevant for
# the participants.

# Keep the respondent id, the single-answer RDM questions and the two
# multi-column question groups about standard tools.
neuro_data_tmp <- dplyr::select(
  neuro_data,
  Response.ID,
  Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
  Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
  Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
  What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
  Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
  What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
  What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
  What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
  Have.you.shared.data.with.....Publicly.,
  starts_with('For.which.of.these.tasks.'),
  starts_with('To.what.degree.do.you.')
)

# Short analysis names for the verbose headers (`short name = header`);
# headers that are not present are skipped.
short_names <- c(
  Existing_Data = 'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.',
  Other_can_answer = 'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..',
  Shared_Publicly = 'Have.you.shared.data.with.....Publicly.',
  I_know_how_to_publish_my_data_reproducible = 'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.',
  I_have_RDM_personal = 'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.',
  I_can_handle_RD_community_standards = 'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.',
  I_have_proficiency_in_RDM = 'What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.',
  Iam_highly_knowledgeable_in_RDM = 'What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.',
  I_know_RDM_available_Methods = 'What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.'
)
header_pos <- match(colnames(neuro_data_tmp), short_names)
is_known <- !is.na(header_pos)
colnames(neuro_data_tmp)[is_known] <- names(short_names)[header_pos[is_known]]

# Header prefix of each multi-column question group and the name its
# long-format answer column gets (the item column gets the suffix "Cat").
comb_string_vec <- c('For.which.of.these.tasks.',
                     'To.what.degree.do.you.')
comb_col_names <- c('TaskStandardTools',
                    'TaskStandardToolsDegree')

data0 <- neuro_data_tmp
library(data.table)
# One pass per question group: fold the group's columns into an item column
# and an answer column, keeping every other column as an identifier.
for (grp in seq_along(comb_string_vec)) {
  in_group <- grepl(comb_string_vec[grp], colnames(data0))
  data0 <- as.data.frame(
    data.table::melt(
      as.data.table(data0),
      id.vars = which(!in_group),
      measure.vars = list(which(in_group)),
      variable.name = paste0(comb_col_names[grp], 'Cat'),
      value.name = comb_col_names[grp],
      value.factor = TRUE
    )
  )

  # The item column is read from the second-to-last position. Its levels are
  # the original headers; keep the text from the end of the first "..."
  # onwards, turn dots into spaces and restore the abbreviation "e.g.".
  item_col <- ncol(data0) - 1
  item_labels <- levels(data0[, item_col])
  sep_end <- str_locate(item_labels, '\\.\\.\\.')[, 2]
  item_labels <- substr(item_labels, sep_end, nchar(item_labels))
  item_labels <- gsub('\\.|\\.\\.', ' ', item_labels)
  item_labels <- gsub('e g', 'e.g.', item_labels)
  levels(data0[, item_col]) <- item_labels
}
data <- data0
# Opening sentence of the section, followed by the same four statistics as in
# the RDM skills paragraph; S2-S5 are the fragments defined further up.
S1 <- 'We inquired about the use of existing tools and standards for different research data man-agement activities, if this process step was relevant for the participants. '
cat(S1)

# Answer groupings (not used in this section itself).
agree <- c('Fully agree', 'Rather agree')
disagree <- c('Fully disagree', 'Rather disagree')
not_agree <- c('Fully agree', 'Rather agree', 'Undecided')
not_disagree <- c('Fully disagree', 'Rather disagree', 'Undecided')

# Fold the "Fully"/"Rather" qualifiers into plain "Agree"/"Disagree".
fold_likert <- function(x) {
  x <- replace(x, str_detect(x, " agree"), "Agree")
  replace(x, str_detect(x, " disagree"), "Disagree")
}

# Drop incomplete rows, count the rows per group and add each group's share.
tally_share <- function(grouped) {
  grouped %>%
    na.omit() %>%
    summarise(n = n()) %>%
    mutate(percent = n / sum(n))
}

# Proficiency in research data management.
df_np <- neuro_data_tmp %>%
  mutate(I_have_proficiency_in_RDM = fold_likert(I_have_proficiency_in_RDM)) %>%
  group_by(I_have_proficiency_in_RDM) %>%
  tally_share()
picked <- df_np$I_have_proficiency_in_RDM == 'Agree'
cat(df_np$n[picked], '(', round(df_np$percent[picked], 3) * 100, '%)')
cat(S2)

# Knowledge of the available methods.
df_np <- neuro_data_tmp %>%
  mutate(I_know_RDM_available_Methods = fold_likert(I_know_RDM_available_Methods)) %>%
  group_by(I_know_RDM_available_Methods) %>%
  tally_share()
picked <- df_np$I_know_RDM_available_Methods == 'Agree'
cat(df_np$n[picked], '(', round(df_np$percent[picked], 3) * 100, '%)')
cat(S3)

# Handling data according to community standards.
df_np <- neuro_data_tmp %>%
  mutate(I_can_handle_RD_community_standards = fold_likert(I_can_handle_RD_community_standards)) %>%
  group_by(I_can_handle_RD_community_standards) %>%
  tally_share()
picked <- df_np$I_can_handle_RD_community_standards == 'Agree'
cat(df_np$n[picked], '(', round(df_np$percent[picked], 3) * 100, '%)')
cat(S4)

# Dedicated personnel: the answers are used as given.
df_np <- neuro_data_tmp %>%
  group_by(I_have_RDM_personal) %>%
  tally_share()
picked <- df_np$I_have_RDM_personal == 'Yes, in my lab'
cat(df_np$n[picked], '(', round(df_np$percent[picked], 3) * 100, '%)')
cat(S5)
####################################################
#### Figure 3 ######################################
#### Tools #########################################
# Bar chart of the share of "not No" answers per task of the
# "standard tools" question group (a Yes/No question per the original notes).

# One row per respondent, task and answer; comment/"other" items are removed.
temp <- data %>%
  select(Response.ID, TaskStandardToolsCat, TaskStandardTools) %>%
  filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
  na.omit() %>%
  unique() %>%
  droplevels()

# Relative frequency per task and answer.
# NOTE(review): 159 is hard-coded; presumably the number of respondents -
# confirm and derive it from the data if so.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / 159)

# Percentage of each answer within its task (summarise() leaves the table
# grouped by task, so sum(n) is the task total), then keep everything except
# the "No" rows and the simulation task.
temp_absNumbers <- temp %>%
  group_by_at(vars(TaskStandardToolsCat, TaskStandardTools)) %>%
  summarise(n = n()) %>%
  mutate(percent = round(n / sum(n) * 100, 0)) %>%
  filter(TaskStandardTools != 'No') %>%
  filter(TaskStandardToolsCat != ' Simulation ') %>%
  arrange(percent)

# geom_col() draws the precomputed percentages directly; it replaces
# geom_histogram(stat = 'identity'), which did the same through a geom that
# is meant for binning raw values.
pFD <- ggplot(data = temp_absNumbers) +
  geom_col(
    mapping = aes(x = reorder(TaskStandardToolsCat, percent), y = percent),
    colour = 'darkblue', fill = 'darkblue',
    width = 0.5
  ) +
  coord_flip() +
  theme(text = element_text(size = 11)) +
  xlab('') + ylab('') + ggtitle('Use of Standard Tools for ...') +
  geom_text(aes(x = TaskStandardToolsCat, y = percent, label = paste0(percent, "%")),
            colour = "white", hjust = 1.5) +
  # The former str_replace_all(x, " ", " ") replaced a space by a space and
  # therefore had no effect; it has been removed.
  scale_x_discrete(labels = function(x) str_wrap(x, width = 20))

# Explicit print(): a bare `pFD` is only drawn when the value is auto-printed,
# which does not happen when the script is run through source().
print(pFD)

# Write the figure to disk.
ragg::agg_tiff("Fig3.tiff", width = 17.5, height = 10, units = "cm", res = 600, scaling = 1)
print(pFD)
dev.off()
####################################################
#### Figure 4 ######################################
#### Tools vs. DataSharing #########################
# Rebuild the working table, this time including the question groups on
# re-using and sharing data.
neuro_data_tmp <- dplyr::select(
  neuro_data,
  Response.ID,
  Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
  Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
  Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
  What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
  Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
  What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
  What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
  What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
  Have.you.shared.data.with.....Publicly.,
  starts_with('For.which.of.these.tasks.'),
  starts_with('To.what.degree.do.you.'),
  starts_with('Think.of.re.using.data.'),
  starts_with('Think.of.data.sharing.')
)

# Short analysis names for the verbose headers (`short name = header`);
# headers that are not present are skipped.
short_names <- c(
  Existing_Data = 'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.',
  Other_can_answer = 'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..',
  Shared_Publicly = 'Have.you.shared.data.with.....Publicly.',
  I_know_how_to_publish_my_data_reproducible = 'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.',
  I_have_RDM_personal = 'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.',
  I_can_handle_RD_community_standards = 'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.',
  I_have_proficiency_in_RDM = 'What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.',
  Iam_highly_knowledgeable_in_RDM = 'What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.',
  I_know_RDM_available_Methods = 'What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.'
)
header_pos <- match(colnames(neuro_data_tmp), short_names)
is_known <- !is.na(header_pos)
colnames(neuro_data_tmp)[is_known] <- names(short_names)[header_pos[is_known]]

# Header prefix of each multi-column question group and the name its
# long-format answer column gets (the item column gets the suffix "Cat").
comb_string_vec <- c('For.which.of.these.tasks.',
                     'To.what.degree.do.you.',
                     'Think.of.re.using.data.',
                     'Think.of.data.sharing.')
comb_col_names <- c('TaskStandardTools',
                    'TaskStandardToolsDegree',
                    'ThinkReusingData',
                    'ThinkSharingData')

library(data.table)
data0 <- neuro_data_tmp
# One pass per question group: fold the group's columns into an item column
# and an answer column, keeping every other column as an identifier.
for (grp in seq_along(comb_string_vec)) {
  in_group <- grepl(comb_string_vec[grp], colnames(data0))
  data0 <- as.data.frame(
    data.table::melt(
      as.data.table(data0),
      id.vars = which(!in_group),
      measure.vars = list(which(in_group)),
      variable.name = paste0(comb_col_names[grp], 'Cat'),
      value.name = comb_col_names[grp],
      value.factor = TRUE
    )
  )

  # The item column is read from the second-to-last position. Its levels are
  # the original headers; keep the text from the end of the first "..."
  # onwards, turn dots into spaces and restore the abbreviation "e.g.".
  item_col <- ncol(data0) - 1
  item_labels <- levels(data0[, item_col])
  sep_end <- str_locate(item_labels, '\\.\\.\\.')[, 2]
  item_labels <- substr(item_labels, sep_end, nchar(item_labels))
  item_labels <- gsub('\\.|\\.\\.', ' ', item_labels)
  item_labels <- gsub('e g', 'e.g.', item_labels)
  levels(data0[, item_col]) <- item_labels
}
data <- data0
# Standard tools vs. public data sharing: printed paragraph.
# One row per respondent, sharing status, task and answer; comment/"other"
# items are removed.
# NOTE(review): the second filter compares the task label (not the answer)
# with "No"; it looks like a leftover and probably removes nothing - confirm.
temp <- data %>%
  select(Response.ID, Shared_Publicly, TaskStandardToolsCat, TaskStandardTools) %>%
  filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
  filter(TaskStandardToolsCat != "No") %>%
  na.omit() %>%
  unique() %>%
  droplevels()

# Answer counts per sharing status, pooled over all tasks. The selection
# below groups by every column except the respondent id and the task, i.e. by
# Shared_Publicly and TaskStandardTools.
# NOTE(review): 159 is hard-coded; presumably the number of respondents.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID, Shared_Publicly, -TaskStandardToolsCat)) %>%
  summarise(n = n()) %>%
  mutate(share = n / 159)

# Percentage of each answer within the non-sharing / sharing group.
no <- temp_relFreq %>%
  filter(Shared_Publicly == "No") %>%
  mutate(procent = n / sum(n) * 100)
yes <- temp_relFreq %>%
  filter(Shared_Publicly == "Yes") %>%
  mutate(procent = n / sum(n) * 100)

# Pick the "Yes" answer by value; the former `procent[2]` silently depended
# on "Yes" being the second row of the table.
no_uses_tools <- no$procent[no$TaskStandardTools == "Yes"]
yes_uses_tools <- yes$procent[yes$TaskStandardTools == "Yes"]

# cat() with a trailing "" ends every fragment with a space; without it the
# consecutive fragments were printed glued together ("there isthe trend").
emit <- function(...) cat(..., "")

emit("Althoug the use of standard tools are in very different areas there is")
emit("the trend that those how generally use more standard tools are more likely to share their data.")
emit("In the group that did not share their data publicly only")
emit(no_uses_tools, "% use standard Tools. While in the group who shares data ")
emit(yes_uses_tools, "% use standard Tools. ")
emit("A possible explanation could be that scientists who work a lot with standard tools find it easier to overcome the heavily standardized rules of public sharing of data. ")
emit("Formally, of course, it cannot be excluded that the dominant causality is opposite. However, we consider it unlikely that the motivation to share data is the main driver for a general affinity to use standard methods. ")
###
# Degree of standard tool use vs. public data sharing: printed paragraph.
# One row per respondent, sharing status, task and degree answer;
# comment/"other" items are removed.
# NOTE(review): the second filter compares the task label (not the answer)
# with "No"; it looks like a leftover and probably removes nothing - confirm.
temp <- data %>%
  select(Response.ID, Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree) %>%
  filter(!grepl('Comment|Other', TaskStandardToolsDegreeCat)) %>%
  filter(TaskStandardToolsDegreeCat != "No") %>%
  na.omit() %>%
  unique() %>%
  droplevels()

# Counts of every degree answer per sharing status and task (the simulation
# task is left out); `percent` is the answer's share within its task.
temp_absNumbers_all <- temp %>%
  group_by_at(vars(Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree)) %>%
  summarise(n = n()) %>%
  mutate(percent = round(n / sum(n) * 100, 0)) %>%
  filter(TaskStandardToolsDegreeCat != ' Simulation ') %>%
  group_by_at(vars(Shared_Publicly))

# The same table restricted to the answer 'Mostly'.
temp_absNumbers <- temp_absNumbers_all %>%
  filter(TaskStandardToolsDegree == 'Mostly')

# Share (in %) of 'Mostly' answers among all degree answers of one sharing
# group. Fix: the denominator used to index temp_absNumbers_all with a mask
# built from the shorter temp_absNumbers, which picked the wrong rows.
mostly_share <- function(shared) {
  mostly <- temp_absNumbers$n[temp_absNumbers$Shared_Publicly == shared]
  total <- temp_absNumbers_all$n[temp_absNumbers_all$Shared_Publicly == shared]
  sum(mostly) / sum(total) * 100
}
yes <- mostly_share("Yes")
no <- mostly_share("No")

# cat() with a trailing "" ends every fragment with a space so consecutive
# fragments are not printed glued together ("openlyRespondents").
emit <- function(...) cat(..., "")

emit(yes, " % answer mostly if they share their data while only ")
emit(no, " % using mostly standard methods for their work if they did not share their data openly")
emit("Respondents who share their data publicly have a ", (yes - no) / no * 100, "% higher rate to using 'mostly' standard tools in their daily work")
####################################################
#### Obstacles for Data sharing ####################
# Working table with the respondent id and three multi-column question groups.
data0 <- dplyr::select(
  neuro_data,
  Response.ID,
  starts_with('Have.you.shared.data.with'),
  starts_with('Please.indicate.'),
  starts_with('How.do.you.process.and.analyze.your.data.')
)

# Header prefix of each question group and the name its long-format answer
# column gets (the item column gets the suffix "Cat").
comb_string_vec <- c('Please.indicate.',
                     'How.do.you.process.and.analyze.your.data.',
                     'Have.you.shared.data.with')
comb_col_names <- c('SharingProblems',
                    'HowAnalyzeData',
                    'DataSharing')

library(data.table)
# One pass per question group: fold the group's columns into an item column
# and an answer column, keeping every other column as an identifier.
for (grp in seq_along(comb_string_vec)) {
  in_group <- grepl(comb_string_vec[grp], colnames(data0))
  data0 <- as.data.frame(
    data.table::melt(
      as.data.table(data0),
      id.vars = which(!in_group),
      measure.vars = list(which(in_group)),
      variable.name = paste0(comb_col_names[grp], 'Cat'),
      value.name = comb_col_names[grp],
      value.factor = TRUE
    )
  )

  # The item column is read from the second-to-last position. Its levels are
  # the original headers; keep the text from the end of the first "..."
  # onwards, turn dots into spaces and restore the abbreviation "e.g.".
  item_col <- ncol(data0) - 1
  item_labels <- levels(data0[, item_col])
  sep_end <- str_locate(item_labels, '\\.\\.\\.')[, 2]
  item_labels <- substr(item_labels, sep_end, nchar(item_labels))
  item_labels <- gsub('\\.|\\.\\.', ' ', item_labels)
  item_labels <- gsub('e g', 'e.g.', item_labels)
  levels(data0[, item_col]) <- item_labels
}
data <- data0
# Obstacles for public data sharing: printed paragraph.
S1 <- 'We inquired about the use of existing tools and standards for different research data man-agement activities, if this process step was relevant for the participants. '
cat(S1)

# Answer groupings (not used in this section itself).
agree <- c('Fully agree', 'Rather agree')
disagree <- c('Fully disagree', 'Rather disagree')
not_agree <- c('Fully agree', 'Rather agree', 'Undecided')
not_disagree <- c('Fully disagree', 'Rather disagree', 'Undecided')

# Answer counts per statement, with "Fully"/"Rather" answers merged into
# "Agree"/"Disagree"; `percent` is the rounded share within the statement.
temp <- data %>%
  select(Response.ID, SharingProblemsCat, SharingProblems) %>%
  mutate(SharingProblems = replace(SharingProblems, str_detect(SharingProblems, " agree"), "Agree")) %>%
  mutate(SharingProblems = replace(SharingProblems, str_detect(SharingProblems, " disagree"), "Disagree")) %>%
  na.omit() %>%
  group_by_at(vars(SharingProblemsCat, SharingProblems)) %>%
  summarise(n = n()) %>%
  mutate(percent = round(n / sum(n) * 100, 0))

# Rows of `temp` that belong to one statement (matched on its cleaned label).
rows_for <- function(statement) {
  filter(temp, SharingProblemsCat == statement)
}
# Percentage of one answer within a statement table.
pct <- function(rows, answer) {
  rows$percent[rows$SharingProblems == answer]
}

ownership <- rows_for(" I do not want to use a public repository because my data ownership intellectual property might be violated ")
institution <- rows_for(" My institutional policy allows to upload data to a public repository ")
legal <- rows_for(' Legal aspects licensing national laws are significant hurdles for public repository usage ')
rights <- rows_for(' For my research project s I am unsure if I own the rights to upload the data to a public repository ')
sufficient_guidance <- rows_for(' There is sufficient guidance towards choosing an appropriate repository for my data ')
time <- rows_for(' There is a lack of time to deposit data in a repository ')
expertise <- rows_for(' There is a lack of expertise and human resources to deposit data in a repository ')
technic <- rows_for(' Technical hurdles are too high to upload to a repository large data transfer lack of requested metadata ')

# cat() with a trailing "" ends every fragment with a space so consecutive
# fragments are not printed glued together.
emit <- function(...) cat(..., "")

emit(pct(ownership, "Agree"), '% are reluctant to share data publicly because the data ownership or intellectual property might be violated (vs. ', pct(ownership, "Disagree"), ').')
emit('Interestingly, ', pct(institution, "Undecided"), '% participants did not know whether their institutional policy allow to up-load data to a public repository')
# Fix: the answer label was misspelled "Disargree", so the selection was
# always empty and the sentence was printed without its number. The extra
# cat() that printed the same value a second time has been removed.
emit(' while further ', pct(institution, "Disagree"), '% are sure that they did not')
emit('Further ', 100 - pct(rights, "Disagree"), ' are not sure whether they own the rights to upload data from their own research project')
emit(pct(legal, "Agree"), ' %) see legal aspects as significant hurdles for public repository usage.')
emit('These answers indicate major uncertainties with regard to legal issues.')
emit('Only ', pct(sufficient_guidance, "Agree"), '% think that there is sufficient guidance towards choosing an appropriate repository for my data')
emit(pct(time, "Agree"), '% think that there is a lack of time to deposit data in a repository.')
emit('while only ', pct(time, "Disagree"), '% disagree on this point')
emit(pct(expertise, "Agree"), "% think that there is a lack of expertise and human resources to deposit data in a repository")
emit(pct(technic, "Agree"), "% think that the technical hurdles are too high to uplad tdat ato a repository")
# NOTE(review): every figure in the following paragraph is typed in by hand
# and is not recomputed from the data.
cat('83% of respondents did not think that their research data must be handled in their very own, individual way. The lack of professional data management was reported as problem. 70 (54%) participants think that they would share more of their data if they had better data man-agement while only 32% think that a better data management would not increase the amount of own data to share. Due to the lack of professional data management, the preparation of an dataset for public use is a time-consuming process. 70% of those respondents how have previously prepared data for publication and re-use say that the time that they need to ready a dataset requires more than a day while 39% need even more than a week. Accordingly, 76 (60%) think that there is a lack of time to deposit data in a repository while only 31 (24%) did not think that time is a problem for the deposition of data in a public repository.')
######################################################################
# Rebuild the working table once more, now also carrying the respondent's
# current position and the neuroscience discipline columns.
neuro_data_tmp <- dplyr::select(
  neuro_data,
  Response.ID,
  Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
  Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
  Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
  What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
  Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
  What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
  What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
  What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
  Have.you.shared.data.with.....Publicly.,
  My.current..primary..position.is.,
  starts_with('For.which.of.these.tasks.'),
  starts_with('To.what.degree.do.you.'),
  starts_with('Think.of.re.using.data.'),
  starts_with('Which.neuroscience.discipline.s.'),
  starts_with('Think.of.data.sharing.')
)

# Short analysis names for the verbose headers (`short name = header`);
# headers that are not present are skipped.
short_names <- c(
  CurrentPosition = 'My.current..primary..position.is.',
  Existing_Data = 'Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.',
  Other_can_answer = 'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..',
  Shared_Publicly = 'Have.you.shared.data.with.....Publicly.',
  I_know_how_to_publish_my_data_reproducible = 'Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.',
  I_have_RDM_personal = 'Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.',
  I_can_handle_RD_community_standards = 'What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.',
  I_have_proficiency_in_RDM = 'What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.',
  Iam_highly_knowledgeable_in_RDM = 'What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.',
  I_know_RDM_available_Methods = 'What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.'
)
# Position of every current column in the lookup (NA = not a renamed header).
header_pos <- match(colnames(neuro_data_tmp), short_names)
is_known <- !is.na(header_pos)
colnames(neuro_data_tmp)[is_known] <- names(short_names)[header_pos[is_known]]
- #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
- #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
- #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
- #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
- #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
- #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
- #colnames(neuro_data_tmp)[which(names(neuro_data_tmp) == '')] <- ''
- comb_string_vec = c('For.which.of.these.tasks.',
- 'To.what.degree.do.you.',
- 'Think.of.re.using.data.',
- 'Which.neuroscience.discipline.s.',
- 'Think.of.data.sharing.')
- comb_col_names = c('TaskStandardTools',
- 'TaskStandardToolsDegree',
- 'ThinkReusingData',
- 'NeuroDiscipline',
- 'ThinkSharingData')
- library(data.table)
- data0 = neuro_data_tmp
- for(i in seq(1,length(comb_string_vec),1)){
- data0 = data.table::melt(as.data.table(data0),
- id= c(which(!grepl(comb_string_vec[i],colnames(data0)))),
- measure=list(grep(comb_string_vec[i],colnames(data0))),
- variable.name = paste0(comb_col_names[i],'Cat'),
- value.name = comb_col_names[i],value.factor=TRUE)
-
- # make some nicer labels
- data0 = as.data.frame(data0)
- level_strings = levels(data0[,ncol(data0)-1])
-
- # iterate over the level strings and update them
- for(s in seq(1,length(level_strings),1)){
- level_string = level_strings[s]
- temp = str_locate(level_string, '\\.\\.\\.')
- level_string = substr(level_string,temp[2],nchar(level_string))
- level_string = gsub('\\.|\\.\\.',' ',level_string)
- level_string = gsub('e g','e.g.',level_string)
- level_strings[s] = level_string
- }
- # reset the labels
- levels(data0[,ncol(data0)-1]) = level_strings
- }
- data = data0
- # ---- Figure 5: public data sharing by current position ----
- # One row per respondent: id, public-sharing answer and current position.
- temp <- data %>%
-   select(Response.ID, Shared_Publicly, CurrentPosition) %>%
-   unique()
- # Respondent counts per (Shared_Publicly, CurrentPosition); cells with fewer
- # than 3 respondents and rows with missing answers are dropped.
- temp_relFreq <- temp %>%
-   group_by_at(vars(-Response.ID, Shared_Publicly)) %>%
-   summarise(n = n()) %>%
-   filter(n >= 3) %>%
-   na.omit()
- library(reshape2)
- # Wide table with one 'No' and one 'Yes' count column per position, then the
- # percentage of 'Yes' among both. reshape2::dcast and value.var are spelled out
- # so the result depends neither on package attach order nor on column guessing.
- # NOTE(review): a position whose 'No' or 'Yes' cell was removed above gets NA
- # here and is dropped by na.omit(), so such positions are not plotted.
- temp_absNumbers <- reshape2::dcast(temp_relFreq, CurrentPosition ~ Shared_Publicly, value.var = 'n') %>%
-   mutate(percent = round(Yes / (No + Yes) * 100, 0)) %>%
-   na.omit() %>%
-   arrange(desc(percent)) %>%
-   mutate(CurrentPosition = str_replace(CurrentPosition, "Research data management focused staff", "RDM staff"))
- # Horizontal bars ordered by percentage, with the value written inside the bar.
- pFD <- ggplot(data = temp_absNumbers) +
-   geom_col(mapping = aes(x = reorder(CurrentPosition, percent), y = percent),
-            colour = 'darkblue', fill = 'darkblue',
-            width = 0.5) +
-   coord_flip() +
-   theme(text = element_text(size = 11)) +
-   xlab('') + ylab('') + ggtitle(paste0('Datasharing for different scientific positions')) +
-   geom_text(aes(x = CurrentPosition, y = percent, label = paste0(percent, "%")), colour = "white", hjust = 1.5) +
-   scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 20))
- # Show on the active device.
- print(pFD)
- # Write the figure file. The explicit print() is required: a bare `pFD` is only
- # drawn when top-level auto-printing is active, which is not the case when the
- # script is run through source(), leaving an empty file.
- ragg::agg_tiff("Fig5_Position_vs_Sharing.tiff", width = 17.5, height = 6, units = "cm", res = 600, scaling = 1)
- print(pFD)
- dev.off()
- # ---- Figure 6: public data sharing by neuroscience discipline ----
- # One row per respondent and discipline the respondent answered "Yes" to.
- temp <- data %>%
-   select(Response.ID, Shared_Publicly, NeuroDisciplineCat, NeuroDiscipline) %>%
-   filter(NeuroDiscipline == "Yes") %>%
-   select(-NeuroDiscipline) %>%
-   na.omit() %>%
-   unique()
- # Respondent counts per (Shared_Publicly, NeuroDisciplineCat); cells with fewer
- # than 3 respondents are dropped.
- temp_relFreq <- temp %>%
-   group_by_at(vars(-Response.ID, Shared_Publicly)) %>%
-   summarise(n = n()) %>%
-   filter(n >= 3) %>%
-   na.omit()
- library(reshape2)
- # Wide table with 'No' / 'Yes' counts per discipline and the percentage of 'Yes'.
- # reshape2::dcast and value.var are explicit so the call does not depend on
- # package attach order or on column guessing. Afterwards the discipline labels
- # are shortened; the replacements are applied in the order written (each one
- # sees the result of the previous one).
- # NOTE(review): disciplines whose 'No' or 'Yes' cell was removed above become
- # NA and are dropped by na.omit().
- temp_absNumbers <- reshape2::dcast(temp_relFreq, NeuroDisciplineCat ~ Shared_Publicly, value.var = 'n') %>%
-   mutate(percent = round(Yes / (No + Yes) * 100, 0)) %>%
-   na.omit() %>%
-   arrange(desc(percent)) %>%
-   mutate(
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "imaging", "Imaging"),
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "Theoretical", "Theoret."),
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "behavioral", "Behav."),
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "neuroscience", "Neurosci."),
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "Neuroscience", "Neurosci."),
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "e.g. electrophysiological recording behavior tracking ", ""),
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "e.g. patient involvement clinical trials ", ""),
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "science", "Science"),
-     NeuroDisciplineCat = str_replace(NeuroDisciplineCat, "e.g. modeling simulation ", "")
-   )
- # Horizontal bars ordered by percentage, with the value written inside the bar.
- pFD <- ggplot(data = temp_absNumbers) +
-   geom_col(mapping = aes(x = reorder(NeuroDisciplineCat, percent), y = percent),
-            colour = 'darkblue', fill = 'darkblue',
-            width = 0.5) +
-   coord_flip() +
-   theme(text = element_text(size = 11)) +
-   xlab('') + ylab('') + ggtitle(paste0('Datasharing for different neuroscientific subdisciplines')) +
-   geom_text(aes(x = NeuroDisciplineCat, y = percent, label = paste0(percent, "%")), colour = "white", hjust = 1.5) +
-   scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 40))
- # Show on the active device.
- print(pFD)
- # Write the figure file; print() is explicit because a bare `pFD` draws nothing
- # when the script is run through source().
- ragg::agg_tiff("Fig6_Discipline_vs_Sharing.tiff", width = 17.5, height = 10, units = "cm", res = 600, scaling = 1)
- print(pFD)
- dev.off()
- # ---- Narrative: use of standard tools vs. public sharing ----
- # The sentences below report how often standard tools are used in each sharing
- # group, so the numbers are computed from the standard-tool items of `data`
- # (one row per respondent and task, comment/other items excluded) instead of
- # being read from whatever table `temp_relFreq` currently holds.
- # NOTE(review): assumes TaskStandardTools holds "Yes"/"No" answers, as the
- # Shared_Publicly column does -- confirm against the survey export.
- tool_use <- data %>%
-   select(Response.ID, Shared_Publicly, TaskStandardToolsCat, TaskStandardTools) %>%
-   filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
-   na.omit() %>%
-   unique() %>%
-   group_by(Shared_Publicly, TaskStandardTools) %>%
-   summarise(n = n()) %>%
-   ungroup() %>%
-   group_by(Shared_Publicly) %>%
-   mutate(procent = n / sum(n) * 100) %>%
-   ungroup()
- # Per sharing group: share (in %) of each answer to the standard-tool items.
- no <- tool_use %>%
-   filter(Shared_Publicly == "No")
- yes <- tool_use %>%
-   filter(Shared_Publicly == "Yes")
- # Select the "Yes" answer by value rather than by row position.
- no_tools <- no$procent[no$TaskStandardTools == "Yes"]
- yes_tools <- yes$procent[yes$TaskStandardTools == "Yes"]
- # Each piece ends in a blank so consecutive cat() calls do not glue words together.
- cat("Althoug the use of standard tools are in very different areas there is ")
- cat("the trend that those how generally use more standard tools are more likely to share their data. ")
- cat("In the group that did not share their data publicly only ")
- cat(no_tools, "% use standard Tools. While in the group who shares data ")
- cat(yes_tools, "% use standard Tools. ")
- cat("A possible explanation could be that scientists who work a lot with standard tools find it easier to overcome the heavily standardized rules of public sharing of data. ")
- cat("Formally, of course, it cannot be excluded that the dominant causality is opposite. However, we consider it unlikely that the motivation to share data is the main driver for a general affinity to use standard methods. ")
- # ---- Degree of standard-tool use vs. public sharing ----
- # One row per respondent and task item with the stated degree of use;
- # comment/other items and missing answers are removed.
- temp <- data %>%
-   select(Response.ID, Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree) %>%
-   filter(!grepl('Comment|Other', TaskStandardToolsDegreeCat)) %>%
-   filter(TaskStandardToolsDegreeCat != "No") %>%
-   na.omit() %>%
-   unique() %>%
-   droplevels()
- # Counts per (sharing group, task item, degree) for all degrees; `percent` is
- # the share of each degree within its (sharing group, task item). The
- # ' Simulation ' item is excluded. The result is grouped by Shared_Publicly.
- temp_absNumbers_all <- temp %>%
-   group_by_at(vars(Shared_Publicly, TaskStandardToolsDegreeCat, TaskStandardToolsDegree)) %>%
-   summarise(n = n()) %>%
-   mutate(percent = round(n / sum(n) * 100, 0)) %>%
-   filter(TaskStandardToolsDegreeCat != ' Simulation ') %>%
-   group_by_at(vars(Shared_Publicly))
- # Same table restricted to the answer 'Mostly'.
- temp_absNumbers <- temp_absNumbers_all %>%
-   filter(TaskStandardToolsDegree == 'Mostly')
- # Percentage of 'Mostly' answers among all answers, per sharing group. Each
- # table is indexed with a mask built from its OWN Shared_Publicly column: the
- # two tables have different numbers of rows, so a mask taken from the 'Mostly'
- # table must not be used to subset the table of all answers.
- yes <- sum(temp_absNumbers$n[temp_absNumbers$Shared_Publicly == "Yes"]) /
-   sum(temp_absNumbers_all$n[temp_absNumbers_all$Shared_Publicly == "Yes"]) * 100
- no <- sum(temp_absNumbers$n[temp_absNumbers$Shared_Publicly == "No"]) /
-   sum(temp_absNumbers_all$n[temp_absNumbers_all$Shared_Publicly == "No"]) * 100
- cat(yes, " % answer mostly if they share their data while only ")
- cat(no, " % using mostly standard methods for their work if they did not share their data openly")
- # Relative difference of the two percentages, in percent of the non-sharer value.
- cat("Respondents who share their data publicly have a " , (yes - no) / no * 100, "% higher rate to using 'mostly' standard tools in their daily work")
- ######################################################################
- ######################################################################
- # Association analysis with data sharing: what has an influence on data sharing?
- # ---- Long-format working table including the 'time to ready a dataset' item ----
- # Single-answer items receive their short analysis names inside select(); the
- # multi-item question groups are selected through their column-name prefix.
- neuro_data_tmp <- neuro_data %>%
-   dplyr::select(
-     Response.ID,
-     Other_can_answer = Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
-     Existing_Data = Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
-     I_have_RDM_personal = Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
-     I_can_handle_RD_community_standards = What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
-     I_know_how_to_publish_my_data_reproducible = Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
-     how_much_time = How.much.time.do.you.currently.need.to.ready.a.dataset.from.your.lab.for.publication.and.re.use.,
-     Shared_Publicly = Have.you.shared.data.with.....Publicly.,
-     starts_with('For.which.of.these.tasks.'),
-     starts_with('To.what.degree.do.you.'),
-     starts_with('Think.of.re.using.data.'),
-     starts_with('Think.of.data.sharing.')
-   )
- # Column-name prefix of each question group to stack, paired by position with
- # the name of the resulting value column.
- comb_string_vec <- c('For.which.of.these.tasks.',
-                      'To.what.degree.do.you.',
-                      'Think.of.re.using.data.',
-                      'Think.of.data.sharing.')
- comb_col_names <- c('TaskStandardTools',
-                     'TaskStandardToolsDegree',
-                     'ThinkReusingData',
-                     'ThinkSharingData')
- library(data.table)
- data0 <- neuro_data_tmp
- for (i in seq_along(comb_string_vec)) {
-   # Columns of the current question group are melted; all others are id columns.
-   in_group <- grepl(comb_string_vec[i], colnames(data0))
-   data0 <- data.table::melt(as.data.table(data0),
-                             id.vars = which(!in_group),
-                             measure.vars = list(which(in_group)),
-                             variable.name = paste0(comb_col_names[i], 'Cat'),
-                             value.name = comb_col_names[i],
-                             value.factor = TRUE)
-   data0 <- as.data.frame(data0)
-   # Tidy the levels of the new '<name>Cat' column (second to last): keep the
-   # text from the end of the first '...' on, replace dots by blanks and restore
-   # the 'e.g.' abbreviation.
-   cat_col <- ncol(data0) - 1
-   level_strings <- levels(data0[, cat_col])
-   level_strings <- substr(level_strings,
-                           str_locate(level_strings, '\\.\\.\\.')[, 2],
-                           nchar(level_strings))
-   level_strings <- gsub('\\.|\\.\\.', ' ', level_strings)
-   level_strings <- gsub('e g', 'e.g.', level_strings)
-   levels(data0[, cat_col]) <- level_strings
- }
- data <- data0
- # ---- Figure 4: time needed to ready a dataset for publication and reuse ----
- # Answer-category groupings (not referenced by the code of this figure).
- agree <- c('Fully agree', 'Rather agree')
- disagree <- c('Fully disagree', 'Rather disagree')
- not_agree <- c('Fully agree', 'Rather agree', 'Undecided')
- not_disagree <- c('Fully disagree', 'Rather disagree', 'Undecided')
- # One row per respondent with a non-missing answer, then the number and the
- # rounded percentage of respondents per answer category.
- temp_absNumbers <- data %>%
-   select(Response.ID, how_much_time) %>%
-   na.omit() %>%
-   unique() %>%
-   group_by_at(vars(how_much_time)) %>%
-   summarise(n = n()) %>%
-   mutate(percent = round(n / sum(n) * 100, 0)) %>%
-   arrange(percent)
- # Horizontal bars ordered by percentage, with the value written inside the bar.
- pFD <- ggplot(data = temp_absNumbers) +
-   geom_col(mapping = aes(x = reorder(how_much_time, percent), y = percent),
-            colour = 'darkblue', fill = 'darkblue',
-            width = 0.5) +
-   coord_flip() +
-   theme(text = element_text(size = 11)) +
-   xlab('') + ylab('') + ggtitle(paste0('Time needed to ready a dataset for publication and reuse')) +
-   geom_text(aes(x = how_much_time, y = percent, label = paste0(percent, "%")), colour = "white", hjust = 1.5) +
-   scale_x_discrete(labels = function(x) str_wrap(str_replace_all(x, " " , " "), width = 20))
- # Show on the active device.
- print(pFD)
- # Write the figure file; print() is explicit because a bare `pFD` draws nothing
- # when the script is run through source(), which would leave the file empty.
- ragg::agg_tiff("Fig4_Time.tiff", width = 17.5, height = 7, units = "cm", res = 600, scaling = 1)
- print(pFD)
- dev.off()
- ######################################################################
- ######################################################################
- # What is the factor that most strongly separates sharers from non-sharers?
- # ---- Long-format working table for the sharer / non-sharer comparison ----
- # Single-answer items receive their short analysis names inside select(); the
- # multi-item question groups are selected through their column-name prefix.
- neuro_data_tmp <- neuro_data %>%
-   dplyr::select(
-     Response.ID,
-     Other_can_answer = Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..,
-     Existing_Data = Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse.,
-     I_have_RDM_personal = Do.you.have.dedicated.personnel.with.research.data.management.or.data.curation.expertise.,
-     I_can_handle_RD_community_standards = What.is.your.opinion.on.the.following.statements...I.can.handle.my.research.data.according.to.community.standards.,
-     I_know_how_to_publish_my_data_reproducible = Please.indicate...I.know.how.to.publish.my.data.analysis.workflows.in.a.reproducible.manner.,
-     I_have_proficiency_in_RDM = What.is.your.opinion.on.the.following.statements...I.have.proficiency.in.research.data.management.,
-     Iam_highly_knowledgeable_in_RDM = What.is.your.opinion.on.the.following.statements...Overall..I.am.highly.knowledgeable.about.research.data.management.in.my.research.field.,
-     I_know_RDM_available_Methods = What.is.your.opinion.on.the.following.statements...I.know.well.which.research.data.management.methods.are.available.,
-     Shared_Publicly = Have.you.shared.data.with.....Publicly.,
-     CurrentPosition = My.current..primary..position.is.,
-     starts_with('For.which.of.these.tasks.'),
-     starts_with('To.what.degree.do.you.'),
-     starts_with('Think.of.re.using.data.'),
-     starts_with('Which.neuroscience.discipline.s.'),
-     starts_with('Applying.research.data.management..'),
-     starts_with('Think.of.data.sharing.')
-   )
- # Column-name prefix of each question group to stack, paired by position with
- # the name of the resulting value column. The 'Applying.research.data.management..'
- # columns are selected above but are not part of this list, so they stay wide.
- comb_string_vec <- c('For.which.of.these.tasks.',
-                      'To.what.degree.do.you.',
-                      'Think.of.re.using.data.',
-                      'Which.neuroscience.discipline.s.',
-                      'Think.of.data.sharing.')
- comb_col_names <- c('TaskStandardTools',
-                     'TaskStandardToolsDegree',
-                     'ThinkReusingData',
-                     'NeuroDiscipline',
-                     'ThinkSharingData')
- library(data.table)
- data0 <- neuro_data_tmp
- for (i in seq_along(comb_string_vec)) {
-   # Columns of the current question group are melted; all others are id columns.
-   in_group <- grepl(comb_string_vec[i], colnames(data0))
-   data0 <- data.table::melt(as.data.table(data0),
-                             id.vars = which(!in_group),
-                             measure.vars = list(which(in_group)),
-                             variable.name = paste0(comb_col_names[i], 'Cat'),
-                             value.name = comb_col_names[i],
-                             value.factor = TRUE)
-   data0 <- as.data.frame(data0)
-   # Tidy the levels of the new '<name>Cat' column (second to last): keep the
-   # text from the end of the first '...' on, replace dots by blanks and restore
-   # the 'e.g.' abbreviation.
-   cat_col <- ncol(data0) - 1
-   level_strings <- levels(data0[, cat_col])
-   level_strings <- substr(level_strings,
-                           str_locate(level_strings, '\\.\\.\\.')[, 2],
-                           nchar(level_strings))
-   level_strings <- gsub('\\.|\\.\\.', ' ', level_strings)
-   level_strings <- gsub('e g', 'e.g.', level_strings)
-   levels(data0[, cat_col]) <- level_strings
- }
- data <- data0
- # ---- Self-assessed RDM competence vs. public sharing ----
- # One row per respondent with the sharing answer, the position and the
- # self-assessment / personnel items.
- temp <- data %>%
-   select(Response.ID,
-          Shared_Publicly,
-          CurrentPosition,
-          I_can_handle_RD_community_standards,
-          I_have_proficiency_in_RDM,
-          Iam_highly_knowledgeable_in_RDM,
-          I_know_RDM_available_Methods,
-          I_have_RDM_personal) %>%
-   unique()
- # Collapse the four-step answers to 'Agree' / 'Disagree' in every column.
- # gsub() returns character vectors, so all columns (including Response.ID) are
- # rebuilt from character data here.
- df <- temp
- df <- data.frame(lapply(df, function(x) {
-   x <- gsub("(Fully|Rather) agree", "Agree", x)
-   gsub("(Fully|Rather) disagree", "Disagree", x)
- }))
- # For every item from column 3 on: counts per (Shared_Publicly, answer), cells
- # with fewer than 3 respondents dropped.
- for (s in seq(3, length(df), 1)) {
-   print(colnames(df)[s])
-   df_tmp <- df[, c(1, 2, s)]
-   # NOTE(review): `percent` scales the within-sharing-group share by 50, not
-   # 100 -- confirm this factor is intended.
-   temp_relFreq <- df_tmp %>%
-     group_by_at(vars(-Response.ID, Shared_Publicly)) %>%
-     summarise(n = n()) %>%
-     filter(n >= 3) %>%
-     na.omit() %>%
-     mutate(percent = round(n / sum(n) * 50, 0))
-   print(temp_relFreq)
-   # Ratio of sharers who agree to sharers who disagree. The answer column is
-   # extracted as a plain vector with [[2]], and the line is terminated with a
-   # newline so it does not run into the next printed item. Nothing follows the
-   # label when one of the two cells is absent (e.g. for CurrentPosition).
-   shared_yes <- temp_relFreq$Shared_Publicly == "Yes"
-   answer <- temp_relFreq[[2]]
-   cat("Agree and Shared = ",
-       temp_relFreq$n[shared_yes & answer == "Agree"] / temp_relFreq$n[shared_yes & answer == "Disagree"],
-       "\n")
- }
- # The resulting tables were then inspected by hand; the ratio between
- # 'Yes / Agree' and 'Yes / Disagree' was computed from them and reported in
- # the paper.
- ######################################################################
- ######################################################################
- # ---- Long-format working table with all 'Have.you.shared.data.with' items ----
- # Here the sharing items themselves form a question group that is stacked
- # (into DataSharingCat / DataSharing) together with the other groups.
- data0 <- neuro_data %>%
-   dplyr::select(
-     Response.ID,
-     starts_with('Have.you.shared.data.with'),
-     starts_with('For.which.of.these.tasks.'),
-     starts_with('To.what.degree.do.you.'),
-     starts_with('Think.of.re.using.data.'),
-     starts_with('Think.of.data.sharing.')
-   )
- # Column-name prefix of each question group to stack, paired by position with
- # the name of the resulting value column.
- comb_string_vec <- c('For.which.of.these.tasks.',
-                      'To.what.degree.do.you.',
-                      'Think.of.re.using.data.',
-                      'Think.of.data.sharing.',
-                      'Have.you.shared.data.with')
- comb_col_names <- c('TaskStandardTools',
-                     'TaskStandardToolsDegree',
-                     'ThinkReusingData',
-                     'ThinkSharingData',
-                     'DataSharing')
- library(data.table)
- for (i in seq_along(comb_string_vec)) {
-   # Columns of the current question group are melted; all others are id columns.
-   in_group <- grepl(comb_string_vec[i], colnames(data0))
-   data0 <- data.table::melt(as.data.table(data0),
-                             id.vars = which(!in_group),
-                             measure.vars = list(which(in_group)),
-                             variable.name = paste0(comb_col_names[i], 'Cat'),
-                             value.name = comb_col_names[i],
-                             value.factor = TRUE)
-   data0 <- as.data.frame(data0)
-   # Tidy the levels of the new '<name>Cat' column (second to last): keep the
-   # text from the end of the first '...' on, replace dots by blanks and restore
-   # the 'e.g.' abbreviation.
-   cat_col <- ncol(data0) - 1
-   level_strings <- levels(data0[, cat_col])
-   level_strings <- substr(level_strings,
-                           str_locate(level_strings, '\\.\\.\\.')[, 2],
-                           nchar(level_strings))
-   level_strings <- gsub('\\.|\\.\\.', ' ', level_strings)
-   level_strings <- gsub('e g', 'e.g.', level_strings)
-   levels(data0[, cat_col]) <- level_strings
- }
- data <- data0
- # ---- Figure 2 (data sharing) and standard-tool use by sharing target ----
- # One row per respondent, standard-tool item and sharing item; comment/other
- # tool items and rows with missing answers are removed.
- temp <- data %>%
-   select(Response.ID, TaskStandardToolsCat, TaskStandardTools, DataSharingCat, DataSharing) %>%
-   filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
-   na.omit() %>%
-   unique() %>%
-   droplevels()
- # Counts per combination of tool item, tool answer, sharing item and sharing answer.
- temp_absNumbers <- temp %>%
-   group_by_at(vars(-Response.ID)) %>%
-   summarise(n = n())
- # Share of each DataSharing answer within (tool item, tool answer, sharing item).
- # This is the table plotted by pTST below: it must keep the `share` and
- # `DataSharing` columns that the plot maps to y, colour and fill.
- temp_relFreq <- temp %>%
-   group_by_at(vars(-Response.ID)) %>%
-   summarise(n = n()) %>%
-   mutate(share = n / sum(n))
- # Counts pooled over the DataSharing answer, kept under its own name so it does
- # not replace the table needed by pTST.
- # NOTE(review): 144 is a fixed denominator -- presumably the number of
- # respondents; confirm and derive it from the data.
- temp_relFreq_pooled <- temp %>%
-   group_by_at(vars(-Response.ID, -DataSharing)) %>%
-   summarise(n = n()) %>%
-   mutate(percent = n / 144)
- # Horizontal bars of the counts per sharing item.
- pFD <- ggplot(data = temp_absNumbers) +
-   geom_col(mapping = aes(x = DataSharingCat, y = n),
-            colour = 'darkblue', fill = 'darkblue',
-            width = 0.5) +
-   coord_flip() +
-   xlab('') + ylab('') + ggtitle(paste0('Datasharing (n = ', sum(temp_absNumbers$n), ')')) +
-   geom_text(aes(x = DataSharingCat, y = n, label = n), colour = "white", hjust = 1.5) +
-   scale_x_discrete(labels = function(x) {gsub('(.{1,30})(\\s|$)', '\\1\n', x)}) # regular expression that breaks long labels into several lines
- # Show on the active device.
- print(pFD)
- # Write the figure file; print() is explicit because a bare plot object draws
- # nothing when the script is run through source().
- tiff('Fig2_DataSharing.tiff', width = 17.5, height = 7, units = "cm", res = 600)
- print(pFD)
- dev.off()
- # Stacked shares of the DataSharing answers per tool item, facetted by tool
- # answer (rows) and sharing item (columns).
- pTST <- ggplot(data = temp_relFreq) +
-   geom_col(mapping = aes(x = TaskStandardToolsCat, y = share, color = DataSharing, fill = DataSharing),
-            width = 0.5) +
-   xlab('') + ylab('percent (%)') +
-   theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
-   theme(legend.position = "left", legend.box = "vertical") +
-   facet_grid(TaskStandardTools ~ DataSharingCat, scales = 'fixed', margins = FALSE, labeller = label_wrap_gen(width = 30)) +
-   scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
-   scale_x_discrete(labels = function(x) {gsub('(.{1,50})(\\s|$)', '\\1\n', x)}) # regular expression that breaks long labels into several lines
- png('For.which.of.these.tasks.do.you.use.available.tools.or.standards.png', width = 30, height = 20, units = "cm", res = 300)
- print(pTST)
- dev.off()
# Task Standard Tools Degree plot ---------------------------------------------
temp <- data %>%
  select(Response.ID, TaskStandardToolsDegreeCat, TaskStandardToolsDegree,
         DataSharingCat, DataSharing) %>%
  filter(!grepl('Other', TaskStandardToolsDegreeCat)) %>%
  na.omit() %>%
  unique()

# Collapse the answer scale into two buckets (labels kept exactly as the
# original script wrote them, since they appear as facet labels).
degree <- as.character(temp$TaskStandardToolsDegree)
degree[degree %in% c('As much as possible', 'Mostly')] <- 'Offten'
degree[degree %in% c('Occasionally', 'This is not relevant for my scientific work')] <- 'Rare'
temp$TaskStandardToolsDegree <- as.factor(degree)

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pTSD <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = TaskStandardToolsDegreeCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(TaskStandardToolsDegree ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('To.what.degree.do.you.use.available.tools.or.standards.png', width = 30, height = 20, units = "cm", res = 300)
print(pTSD)
dev.off()
# Think of Reusing Data plot --------------------------------------------------
temp <- data %>%
  select(Response.ID, ThinkReusingDataCat, ThinkReusingData,
         DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique()

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pTRD <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = ThinkReusingDataCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(ThinkReusingData ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('Think.of.re.using.data.from.repositories.png', width = 30, height = 20, units = "cm", res = 300)
print(pTRD)
dev.off()
# Think of Sharing Data plot --------------------------------------------------
temp <- data %>%
  select(Response.ID, ThinkSharingDataCat, ThinkSharingData,
         DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique()

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

# NOTE: the name pTSD is reused from the Task Standard Tools Degree plot, as in
# the original script.
pTSD <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = ThinkSharingDataCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(ThinkSharingData ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.png', width = 30, height = 20, units = "cm", res = 300)
print(pTSD)
dev.off()
####################################################################
# Familiar Data Types plot (absolute numbers)
# These are Yes/No questions, so only answers other than 'No' are kept, and
# only answer values given at least 3 times.
temp <- data %>%
  select(Response.ID, FimilarDataTypesCat, FimilarDataTypes) %>%
  filter(FimilarDataTypes != 'No') %>%
  na.omit() %>%
  unique() %>%
  group_by(FimilarDataTypes) %>%
  filter(n() >= 3)

# Absolute counts per category and answer.
temp_absNumbers <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n())

pFD <- ggplot(data = temp_absNumbers) +
  geom_col(mapping = aes(x = FimilarDataTypesCat, y = n),
           colour = 'darkblue', fill = 'darkblue',
           width = 0.5) +
  coord_flip() +
  xlab('') + ylab('') +
  ggtitle(paste0('Fimilar Datatypes \n(n = ', sum(temp_absNumbers$n), ', multiple answers possible)')) +
  geom_text(aes(x = FimilarDataTypesCat, y = n, label = n), colour = "white", hjust = 1.5) +
  # wrap long axis labels after at most 30 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)})

# Shown on the current device only; the file export is disabled.
#png('Please.state.if.your.work.includes.png', width = 30, height = 20, units = "cm", res = 300)
print(pFD)
#dev.off()
###
# Data availability
# NOTE(review): despite the heading, this section rebuilds exactly the same
# Familiar Data Types table and plot as the section before it, and the plot is
# never rendered. It is presumably an unfinished copy - confirm or remove.
temp <- data %>%
  select(Response.ID, FimilarDataTypesCat, FimilarDataTypes) %>%
  filter(FimilarDataTypes != 'No') %>%
  na.omit() %>%
  unique() %>%
  group_by(FimilarDataTypes) %>%
  filter(n() >= 3)

# Absolute counts per category and answer.
temp_absNumbers <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n())

pFD <- ggplot(data = temp_absNumbers) +
  geom_col(mapping = aes(x = FimilarDataTypesCat, y = n),
           colour = 'darkblue', fill = 'darkblue',
           width = 0.5) +
  coord_flip() +
  xlab('') + ylab('') +
  ggtitle(paste0('Fimilar Datatypes \n(n = ', sum(temp_absNumbers$n), ', multiple answers possible)')) +
  geom_text(aes(x = FimilarDataTypesCat, y = n, label = n), colour = "white", hjust = 1.5) +
  # wrap long axis labels after at most 30 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)})
# Build the long-format table for the tool/attitude question groups -----------
data0 <- neuro_data %>%
  dplyr::select(Response.ID,
                starts_with('Have.you.shared.data.with'),
                starts_with('For.which.of.these.tasks.'),
                starts_with('To.what.degree.do.you.'),
                starts_with('Think.of.re.using.data.'),
                starts_with('Think.of.data.sharing.'))

# Column-name pattern of each question group and the name of the value column
# it is melted into (position i of one vector belongs to position i of the other).
comb_string_vec <- c('For.which.of.these.tasks.',
                     'To.what.degree.do.you.',
                     'Think.of.re.using.data.',
                     'Think.of.data.sharing.',
                     'Have.you.shared.data.with')
comb_col_names <- c('TaskStandardTools',
                    'TaskStandardToolsDegree',
                    'ThinkReusingData',
                    'ThinkSharingData',
                    'DataSharing')

library(data.table)
for (i in seq_along(comb_string_vec)) {
  cat_col <- paste0(comb_col_names[i], 'Cat')
  is_measure <- grepl(comb_string_vec[i], colnames(data0))

  # measure.vars is passed as a plain vector: a list of length one is the
  # multi-column form of melt() and does not reliably give column names in the
  # variable column across data.table versions.
  data0 <- data.table::melt(as.data.table(data0),
                            id.vars = which(!is_measure),
                            measure.vars = which(is_measure),
                            variable.name = cat_col,
                            value.name = comb_col_names[i],
                            value.factor = TRUE)
  data0 <- as.data.frame(data0)

  # make some nicer labels: keep the text from the end of the first '...' on,
  # turn dots into spaces and restore 'e.g.'
  level_strings <- levels(data0[[cat_col]])
  cut_pos <- str_locate(level_strings, '\\.\\.\\.')[, 2]
  has_sep <- !is.na(cut_pos)  # names without '...' are kept whole, not set to NA
  level_strings[has_sep] <- substr(level_strings[has_sep],
                                   cut_pos[has_sep],
                                   nchar(level_strings[has_sep]))
  level_strings <- gsub('.', ' ', level_strings, fixed = TRUE)
  level_strings <- gsub('e g', 'e.g.', level_strings)

  # reset the labels
  levels(data0[[cat_col]]) <- level_strings
}
data <- data0
# Standard Tools (absolute numbers) -------------------------------------------
# Task Standard Tools are Yes/No questions: free-text 'Comment'/'Other'
# categories and 'No' answers are removed, and only answer values given at
# least 3 times are kept.
temp <- data %>%
  select(Response.ID, TaskStandardToolsCat, TaskStandardTools) %>%
  filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
  filter(TaskStandardTools != 'No') %>%
  na.omit() %>%
  unique() %>%
  group_by(TaskStandardTools) %>%
  filter(n() >= 3)

# Absolute counts per task and answer, largest first.
temp_absNumbers <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  arrange(desc(n))

pST <- ggplot(data = temp_absNumbers) +
  geom_col(mapping = aes(x = TaskStandardToolsCat, y = n),
           colour = 'darkblue', fill = 'darkblue',
           width = 0.5) +
  coord_flip() +
  xlab('') + ylab('') +
  ggtitle(paste0('Use of Standard Tools \n(n = ', sum(temp_absNumbers$n), ', multiple answers possible)')) +
  geom_text(aes(x = TaskStandardToolsCat, y = n, label = n), colour = "white", hjust = 1.5) +
  # wrap long axis labels after at most 30 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)})

# print() is explicit so the plot is also drawn when the script is source()d.
print(pST)
tiff('UseOfStandardTools.tiff', width = 30, height = 20, units = "cm", res = 300)
print(pST)
dev.off()
######################################################################
######################################################################
# Summary sentences for the text.

# Respondents with at least one dataset, counted by their answer to
# "others could answer their own questions with my data".
temp_absNumbers <- neuro_data_tmp %>%
  na.omit() %>%
  filter(Existing_Data != 'I have no datasets') %>%
  group_by_at(vars(Other_can_answer)) %>%
  summarise(n = n()) %>%
  mutate(percent = n / sum(n))

# The sentence reports the 'Yes' answers; pick that row by value instead of by
# its position in the table, which depends on the set and order of the answers.
yes_row <- temp_absNumbers[temp_absNumbers$Other_can_answer == 'Yes', ]
cat("Von den Antwortenden die mindestens einen Datensatz haben ... ")
cat( yes_row$n, '(', round(yes_row$percent,3)*100, '%) , of all respondents that have at least one dataset are of the opinion that other researchers could answer their own research questions by re-using data from their research.')

# Among those answering 'Yes': distribution of public data sharing.
temp_absNumbers <- neuro_data_tmp %>%
  na.omit() %>%
  #filter(!Existing_Data == 'I have no datasets') %>%
  filter(Other_can_answer == 'Yes') %>%
  group_by_at(vars(Shared_Publicly)) %>%
  summarise(n = n()) %>%
  mutate(percent = n / sum(n))

S1 <- 'However, even for this subgroup, of scientists in possession of data of which they think are valuable to others '
S2 <- '% have never shared any of their data publicly.'
# NOTE(review): row 1 is presumably the "never shared publicly" answer of
# Shared_Publicly - confirm against the answer labels.
cat(S1, round(temp_absNumbers$percent[1],3)*100, S2)
#############################################################
# Of those who have data, how many think that these data are also useful to
# others?
#q1 = 'Think.of.data.sharing.with.researchers.who.are.NOT.direct.collaborators..Please.indicate...Other.researchers.could.answer.their.own.research.questions.by.re.using.data.from.my.research..'
#data3 = data[data$Existing_Data!=NA,]
# Factors promoting public data sharing
# To identify factors that promote public data sharing, answers of participants
# were filtered on whether they have already shared their data in public
# repositories or not. All subjects who did not have any data were excluded.
# 1. delete subjects which did not have any data
#vec <- c("I have no datasets")
#data1 <- data0[data0$Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse. %in% vec,]
data1 <- neuro_data %>%
  filter(Do.you.have.existing.data.sets..experiments..that.should.be.kept.alive.by.making.them.available.for.reuse. != "I have no datasets")

# Quick inspection of the survey column names.
x <- colnames(neuro_data)
typeof(x)
length(x)
# Time needed to prepare a dataset, for respondents that have data ------------
# The original script first built `data0` and the comb vectors from
# `neuro_data` and immediately overwrote all three; only the effective
# definitions are kept.
data0 <- data1 %>%
  dplyr::select(Response.ID,
                starts_with('Have.you.shared.data.with.....Publicly.'),
                starts_with('My.current..primary..position.is'),
                starts_with('Think.of.re.using.data.'),
                starts_with('How.much.time.do.you.currently.need'))
#data0 <- data0 %>% filter()

# Column-name pattern of each question group and the value column it is melted
# into. The plot below selects TimeNeeded(Cat) and DataSharing(Cat); the
# original only melted the re-use questions, so those columns never existed.
comb_string_vec <- c('Think.of.re.using.data.',
                     'How.much.time.do.you.currently.need',
                     'Have.you.shared.data.with')
comb_col_names <- c('ThinkREusingData',
                    'TimeNeeded',
                    'DataSharing')

library(data.table)
for (i in seq_along(comb_string_vec)) {
  cat_col <- paste0(comb_col_names[i], 'Cat')
  is_measure <- grepl(comb_string_vec[i], colnames(data0))

  # measure.vars is passed as a plain vector: a list of length one is the
  # multi-column form of melt() and does not reliably give column names in the
  # variable column across data.table versions.
  data0 <- data.table::melt(as.data.table(data0),
                            id.vars = which(!is_measure),
                            measure.vars = which(is_measure),
                            variable.name = cat_col,
                            value.name = comb_col_names[i],
                            value.factor = TRUE)
  data0 <- as.data.frame(data0)

  # make some nicer labels: keep the text from the end of the first '...' on,
  # turn dots into spaces and restore 'e.g.'
  level_strings <- levels(data0[[cat_col]])
  cut_pos <- str_locate(level_strings, '\\.\\.\\.')[, 2]
  has_sep <- !is.na(cut_pos)  # names without '...' are kept whole, not set to NA
  level_strings[has_sep] <- substr(level_strings[has_sep],
                                   cut_pos[has_sep],
                                   nchar(level_strings[has_sep]))
  level_strings <- gsub('.', ' ', level_strings, fixed = TRUE)
  level_strings <- gsub('e g', 'e.g.', level_strings)

  # reset the labels
  levels(data0[[cat_col]]) <- level_strings
}
data <- data0
# NOTE(review): `datax` is not used in this part of the script and melt() is
# called on a plain data.frame here - confirm it is still needed.
datax <-melt(data, id = c())

####################################
# Time needed for ready dataset
# remove Comment columns
temp <- data %>%
  select(Response.ID, TimeNeededCat, TimeNeeded, DataSharingCat, DataSharing) %>%
  filter(!grepl('Comment|Other', TimeNeededCat)) %>%
  #filter(TimeNeededCat != 'No') %>%
  na.omit() %>%
  unique() %>%
  droplevels()

# View() needs an interactive session; skip it in batch runs.
if (interactive()) {
  View(temp)
}

# Absolute counts per combination of all answer columns.
temp_absNumbers <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n())

# The original mapped TaskStandardToolsCat, which is not a column of `temp`.
# NOTE(review): title and output file were copied from the Standard Tools plot
# (the file name would overwrite that figure); the replacements below are a
# suggestion - confirm the intended wording.
pST <- ggplot(data = temp_absNumbers) +
  geom_col(mapping = aes(x = TimeNeededCat, y = n),
           colour = 'darkblue', fill = 'darkblue',
           width = 0.5) +
  coord_flip() +
  xlab('') + ylab('') +
  ggtitle(paste0('Time needed \n(n = ', sum(temp_absNumbers$n), ', multiple answers possible)')) +
  geom_text(aes(x = TimeNeededCat, y = n, label = n), colour = "white", hjust = 1.5) +
  # wrap long axis labels after at most 30 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,30})(\\s|$)', '\\1\n', x)})

# print() is explicit so the plot is also drawn when the script is source()d.
print(pST)
tiff('TimeNeeded.tiff', width = 17.5, height = 7, units = "cm", res = 600)
print(pST)
dev.off()
################old##########################
#############################################
# Build the long-format table for the demographic question groups.
data0 <- neuro_data %>%
  dplyr::select(Response.ID,
                starts_with('Have.you.shared.data.with'),
                starts_with('Do.you.have.existing.data'),
                starts_with('I.work.at'),
                starts_with('My.current..'),
                starts_with('Which.neuroscience.discipline.s.'),
                starts_with('Please.state.if.your.'))

# Column-name pattern of each question group and the name of the value column
# it is melted into (position i of one vector belongs to position i of the other).
comb_string_vec <- c('I.work.at',
                     'My.current..',
                     'Which.neuroscience.discipline.s.',
                     'Please.state.if.your.',
                     'Have.you.shared.data.with',
                     'Do.you.have.existing.data')
comb_col_names <- c('WorkPlaces',
                    'CurrentPosition',
                    'NeuroDiscipline',
                    'FimilarDataTypes',
                    'DataSharing',
                    'ExistingData')

# Use this loop with care: the resulting long-format data set can become very
# large, because every melt multiplies the rows of the previous result.
library(data.table)
for (i in seq_along(comb_string_vec)) {
  cat_col <- paste0(comb_col_names[i], 'Cat')
  is_measure <- grepl(comb_string_vec[i], colnames(data0))

  # measure.vars is passed as a plain vector: a list of length one is the
  # multi-column form of melt() and does not reliably give column names in the
  # variable column across data.table versions.
  data0 <- data.table::melt(as.data.table(data0),
                            id.vars = which(!is_measure),
                            measure.vars = which(is_measure),
                            variable.name = cat_col,
                            value.name = comb_col_names[i],
                            value.factor = TRUE)
  data0 <- as.data.frame(data0)

  # make some nicer labels: keep the text from the end of the first '...' on,
  # turn dots into spaces and restore 'e.g.'
  level_strings <- levels(data0[[cat_col]])
  cut_pos <- str_locate(level_strings, '\\.\\.\\.')[, 2]
  has_sep <- !is.na(cut_pos)  # names without '...' are kept whole, not set to NA
  level_strings[has_sep] <- substr(level_strings[has_sep],
                                   cut_pos[has_sep],
                                   nchar(level_strings[has_sep]))
  level_strings <- gsub('.', ' ', level_strings, fixed = TRUE)
  level_strings <- gsub('e g', 'e.g.', level_strings)

  # reset the labels
  levels(data0[[cat_col]]) <- level_strings
}
data <- data0
# WorkPlaces plot --------------------------------------------------------------
# Answer values given fewer than 3 times (free-text 'Other' answers) are dropped.
temp <- data %>%
  select(Response.ID, WorkPlacesCat, WorkPlaces) %>%
  na.omit() %>%
  unique() %>%
  group_by(WorkPlaces) %>%
  filter(n() >= 3)

pWP <- ggplot(data = temp) +
  geom_bar(mapping = aes(x = WorkPlaces, y = ..count..), position = position_dodge()) +
  xlab('') + ylab('count') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
        plot.margin = unit(c(0, 0, 0, 0), 'cm')) +
  #facet_grid(.~DataSharingCat,scales = 'fixed',margins = FALSE) +
  scale_fill_brewer(palette = 'Accent') +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('I.work.at.png', width = 30, height = 20, units = "cm", res = 300)
print(pWP)
dev.off()
# Current Position plot --------------------------------------------------------
# Only position values that occur at least 3 times are kept.
temp <- data %>%
  select(Response.ID, CurrentPosition, DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique() %>%
  group_by(CurrentPosition) %>%
  filter(n() >= 3)

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pCP <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = CurrentPosition, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(. ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('My.current.position.png', width = 30, height = 20, units = "cm", res = 300)
print(pCP)
dev.off()
# Neuro Discipline plot --------------------------------------------------------
# These are Yes/No questions: 'No' answers are removed and only answer values
# given at least 5 times are kept.
temp <- data %>%
  select(Response.ID, NeuroDisciplineCat, NeuroDiscipline,
         DataSharingCat, DataSharing) %>%
  filter(NeuroDiscipline != 'No') %>%
  na.omit() %>%
  unique() %>%
  group_by(NeuroDiscipline) %>%
  filter(n() >= 5)

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pND <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = NeuroDisciplineCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(NeuroDiscipline ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('Which.neuroscience.discipline.png', width = 30, height = 20, units = "cm", res = 300)
print(pND)
dev.off()
# Familiar Data Types plot -----------------------------------------------------
# Yes/No questions; 'No' answers are kept here (the filter is disabled) and
# only answer values given at least 5 times are kept.
temp <- data %>%
  select(Response.ID, FimilarDataTypesCat, FimilarDataTypes,
         DataSharingCat, DataSharing) %>%
  #filter(FimilarDataTypes != 'No') %>%
  na.omit() %>%
  unique() %>%
  group_by(FimilarDataTypes) %>%
  filter(n() >= 5)

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pFD <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = FimilarDataTypesCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(FimilarDataTypes ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('Please.state.if.your.work.includes.png', width = 30, height = 20, units = "cm", res = 300)
print(pFD)
dev.off()
# Recreate the long-format table for the tool/attitude question groups --------
data0 <- neuro_data %>%
  dplyr::select(Response.ID,
                starts_with('Have.you.shared.data.with'),
                starts_with('For.which.of.these.tasks.'),
                starts_with('To.what.degree.do.you.'),
                starts_with('Think.of.re.using.data.'),
                starts_with('Think.of.data.sharing.'))

# Column-name pattern of each question group and the name of the value column
# it is melted into (position i of one vector belongs to position i of the other).
comb_string_vec <- c('For.which.of.these.tasks.',
                     'To.what.degree.do.you.',
                     'Think.of.re.using.data.',
                     'Think.of.data.sharing.',
                     'Have.you.shared.data.with')
comb_col_names <- c('TaskStandardTools',
                    'TaskStandardToolsDegree',
                    'ThinkReusingData',
                    'ThinkSharingData',
                    'DataSharing')

library(data.table)
for (i in seq_along(comb_string_vec)) {
  cat_col <- paste0(comb_col_names[i], 'Cat')
  is_measure <- grepl(comb_string_vec[i], colnames(data0))

  # measure.vars is passed as a plain vector: a list of length one is the
  # multi-column form of melt() and does not reliably give column names in the
  # variable column across data.table versions.
  data0 <- data.table::melt(as.data.table(data0),
                            id.vars = which(!is_measure),
                            measure.vars = which(is_measure),
                            variable.name = cat_col,
                            value.name = comb_col_names[i],
                            value.factor = TRUE)
  data0 <- as.data.frame(data0)

  # make some nicer labels: keep the text from the end of the first '...' on,
  # turn dots into spaces and restore 'e.g.'
  level_strings <- levels(data0[[cat_col]])
  cut_pos <- str_locate(level_strings, '\\.\\.\\.')[, 2]
  has_sep <- !is.na(cut_pos)  # names without '...' are kept whole, not set to NA
  level_strings[has_sep] <- substr(level_strings[has_sep],
                                   cut_pos[has_sep],
                                   nchar(level_strings[has_sep]))
  level_strings <- gsub('.', ' ', level_strings, fixed = TRUE)
  level_strings <- gsub('e g', 'e.g.', level_strings)

  # reset the labels
  levels(data0[[cat_col]]) <- level_strings
}
data <- data0
# Task Standard Tools plot -----------------------------------------------------
# Yes/No questions; free-text 'Comment'/'Other' categories are removed, 'No'
# answers are kept here (the filter is disabled).
temp <- data %>%
  select(Response.ID, TaskStandardToolsCat, TaskStandardTools,
         DataSharingCat, DataSharing) %>%
  filter(!grepl('Comment|Other', TaskStandardToolsCat)) %>%
  #filter(TaskStandardTools != 'No') %>%
  na.omit() %>%
  unique() %>%
  droplevels()

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pTST <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = TaskStandardToolsCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(TaskStandardTools ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('For.which.of.these.tasks.do.you.use.available.tools.or.standards.png', width = 30, height = 20, units = "cm", res = 300)
print(pTST)
dev.off()
# Task Standard Tools Degree plot ---------------------------------------------
temp <- data %>%
  select(Response.ID, TaskStandardToolsDegreeCat, TaskStandardToolsDegree,
         DataSharingCat, DataSharing) %>%
  filter(!grepl('Other', TaskStandardToolsDegreeCat)) %>%
  na.omit() %>%
  unique()

# Collapse the answer scale into two buckets (labels kept exactly as the
# original script wrote them, since they appear as facet labels).
degree <- as.character(temp$TaskStandardToolsDegree)
degree[degree %in% c('As much as possible', 'Mostly')] <- 'Offten'
degree[degree %in% c('Occasionally', 'This is not relevant for my scientific work')] <- 'Rare'
temp$TaskStandardToolsDegree <- as.factor(degree)

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pTSD <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = TaskStandardToolsDegreeCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(TaskStandardToolsDegree ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('To.what.degree.do.you.use.available.tools.or.standards.png', width = 30, height = 20, units = "cm", res = 300)
print(pTSD)
dev.off()
# Think of Reusing Data plot --------------------------------------------------
temp <- data %>%
  select(Response.ID, ThinkReusingDataCat, ThinkReusingData,
         DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique()

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pTRD <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = ThinkReusingDataCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(ThinkReusingData ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('Think.of.re.using.data.from.repositories.png', width = 30, height = 20, units = "cm", res = 300)
print(pTRD)
dev.off()
# Think of Sharing Data plot --------------------------------------------------
temp <- data %>%
  select(Response.ID, ThinkSharingDataCat, ThinkSharingData,
         DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique()

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

# NOTE: the name pTSD is reused from the Task Standard Tools Degree plot, as in
# the original script.
pTSD <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = ThinkSharingDataCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(ThinkSharingData ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 50 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,50})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('Think.of.sharing.with.researchers.who.are.NOT.direct.collaborators.png', width = 30, height = 20, units = "cm", res = 300)
print(pTSD)
dev.off()
### where are the problems ###
# Build the long-format table for the sharing-problem and analysis questions.
# (The trailing empty argument of the original select() call is removed.)
data0 <- neuro_data %>%
  dplyr::select(Response.ID,
                starts_with('Have.you.shared.data.with'),
                starts_with('Please.indicate.'),
                starts_with('How.do.you.process.and.analyze.your.data.'))

# Column-name pattern of each question group and the name of the value column
# it is melted into (position i of one vector belongs to position i of the other).
comb_string_vec <- c('Please.indicate.',
                     'How.do.you.process.and.analyze.your.data.',
                     'Have.you.shared.data.with')
comb_col_names <- c('SharingProblems',
                    'HowAnalyzeData',
                    'DataSharing')

library(data.table)
for (i in seq_along(comb_string_vec)) {
  cat_col <- paste0(comb_col_names[i], 'Cat')
  is_measure <- grepl(comb_string_vec[i], colnames(data0))

  # measure.vars is passed as a plain vector: a list of length one is the
  # multi-column form of melt() and does not reliably give column names in the
  # variable column across data.table versions.
  data0 <- data.table::melt(as.data.table(data0),
                            id.vars = which(!is_measure),
                            measure.vars = which(is_measure),
                            variable.name = cat_col,
                            value.name = comb_col_names[i],
                            value.factor = TRUE)
  data0 <- as.data.frame(data0)

  # make some nicer labels: keep the text from the end of the first '...' on,
  # turn dots into spaces and restore 'e.g.'
  level_strings <- levels(data0[[cat_col]])
  cut_pos <- str_locate(level_strings, '\\.\\.\\.')[, 2]
  has_sep <- !is.na(cut_pos)  # names without '...' are kept whole, not set to NA
  level_strings[has_sep] <- substr(level_strings[has_sep],
                                   cut_pos[has_sep],
                                   nchar(level_strings[has_sep]))
  level_strings <- gsub('.', ' ', level_strings, fixed = TRUE)
  level_strings <- gsub('e g', 'e.g.', level_strings)

  # reset the labels
  levels(data0[[cat_col]]) <- level_strings
}
data <- data0
# Sharing Data Problems plot ---------------------------------------------------
temp <- data %>%
  select(Response.ID, SharingProblemsCat, SharingProblems,
         DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique()

# Counts per answer combination; `share` is computed within the groups left
# after summarise() drops the last grouping variable.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = n / sum(n))

pSP <- ggplot(data = temp_relFreq) +
  geom_col(mapping = aes(x = SharingProblemsCat, y = share,
                         color = DataSharing, fill = DataSharing),
           width = 0.5) +
  xlab('') + ylab('percent (%)') +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(SharingProblems ~ DataSharingCat, scales = 'fixed',
             margins = FALSE, labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = 'Accent') + scale_fill_brewer(palette = "Accent") +
  # wrap long axis labels after at most 70 characters
  scale_x_discrete(labels = function(x){gsub('(.{1,70})(\\s|$)', '\\1\n', x)})

# print() is explicit so the file is not left empty when the script is source()d.
png('Sharing.problems.please.indicate.png', width = 40, height = 30, units = "cm", res = 300)
print(pSP)
dev.off()
# ---- How do you process and analyze your data plot ----
# One row per respondent and answer combination; answer values that occur in
# fewer than 5 rows are dropped.
temp <- data %>%
  select(Response.ID, HowAnalyzeDataCat, HowAnalyzeData,
         DataSharingCat, DataSharing) %>%
  # optional: filter(HowAnalyzeData != 'No') %>%
  na.omit() %>%
  unique() %>%
  group_by(HowAnalyzeData) %>%
  filter(n() >= 5) %>%
  ungroup()

# Count rows per answer combination. summarise() peels off the last grouping
# column (DataSharing), so the share is computed within each
# HowAnalyzeDataCat / HowAnalyzeData / DataSharingCat cell. It is scaled to
# 0-100 so that it matches the "percent (%)" axis label.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = 100 * n / sum(n)) %>%
  ungroup()

pHAD <- ggplot(data = temp_relFreq) +
  # Pre-computed heights, so geom_col() is used instead of a histogram stat.
  geom_col(
    mapping = aes(x = HowAnalyzeDataCat, y = share,
                  color = DataSharing, fill = DataSharing),
    width = 0.5
  ) +
  xlab("") +
  ylab("percent (%)") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(HowAnalyzeData ~ DataSharingCat,
             scales = "fixed", margins = FALSE,
             labeller = label_wrap_gen(width = 30)) +
  scale_color_brewer(palette = "Accent") +
  scale_fill_brewer(palette = "Accent") +
  # Break long axis labels at whitespace into lines of at most 30 characters.
  scale_x_discrete(labels = function(x) {
    gsub("(.{1,30})(\\s|$)", "\\1\n", x)
  })

png("How.do.you.process.and.analyze.your.data.png",
    width = 30, height = 20, units = "cm", res = 300)
# Explicit print(): autoprinting does not happen when the script is source()d.
print(pHAD)
dev.off()
# ---- Rebuild the working data set for the next group of questions ----
# Column-name patterns of the question groups to melt, in melting order ...
comb_string_vec <- c(
  "What.is.your.opinion",
  "Please.rank.the.top.",
  "Applying.research.data.management.",
  "Have.you.shared.data.with"
)
# ... and the short name each group's value column receives.
comb_col_names <- c(
  "StatementsOpinion",
  "TopSharingProblems",
  "ApplyDataManagement",
  "DataSharing"
)

# Keep the respondent id plus every column belonging to one of the groups.
data0 <- dplyr::select(
  neuro_data,
  Response.ID,
  starts_with("Have.you.shared.data.with"),
  starts_with("What.is.your.opinion"),
  starts_with("Applying.research.data.management."),
  starts_with("Please.rank.the.top.")
)
library(data.table)

# Turn dotted column names into readable factor labels.
#
# level_strings: character vector of factor levels (original column names).
# Returns a character vector of the same length.
#
# Everything before the first "..." separator is dropped; names without a
# separator are kept whole instead of turning into NA. Runs of dots become a
# single space, a standalone "e g" is restored to "e.g.", and surrounding
# whitespace is trimmed.
clean_level_labels <- function(level_strings) {
  sep_start <- regexpr("...", level_strings, fixed = TRUE)
  keep_from <- ifelse(sep_start > 0, sep_start + 2L, 1L)
  labels <- substr(level_strings, keep_from, nchar(level_strings))
  labels <- gsub("\\.+", " ", labels)
  # Word boundaries keep text such as "the group" intact.
  labels <- gsub("\\be g\\b", "e.g.", labels, perl = TRUE)
  trimws(labels)
}

# Melt one question group per iteration: all columns whose name matches
# comb_string_vec[i] are stacked into a value column (comb_col_names[i]) and
# a category column (comb_col_names[i] + "Cat"); the rest act as id columns.
for (i in seq_along(comb_string_vec)) {
  is_measure <- grepl(comb_string_vec[i], colnames(data0))
  data0 <- data.table::melt(
    as.data.table(data0),
    id.vars = which(!is_measure),
    measure.vars = list(which(is_measure)),
    variable.name = paste0(comb_col_names[i], "Cat"),
    value.name = comb_col_names[i],
    value.factor = TRUE
  )

  # The category column is the second-to-last column after melting;
  # give its levels nicer labels.
  data0 <- as.data.frame(data0)
  cat_col <- ncol(data0) - 1
  levels(data0[[cat_col]]) <- clean_level_labels(levels(data0[[cat_col]]))
}
data <- data0
# ---- Top Sharing Data Problems plot ----
# One row per respondent and ranked issue; incomplete rows are dropped.
temp <- data %>%
  select(Response.ID, TopSharingProblemsCat, TopSharingProblems) %>%
  na.omit() %>%
  unique()

pTSP <- ggplot(data = temp) +
  # group = 1 makes prop the share of each answer among all bars of a panel;
  # it is scaled to 0-100 so that it matches the "percent (%)" axis label.
  geom_bar(
    mapping = aes(x = TopSharingProblems, y = after_stat(100 * prop),
                  group = 1),
    width = 0.5
  ) +
  xlab("") +
  ylab("percent (%)") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(. ~ TopSharingProblemsCat,
             scales = "fixed", margins = FALSE,
             labeller = label_wrap_gen(width = 25)) +
  # Break long axis labels at whitespace into lines of at most 70 characters.
  scale_x_discrete(labels = function(x) {
    gsub("(.{1,70})(\\s|$)", "\\1\n", x)
  })

png("Please.rank.the.top.most.pressing.issues.png",
    width = 40, height = 20, units = "cm", res = 300)
# Explicit print(): autoprinting does not happen when the script is source()d.
print(pTSP)
dev.off()
# ---- Apply Data Management plot ----
# One row per respondent and answer combination; incomplete rows are dropped.
temp <- data %>%
  select(Response.ID, ApplyDataManagementCat, ApplyDataManagement,
         DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique()

# Count rows per answer combination. summarise() peels off the last grouping
# column (DataSharing), so the share is computed within each
# ApplyDataManagementCat / ApplyDataManagement / DataSharingCat cell. It is
# scaled to 0-100 so that it matches the "percent (%)" axis label.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = 100 * n / sum(n)) %>%
  ungroup()

pARM <- ggplot(data = temp_relFreq) +
  # Pre-computed heights, so geom_col() is used instead of a histogram stat.
  geom_col(
    mapping = aes(x = ApplyDataManagementCat, y = share,
                  color = DataSharing, fill = DataSharing),
    width = 0.5
  ) +
  xlab("") +
  ylab("percent (%)") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(ApplyDataManagement ~ DataSharingCat,
             scales = "fixed", margins = FALSE,
             labeller = label_wrap_gen(width = 25)) +
  scale_color_brewer(palette = "Accent") +
  scale_fill_brewer(palette = "Accent") +
  # Break long axis labels at whitespace into lines of at most 50 characters.
  scale_x_discrete(labels = function(x) {
    gsub("(.{1,50})(\\s|$)", "\\1\n", x)
  })

png("Applying.research.data.management.png",
    width = 30, height = 20, units = "cm", res = 300)
# Explicit print(): autoprinting does not happen when the script is source()d.
print(pARM)
dev.off()
# ---- What is your opinion plot ----
# One row per respondent and answer combination; incomplete rows are dropped.
temp <- data %>%
  select(Response.ID, StatementsOpinionCat, StatementsOpinion,
         DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique()

# Count rows per answer combination. summarise() peels off the last grouping
# column (DataSharing), so the share is computed within each
# StatementsOpinionCat / StatementsOpinion / DataSharingCat cell. It is
# scaled to 0-100 so that it matches the "percent (%)" axis label.
temp_relFreq <- temp %>%
  group_by_at(vars(-Response.ID)) %>%
  summarise(n = n()) %>%
  mutate(share = 100 * n / sum(n)) %>%
  ungroup()

pOS <- ggplot(data = temp_relFreq) +
  # Pre-computed heights, so geom_col() is used instead of a histogram stat.
  geom_col(
    mapping = aes(x = StatementsOpinionCat, y = share,
                  color = DataSharing, fill = DataSharing),
    width = 0.5
  ) +
  xlab("") +
  ylab("percent (%)") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1)) +
  theme(legend.position = "left", legend.box = "vertical") +
  facet_grid(StatementsOpinion ~ DataSharingCat,
             scales = "fixed", margins = FALSE,
             labeller = label_wrap_gen(width = 25)) +
  scale_color_brewer(palette = "Accent") +
  scale_fill_brewer(palette = "Accent") +
  # Break long axis labels at whitespace into lines of at most 50 characters.
  scale_x_discrete(labels = function(x) {
    gsub("(.{1,50})(\\s|$)", "\\1\n", x)
  })

png("What.is.your.opinion.on.the.following.statements.png",
    width = 30, height = 20, units = "cm", res = 300)
# Explicit print(): autoprinting does not happen when the script is source()d.
print(pOS)
dev.off()
#### polar plot try ####
# Outline colours (black / white) for the DataSharing levels.
cbp1 <- c("#000000", "#FFFFFF")

# NOTE(review): CurrentPosition is not among the columns selected into data0
# when `data` was last rebuilt in this script - confirm it is available here,
# otherwise this select() fails.
temp <- data %>%
  select(Response.ID, CurrentPosition, DataSharingCat, DataSharing) %>%
  na.omit() %>%
  unique()

# Stacked counts per position, drawn in polar coordinates (angle = count).
pCP <- ggplot(data = temp) +
  geom_bar(
    mapping = aes(x = CurrentPosition, color = DataSharing,
                  fill = DataSharingCat),
    width = 0.75
  ) +
  xlab("") +
  ylab("counts") +
  theme(axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
        plot.margin = unit(c(0, 0, 0, 0), "cm")) +
  # facet_grid(. ~ DataSharingCat, scales = 'fixed', margins = FALSE) +
  scale_color_manual(values = cbp1) +
  scale_fill_brewer(palette = "Dark2") +
  coord_polar(theta = "y", clip = "off") +
  # Break long axis labels at whitespace into lines of at most 20 characters.
  scale_x_discrete(labels = function(x) {
    gsub("(.{1,20})(\\s|$)", "\\1\n", x)
  })

# Explicit print(): autoprinting does not happen when the script is source()d.
print(pCP)
|