library(data.table)
library(here)
library(magrittr)
library(ggplot2)
library(viridis)
library(binhf)
library(pwr)
library(knitr)
library(kableExtra)
library(maps)
library(mapproj)
library(rgeos)
library(rworldmap)
# Locate the root directory of the repository
base_path <- here::here()

# Source every helper script (files ending in .r or .R) found anywhere below
# <repository>/code/utils
source_path <- file.path(base_path, "code", "utils",
                         fsep = .Platform$file.sep)
source_files <- list.files(source_path, pattern = "[.][rR]$",
                           full.names = TRUE, recursive = TRUE)
# Sourcing is done purely for its side effects, so a plain loop is sufficient
for (source_file in source_files) {
  source(source_file)
}
# Load the data, pass it through Add_comp() and store the run column as a
# factor (both helpers are defined in the sourced utility scripts)
data <- Add_comp(Load_data())[, run := as.factor(run)]

# Number of distinct participants (used to adjust figure height)
participant_ids <- unique(data$participant_id)
n_participants <- length(participant_ids)

Demographics

Groups

# Distinct age, sex and group values for every participant
group_info <- data[,
  .(age = unique(age),
    sex = unique(sex),
    group = unique(group)),
  by = participant_id
]

# Sample size and age summary (mean, sd, range) for each group.
# NOTE: the variable name `table` masks base::table only as a value; calls
# such as table(x) still resolve to the base function.
table <- group_info[,
  .(n = .N,
    mean_age = round(mean(age), 2),
    sd_age = round(sd(age), 2),
    min_age = min(age),
    max_age = max(age)),
  by = group
]
table <- Prepare_data_for_plot(table)

# Render the summary as a styled table
kableExtra::kable_styling(knitr::kable(table))
group n mean_age sd_age min_age max_age
Younger adults 64 24.42 3.47 18 30
Older adults 56 57.18 5.99 50 73
data_plot <- Prepare_data_for_plot(group_info)

# Largest number of participants sharing a single age; this is the upper
# limit of the y axis and determines the integer axis breaks
max_age_count <- max(table(data_plot$age))

# Bar chart of the age distribution, filled by group
p_dist <- ggplot(data = data_plot, aes(x = age, fill = group)) +
  geom_bar() +
  scale_y_continuous(limits = c(0, max_age_count),
                     breaks = seq(0, max_age_count)) +
  scale_x_continuous(breaks = seq(20, max(data_plot$age), by = 5)) +
  theme(panel.grid.minor = element_blank(),
        legend.position = "top")

# Apply the project plot styling and remove all grid lines
Neurocodify_plot(p_dist) +
  theme(panel.grid = element_blank())

Countries

# Location statistics of participants based on the Prolific data.
# Step 1: distinct group, age and country of residence for every participant
loc_data <- data[,
  .(age_group = unique(group),
    age = unique(age),
    curent_country_of_residence = unique(curent_country_of_residence)),
  by = participant_id
]

# Step 2: per-country counts (overall, and rows whose group label contains
# 'younger' / 'older') plus the mean age; the country column is exposed as
# `region`
loc_data <- loc_data[,
  .(n = .N,
    n_younger = sum(grepl("younger", age_group)),
    n_older = sum(grepl("older", age_group)),
    mean_age = round(mean(age), 2)),
  by = .(region = curent_country_of_residence)
]

# Step 3: countries with the most participants first
loc_data <- loc_data[order(-n)]

# Display as table
kableExtra::kable_styling(knitr::kable(loc_data))
region n n_younger n_older mean_age
United Kingdom 52 16 36 47.92
South Africa 19 13 6 34.37
Italy 12 11 1 26.58
Portugal 9 6 3 33.89
Greece 6 3 3 38.67
Poland 5 4 1 30.20
United States 5 4 1 32.20
Spain 4 3 1 32.50
Australia 1 1 0 24.00
Belgium 1 0 1 51.00
Estonia 1 1 0 23.00
Czech Republic 1 0 1 52.00
Switzerland 1 1 0 30.00
Chile 1 0 1 66.00
Germany 1 1 0 24.00
France 1 0 1 52.00
# World map polygons; the country name is duplicated into a column that
# carries the same name as the country column of the Prolific data
geo_data <- data.table(map_data("world"))[
  , curent_country_of_residence := region
]

# Distinct group, age and country of residence for every participant
participant_locations <- data[,
  .(age_group = unique(group),
    age = unique(age),
    curent_country_of_residence = unique(curent_country_of_residence)),
  by = participant_id
]

# Number of participants per country; two country names are then recoded to
# the spelling used by the world map data
loc_data <- participant_locations[
  , .(n = .N), by = .(region = curent_country_of_residence)
][
  region == "United Kingdom", region := "UK"
][
  region == "United States", region := "USA"
]
# Central point of every country on a low-resolution world map.
# NOTE(review): rgeos may no longer be installable from CRAN - confirm it is
# available in the target environment.
wmap <- rworldmap::getMap(resolution = "low")
centroids <- as.data.frame(rgeos::gCentroid(wmap, byid = TRUE))
# The row names are used as the country (region) name
centroids$region <- rownames(centroids)
centroids <- as.data.table(centroids)[
  , `:=`(long_cent = x,
         lat_cent = y)
][
  # Recode names so they match the spelling used for the participant counts
  region == "United States of America", region := "USA"
][
  region == "United Kingdom", region := "UK"
]

# Countries that occur in the participant counts
prolific_regions <- unique(loc_data$region)

# Abort if any of these countries has no entry in the centroid data
name_dev <- !(prolific_regions %in% unique(centroids$region))
if (any(name_dev)) {
  devs <- prolific_regions[name_dev]
  stop(paste0(
    "Deviation in country names between Prolific data and country centers: c(",
    paste(devs, collapse = ", "),
    ")"
  ))
}

# Abort if any of these countries has no entry in the world map polygons
name_dev <- !(prolific_regions %in% unique(geo_data$region))
if (any(name_dev)) {
  devs <- prolific_regions[name_dev]
  stop(paste0(
    "Deviation in country names between Prolific data and world map: c(",
    paste(devs, collapse = ", "),
    ")"
  ))
}

# Attach the world map polygon rows to the participant counts
loc_data <- merge(loc_data, geo_data)

# For every included country, overwrite the polygon coordinates with the
# coordinates of the country's central point
for (country in unique(loc_data$region)) {
  country_centre <- centroids[region == country]
  loc_data[region == country,
           `:=`(long = country_centre$long_cent,
                lat = country_centre$lat_cent)]
}

# Collapse to a single point per country; the first polygon group id is kept
# so the result still provides a `group` column
loc_data <- loc_data[,
  .(n = unique(n),
    long = mean(long),
    lat = mean(lat),
    group = group[1]),
  by = region
]

# Legend breaks: one entry for every participant count that occurs
count_breaks <- sort(unique(loc_data$n))
# Text of the label showing the complete count of participants
total_label <- paste0("Total N = ", sum(loc_data$n))

# World map with one dot per country (size and fill of dot scale with count)
p_map <- ggplot(geo_data, aes(x = long, y = lat, group = group)) +
  theme_bw() +
  # Country outlines
  geom_polygon(color = "grey", fill = "white", size = 0.1) +
  # Participant locations
  geom_point(data = loc_data,
             aes(fill = n, size = n),
             color = "black",
             shape = 21,
             alpha = 0.5) +
  # Fill and size share the same breaks and are both drawn as a legend
  scale_fill_viridis(option = "D",
                     guide = "legend",
                     breaks = count_breaks) +
  scale_size_continuous(range = c(0.5, 5),
                        breaks = count_breaks) +
  # Map projection
  coord_map("mercator") +
  scale_y_continuous(limits = c(-180, 180)) +
  scale_x_continuous(limits = c(-200, 200), expand = c(0, 0)) +
  # Total participant count
  annotate("label", x = -160, y = -73, label = total_label) +
  theme(panel.grid = element_blank(),
        panel.background = element_rect(fill = "lightblue"),
        axis.title = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        legend.position = "top")

p_map