source_to_bids.py 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442
  1. """Convert the sourcedata of the experiment to BIDS format.
  2. The script is to be run from a /code folder, nested in the BIDS folder that
  3. also contains the /sourcedata folder.
  4. BIDS-folder
  5. ├── code
  6. │   └── source_to_bids.py
  7. │   └── environment.yml
  8. └── sourcedata
  9. │   └── ...
  10. │   └── ...
  11. ...
  12. You need to have the following software dependencies installed for this code to work:
  13. - mne_bids < 0.8
  14. - mne < 0.23
  15. All remaining dependencies will be installed automatically with the packages above.
  16. This code is licensed under MIT (https://opensource.org/licenses/MIT):
  17. Copyright 2022 Stefan Appelhoff
  18. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
  19. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
  20. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. """
  22. # %% Imports and constants
  23. import glob
  24. import json
  25. import multiprocessing
  26. import os
  27. import os.path as op
  28. import pathlib
  29. import shutil
  30. from tempfile import mkdtemp
  31. import requests
  32. import mne_bids
  33. import numpy as np
  34. import pandas as pd
  35. from mne.channels import read_dig_captrak
  36. from mne.io import read_raw_brainvision
  37. from mne_bids.copyfiles import copyfile_brainvision
# Adjust this path to where the bids directory is stored
home = os.path.expanduser("~")
# The published dataset lives in a folder called "mpib_sp_eeg"; when this
# script runs from inside it, write outputs there instead of the work dir.
is_ds_to_publish = "mpib_sp_eeg" in str(pathlib.Path(__file__).parent.absolute())
if ("stefanappelhoff" in home) and (not is_ds_to_publish):
    BIDS_ROOT = os.path.join("/", "home", "stefanappelhoff", "Desktop", "sp_data")
elif ("appelhoff" in home) and (not is_ds_to_publish):
    BIDS_ROOT = os.path.join("/", "vol2", "appelhoff", "sp_data")
elif ("appelhoff" in home) and is_ds_to_publish:
    BIDS_ROOT = os.path.join("/", "vol2", "appelhoff", "mpib_sp_eeg")
else:
    # Unknown machine: fail early rather than writing to a wrong location.
    raise RuntimeError("Could not determine BIDS_ROOT. Please add your own.")
# Whether existing output files may be replaced.
OVERWRITE = True
# Subject numbers 1..40 (40 participants).
SUBJECTS = range(1, 41)
# Parallel workers: leave two cores free, at least 1, at most 40.
NJOBS = max(1, min(multiprocessing.cpu_count() - 2, 40))
# Map from old (sourcedata) task names to templates for the new BIDS task
# names; "{}" is filled with the per-subject stop policy ("Fixed"/"Variable").
TASK_MAP = {
    "spactive": "Active{}",
    "sppassive": "Yoked{}",
    "description": "description",
}
  57. # %% Function for meanings of triggers in the experiment
  58. def provide_trigger_dict():
  59. """Provide a dictionnary mapping str names to byte values [1]_.
  60. References
  61. ----------
  62. .. [1] https://github.com/sappelhoff/sp_experiment/blob/master/sp_experiment/define_ttl_triggers.py # noqa: E501
  63. """
  64. trigger_dict = dict()
  65. # At the beginning and end of the experiment ... take these triggers to
  66. # crop the meaningful EEG data. Make sure to include some time BEFORE and
  67. # AFTER the triggers so that filtering does not introduce artifacts into
  68. # important parts.
  69. trigger_dict["trig_begin_experiment"] = bytes([1])
  70. trigger_dict["trig_end_experiment"] = bytes([2])
  71. # Indication when a new trial is started
  72. trigger_dict["trig_new_trl"] = bytes([3])
  73. # Wenever a new sample within a trial is started (fixation stim)
  74. trigger_dict["trig_sample_onset"] = bytes([4])
  75. # Whenever a choice is being inquired during sampling
  76. trigger_dict["trig_left_choice"] = bytes([5])
  77. trigger_dict["trig_right_choice"] = bytes([6])
  78. trigger_dict["trig_final_choice"] = bytes([7])
  79. # When displaying outcomes during sampling
  80. trigger_dict["trig_mask_out_l"] = bytes([8])
  81. trigger_dict["trig_show_out_l"] = bytes([9])
  82. trigger_dict["trig_mask_out_r"] = bytes([10])
  83. trigger_dict["trig_show_out_r"] = bytes([11])
  84. # Indication when a final choice is started
  85. trigger_dict["trig_new_final_choice"] = bytes([12])
  86. # Whenever a final choice is started (fixation stim)
  87. trigger_dict["trig_final_choice_onset"] = bytes([13])
  88. # Inquiring actions during CHOICE
  89. trigger_dict["trig_left_final_choice"] = bytes([14])
  90. trigger_dict["trig_right_final_choice"] = bytes([15])
  91. # Displaying outcomes during CHOICE
  92. trigger_dict["trig_mask_final_out_l"] = bytes([16])
  93. trigger_dict["trig_show_final_out_l"] = bytes([17])
  94. trigger_dict["trig_mask_final_out_r"] = bytes([18])
  95. trigger_dict["trig_show_final_out_r"] = bytes([19])
  96. # trigger for ERROR, when a trial has to be reset
  97. # (ignore all markers prior to this marker within this trial)
  98. trigger_dict["trig_error"] = bytes([20])
  99. # If the subject sampled a maximum of steps and now wants to take yet
  100. # another one, we force stop and initiate a final choice
  101. trigger_dict["trig_forced_stop"] = bytes([21])
  102. # If subject tried to make a final choice before taking at least one sample
  103. trigger_dict["trig_premature_stop"] = bytes([22])
  104. # Display the block feedback
  105. trigger_dict["trig_block_feedback"] = bytes([23])
  106. return trigger_dict
  107. # %% Making a datacite.yml file for GIN
def make_datacite_yml(bids_root, overwrite):
    """Make a datacite.yml file.

    Write DataCite metadata (used for DOI registration on GIN) to
    ``<bids_root>/datacite.yml``. An existing file is kept unless
    ``overwrite`` is True.

    NOTE(review): the nesting indentation of the embedded YAML below looks
    like it may have been lost when this file was copied -- verify against
    the published dataset's datacite.yml before relying on it.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    txt = """# Metadata for DOI registration according to DataCite Metadata Schema 4.1.
# For detailed schema description see https://doi.org/10.5438/0014
## Required fields
# The main researchers involved. Include digital identifier (e.g., ORCID)
# if possible, including the prefix to indicate its type.
authors:
-
firstname: "Stefan"
lastname: "Appelhoff"
affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
id: "ORCID:0000-0001-8002-0877"
-
firstname: "Ralph"
lastname: "Hertwig"
affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
id: "ORCID:0000-0002-9908-9556"
-
firstname: "Bernhard"
lastname: "Spitzer"
affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
id: "ORCID:0000-0001-9752-932X"
# A title to describe the published resource.
title: "The mpib_sp_eeg dataset"
# Additional information about the resource, e.g., a brief abstract.
description: |
When acquiring information about choice alternatives, decision makers may have varying levels of control over which
and how much information they sample before making a choice. How does subjective control over sampling affect the
quality of experience-based decisions?
This resource contains behavioral, eyetracking, and EEG data of 40 human participants performing a
numerical sampling task in which the level of subjective control over sampling was systematically varied.
The dataset is organized according to the Brain Imaging Data Structure (BIDS).
# List of keywords the resource should be associated with.
# Give as many keywords as possible, to make the resource findable.
keywords:
- cognitive neuroscience
- decisions from experience
- DFE
- sampling paradigm
- sequential sampling
- decision-making
- control
- EEG
- electroencephalography
- eyetracking
- BIDS
- Brain Imaging Data Structure
# License information for this resource. Please provide the license name and/or a link to the license.
# Please add also a corresponding LICENSE file to the repository.
license:
name: "Open Data Commons Public Domain Dedication and License (PDDL) v1.0"
url: "https://opendatacommons.org/licenses/pddl/1-0/"
## Optional Fields
# Funding information for this resource.
# Separate funder name and grant number by comma.
funding:
- "Max Planck Institute for Human Development"
# Related publications. reftype might be: IsSupplementTo, IsDescribedBy, IsReferencedBy.
# Please provide digital identifier (e.g., DOI) if possible.
# Add a prefix to the ID, separated by a colon, to indicate the source.
# Supported sources are: DOI, arXiv, PMID
# In the citation field, please provide the full reference, including title, authors, journal etc.
references:
-
id: "doi:10.1101/2021.06.03.446960"
reftype: "IsSupplementTo"
citation: "Control over sampling boosts numerical evidence processing in human decisions from experience Stefan Appelhoff, Ralph Hertwig, Bernhard Spitzer bioRxiv 2021.06.03.446960"
-
id: "doi:10.5281/zenodo.3361717"
reftype: "IsReferencedBy"
citation: "Stefan Appelhoff. (2019, August 6). sappelhoff/sp_experiment: v1.0 (Version v1.0). Zenodo. http://doi.org/10.5281/zenodo.3361717"
-
id: "doi:10.5281/zenodo.5929222"
reftype: "IsReferencedBy"
citation: "Stefan Appelhoff. (2022). sappelhoff/sp_code: 1.0.0 (1.0.0). Zenodo. https://doi.org/10.5281/zenodo.5929223"
# Resource type. Default is Dataset, other possible values are Software, DataPaper, Image, Text.
resourcetype: Dataset
# Do not edit or remove the following line
templateversion: 1.2
"""
    fname = op.join(bids_root, "datacite.yml")
    # Keep an existing file unless overwriting was requested.
    if op.exists(fname) and not overwrite:
        return
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(txt)
  194. # %% Making a .bidsignore file
  195. def make_bidsignore(bids_root, overwrite):
  196. """Make a .bidsignore file."""
  197. txt = """README.md
  198. datacite.yml
  199. """
  200. fname = op.join(bids_root, ".bidsignore")
  201. if op.exists(fname) and not overwrite:
  202. return
  203. with open(fname, "w", encoding="utf-8") as fout:
  204. fout.write(txt)
  205. # %% Making a .bids-validator-config.json file to ignore some known warnings in the validator
  206. def make_bids_validator_config(bids_root, overwrite):
  207. """Make a .bidsconfig.json file."""
  208. # fmt: off
  209. # switch off the following warnings, because they don't make sense for this dataset.
  210. # README is README.md
  211. # Subjects are naturally inconsistent, because the study is a mixed design with a between factor
  212. bids_validator_config_json = {
  213. "ignore": [
  214. 101, # [WARN] The recommended file /README is missing. See Section 03 (Modality agnostic files) of the BIDS specification. (code: 101 - README_FILE_MISSING)
  215. 38, # [WARN] Not all subjects contain the same files. Each subject should contain the same number of files with the same naming unless some files are known to be missing. (code: 38 - INCONSISTENT_SUBJECTS)
  216. ]}
  217. # fmt: on
  218. fname = op.join(bids_root, ".bids-validator-config.json")
  219. if not op.exists(fname) or overwrite:
  220. with open(fname, "w", encoding="utf-8") as fout:
  221. json.dump(bids_validator_config_json, fout, ensure_ascii=False, indent=4)
  222. # %% Copying EEG files
  223. def copy_eeg_and_events_files(bids_root, task_map, sub, just_json, overwrite):
  224. """Copy and rename the EEG and events files per subject.
  225. Parameters
  226. ----------
  227. bids_root : str
  228. Path to the root of the bids dir.
  229. sub : str
  230. The subject entity to work on, for example "sub-01".
  231. task_map : dict
  232. A mapping between old task names, and templates for the
  233. new task names.
  234. just_json : bool
  235. Whether or not to only touch the json files.
  236. overwrite : bool
  237. If True, overwrite existing files.
  238. """
  239. # map from old to new
  240. for old_task, new_task_template in task_map.items():
  241. fname_old = f"{sub}_task-{old_task}_eeg.vhdr"
  242. sub_id = int(sub[-2:])
  243. stop_policy = "Variable" if sub_id % 2 == 0 else "Fixed"
  244. new_task = new_task_template.format(stop_policy)
  245. fname_new = f"{sub}_task-{new_task}_eeg.vhdr"
  246. src = op.join(bids_root, "sourcedata", sub, "eeg", fname_old)
  247. dest_dir = op.join(bids_root, sub, "eeg")
  248. os.makedirs(dest_dir, exist_ok=True)
  249. dest = op.join(dest_dir, fname_new)
  250. # Copy EEG data
  251. if not just_json:
  252. if op.exists(src) and (not op.exists(dest) or overwrite):
  253. copyfile_brainvision(src, dest)
  254. # Copy and rename events.tsv
  255. src = src.replace("_eeg.vhdr", "_events.tsv")
  256. dest = dest.replace("_eeg.vhdr", "_events.tsv")
  257. # For description task, we need to fix nan -> n/a
  258. if "task-description" in src:
  259. tmpdf = pd.read_csv(src, sep="\t")
  260. if overwrite:
  261. tmpdf.to_csv(dest, index=False, na_rep="n/a", sep="\t")
  262. else:
  263. # if not description, we can simply copy over
  264. if op.exists(src) and (not op.exists(dest) or overwrite):
  265. shutil.copyfile(src, dest)
  266. # %% Making a README
def make_README(bids_root, overwrite):
    """Write a README.md file.

    Write the dataset README to ``<bids_root>/README.md``, keeping an
    existing file unless ``overwrite`` is True.

    NOTE(review): the nested-list indentation of the embedded markdown below
    looks like it may have been lost when this file was copied -- verify
    against the published dataset's README.md before relying on it.
    """
    txt = """# The `mpib_sp_eeg` dataset
This is the readme of the `mpib_sp_eeg` dataset. The short dataset name results from these three facts:
- the data was collected at the Max Planck Institute for Human Development (MPIB)
- the behavioral task was the "Sampling Paradigm" (SP)
- the dataset's main neuroimaging modality is electroencephalography data (EEG)
The data was collected in 2019 at the MPIB in Berlin by Stefan Appelhoff and colleagues.
The data is organized according to the Brain Imaging Data Structure, see: https://bids.neuroimaging.io
The dataset is managed with datalad, see: http://handbook.datalad.org/en/latest/index.html
## Download
1. Install datalad (http://handbook.datalad.org/en/latest/intro/installation.html)
2. Run the code below from the shell:
1. first "clone" the dataset
2. then navigate to the root of the dataset
3. then use `datalad get <file you want>` to get the file contents for each file you want
(you can also use `datalad get . -r` to get everything at once, but this may take some time)
```shell
datalad clone https://gin.g-node.org/sappelhoff/mpib_sp_eeg
cd mpib_sp_eeg
datalad get participants.tsv
```
## Preprint
A preprint is available on BioRxiv.
- BioRxiv: https://doi.org/10.1101/2021.06.03.446960
## Experimental presentation code
The code used for the experimental presentation can be found on GitHub and Zenodo.
- GitHub: https://github.com/sappelhoff/sp_experiment
- Zenodo: https://doi.org/10.5281/zenodo.3354368
## Analysis code
The code used for data analysis can be found on GitHub and on Zenodo.
- GitHub: https://github.com/sappelhoff/sp_code
- Zenodo: https://doi.org/10.5281/zenodo.5929222
## Contact
- [Stefan Appelhoff](mailto:appelhoff@mpib-berlin.mpg.de)
## License
The `source_to_bids.py` script in the `code/` directory is licensed under the MIT license.
This data is made available under the Public Domain Dedication and License v1.0
whose full text can be found at: http://opendatacommons.org/licenses/pddl/1.0/
See also this human readable summary: https://opendatacommons.org/licenses/pddl/summary/
For details, please see the [LICENSE](LICENSE) file.
## Using this dataset
If you use this dataset in your work, please consider citing it as well as the main references describing it.
## Additional information
- The eyetracking recording for sub-18 in the ActiveVariable task is broken ("sub-18/eeg/sub-18_task-ActiveVariable_recording-eyetracking_physio.tsv.gz").
- The eyetracking recording for sub-15 in the YokedFixed task for unknown reasons has timing issues ("sub-15/eeg/sub-15_task-YokedFixed_recording-eyetracking_physio.tsv.gz").
- All bipolar channels (ECG, HEOG, VEOG) were recorded with a ground electrode placed 10cm away from the navel on the participant's right side of the belly.
- The following describes the approximate locations of the ECG, HEOG, and VEOG electrodes:
- ECG- between the 5th and 6th rib on the left chest.
- ECG+ in the middle of the upper chest
- HEOG- 1cm from the left outer canthus
- HEOG+ 1cm from the right outer canthus
- VEOG- 2cm below the left eye
- VEOG+ 1cm above the left eyebrow
"""
    fname = op.join(bids_root, "README.md")
    # Keep an existing file unless overwriting was requested.
    if op.exists(fname) and not overwrite:
        return
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(txt)
  327. # %% Making a dataset_description.json
  328. def make_dataset_description(bids_root, overwrite):
  329. """Make a dataset_description.json."""
  330. # Prepare and write participants JSON
  331. dataset_description_json = {
  332. "Name": "mpib_sp_eeg",
  333. "BIDSVersion": "1.6.0",
  334. "DatasetType": "raw",
  335. "License": "PDDL",
  336. "Authors": [
  337. "Stefan Appelhoff",
  338. "Ralph Hertwig",
  339. "Bernhard Spitzer",
  340. ],
  341. "Acknowledgements": "We thank Agnessa Karapetian, Clara Wicharz, Jann Wäscher, Yoonsang Lee, and Zhiqi Kang for help with data collection, Dirk Ostwald and Casper Kerrén for helpful discussions and feedback, and Susannah Goss for editorial assistance.",
  342. "HowToAcknowledge": "Please cite https://doi.org/10.1101/2021.06.03.446960",
  343. "EthicsApprovals": [
  344. "The study was approved by the ethics committee of the Max Planck Institute for Human Development, Berlin, Germany."
  345. ],
  346. "ReferencesAndLinks": [
  347. "https://doi.org/10.1101/2021.06.03.446960",
  348. "https://doi.org/10.5281/zenodo.3354368",
  349. "https://doi.org/10.5281/zenodo.5929222"
  350. ],
  351. "DatasetDOI": "https://gin.g-node.org/sappelhoff/mpib_sp_eeg/",
  352. }
  353. fname = op.join(bids_root, "dataset_description.json")
  354. if not op.exists(fname) or overwrite:
  355. with open(fname, "w", encoding="utf-8") as fout:
  356. json.dump(dataset_description_json, fout, ensure_ascii=False, indent=4)
  357. fout.write("\n")
  358. # %% Make LICENSE
  359. def make_LICENSE(bids_root, overwrite):
  360. """Make LICENSE file."""
  361. response = requests.get("https://opendatacommons.org/licenses/pddl/pddl-10.txt")
  362. fname = op.join(bids_root, "LICENSE")
  363. if not op.exists(fname) or overwrite:
  364. with open(fname, "w", encoding="utf-8") as fout:
  365. fout.write(response.text)
  366. # %% Make CHANGES
def make_CHANGES(bids_root, overwrite):
    """Make CHANGES file.

    Write the dataset changelog to ``<bids_root>/CHANGES``, keeping an
    existing file unless ``overwrite`` is True.

    NOTE(review): the continuation-line indentation of the changelog text
    below may have been lost when this file was copied -- verify against
    the published dataset's CHANGES file.
    """
    txt = """1.0.0 2021-05-25
- Initial release
1.0.1 2021-06-04
- Updated link to preprint in dataset_description.json and datacite.yml
- Added Manufacturer and ManufacturersModelName to physio.json (Tobii 4C eyetracker)
- Rephrased acknowledgements in dataset_description.json
- Updated code/environment.yml
1.0.2 2022-02-01
- Minor updates to README, source_to_bids.py, datacite.yml, dataset_description.json
in order to add links to other resources, and minor wording fixes
- Removed unneeded code/environment.yml
- The `source_to_bids.py` script is now licensed under the MIT license
- Make dataset available under PDDL
"""
    fname = op.join(bids_root, "CHANGES")
    # Only write when missing or when overwriting was requested.
    if not op.exists(fname) or overwrite:
        with open(fname, "w", encoding="utf-8") as fout:
            fout.write(txt)
  387. # %% Make phenotype
  388. def make_phenotype(bids_root, overwrite):
  389. """Make phenotype directory for BNT data."""
  390. # Make phenotype directory
  391. os.makedirs(op.join(BIDS_ROOT, "phenotype"), exist_ok=True)
  392. # the phenotypo data was read by a human from the handwritten PDFs
  393. # in the /sourcedata and recorded here.
  394. # NOTE: When participants answered with floats, the answer was rounded to integer.
  395. # When they answered in percentage and a count was needed, ...
  396. # that count was calculated using the percentage.
  397. # fmt: off
  398. data = {
  399. "participant_id": [f'sub-{subj:02}' for subj in range(1, 41)],
  400. "q1": [25, 35, 25, 76, 25, 25, 25, 25, 25, 25, 3, 10, 25, 25, 25, 10, 25, 25, 10, 40,
  401. 25, 25, 25, 40, 10, 5, 13, 25, 25, 25, 25, 25, 30, 40, 30, 20, 25, 40, 20, 10],
  402. "q2": [20, 20, 20, 42, 20, 20, 23, 35, 23, 20, 35, 20, 33, 35, 46, 21, 22, 25, 47, 23,
  403. 20, 20, 25, 35, 23, 30, 35, 19, 20, 26, 20, 20, 35, 20, 12, 23, 21, 35, 20, 35],
  404. "q3": [50, 5, 80, 35, 50, 20, 50, 50, 50, 50, 4, 4, 50, 20, np.nan, 5, 8, 7, 50, 5, 50,
  405. 80, 8, 30, 4, 80, 2, 23, 9, 80, 15, 80, 95, 10, 20, 5, 95, 80, 4, 30],
  406. "q4": [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40, 30,
  407. 30, 30, 40, 30, 33, 30, np.nan, 30, 30, 30, 30, 200, 10, 33, 30, 30, 30, 50, 30],
  408. }
  409. # fmt: on
  410. # Fill correct or not based on the true answers
  411. q1_correct = 25
  412. q2_correct = 20
  413. q3_correct = 50
  414. q4_correct = 30
  415. for question, correct in zip(
  416. ["q1", "q2", "q3", "q4"], [q1_correct, q2_correct, q3_correct, q4_correct]
  417. ):
  418. data[f"{question}_correct"] = (np.array(data[f"{question}"]) == correct).astype(
  419. int
  420. )
  421. # Make dataframe and save as TSV
  422. phenotype_tsv = pd.DataFrame(data=data)
  423. fname = op.join(BIDS_ROOT, "phenotype", "berlin_numeracy_test.tsv")
  424. if not op.exists(fname) or overwrite:
  425. phenotype_tsv.to_csv(fname, index=False, na_rep="n/a", sep="\t")
  426. # Prepare the JSON file
  427. phenotype_json = {
  428. "MeasurementToolMetadata": {
  429. "Description": "Berlin Numeracy Test - Pen and Paper Version",
  430. "TermURL": "http://www.riskliteracy.org/researchers/",
  431. },
  432. "participant_id": {
  433. "LongName": "participant identification",
  434. "Description": "identification number of the participant",
  435. },
  436. "q1": {
  437. "LongName": "question two",
  438. "Description": "Out of 1,000 people in a small town 500 are members of a choir. Out of these 500 members in the choir 100 are men. Out of the 500 inhabitants that are not in the choir 300 are men. What is the probability that a randomly drawn man is a member of the choir? (please indicate the probability in percent).",
  439. },
  440. "q1_correct": {
  441. "LongName": "question two correct",
  442. "Description": "was question two answered correctly with 25?",
  443. "Levels": {0: False, 1: True},
  444. },
  445. "q2": {
  446. "LongName": "question three",
  447. "Description": "Imagine we are throwing a loaded die (6 sides). The probability that the die shows a 6 is twice as high as the probability of each of the other numbers. On average, out of these 70 throws, how many times would the die show the number 6?",
  448. },
  449. "q2_correct": {
  450. "LongName": "question three correct",
  451. "Description": "was question three answered correctly with 20?",
  452. "Levels": {0: False, 1: True},
  453. },
  454. "q3": {
  455. "LongName": "question four",
  456. "Description": "In a forest 20% of mushrooms are red, 50% brown and 30% white. A red mushroom is poisonous with a probability of 20%. A mushroom that is not red is poisonous with a probability of 5%. What is the probability that a poisonous mushroom in the forest is red?",
  457. },
  458. "q3_correct": {
  459. "LongName": "question four correct",
  460. "Description": "was question four answered correctly with 50?",
  461. "Levels": {0: False, 1: True},
  462. },
  463. "q4": {
  464. "LongName": "question one",
  465. "Description": "Imagine we are throwing a five-sided die 50 times. On average, out of these 50 throws how many times would this five-sided die show an odd number (1, 3 or 5)?",
  466. },
  467. "q4_correct": {
  468. "LongName": "question one correct",
  469. "Description": "was question one answered correctly with 30?",
  470. "Levels": {0: False, 1: True},
  471. },
  472. }
  473. fname = op.join(bids_root, "phenotype", "berlin_numeracy_test.json")
  474. if not op.exists(fname) or overwrite:
  475. with open(fname, "w", encoding="utf-8") as fout:
  476. json.dump(phenotype_json, fout, ensure_ascii=False, indent=4)
  477. # %% Make events.json file
  478. # Descriptions are taken from here:
  479. # https://github.com/sappelhoff/sp_experiment/blob/6e50de2ecde4b8d13f267cb5eff5451578372a89/sp_experiment/define_variable_meanings.py#L12-L258
  480. def make_events_json_dict():
  481. """Provide a dict to describe all collected variables."""
  482. # Get the trigger values
  483. trigger_dict = provide_trigger_dict()
  484. events_json_dict = dict()
  485. # Add stimulus presentation information
  486. events_json_dict["StimulusPresentation"] = {
  487. "OperatingSystem": "Windows 10 - Version 1903",
  488. "SoftwareName": "PsychoPy",
  489. "SoftwareRRID": "SCR_006571",
  490. "SoftwareVersion": "3.0.0",
  491. "Code": "doi:10.5281/zenodo.3354368"
  492. }
  493. # Start populating the dict
  494. events_json_dict["onset"] = {
  495. "Description": "onset of the event",
  496. "Units": "seconds",
  497. }
  498. events_json_dict["duration"] = {
  499. "Description": "duration of the event",
  500. "Units": "seconds",
  501. }
  502. events_json_dict["trial"] = {
  503. "Description": "zero indexed trial counter, where a trial is a sequence of steps that ends with a final choice."
  504. }
  505. events_json_dict["action_type"] = {
  506. "Description": "type of the action that the subject performed at this event within a trial",
  507. "Levels": {
  508. "sample": "the subject sampled either the left or the right option",
  509. "stop": "the subject decided to stop sampling the options and instead use the next action for a final choice",
  510. "forced_stop": "the subject took a maximum of samples and wanted to take another one, so we force stopped in this turn",
  511. "premature_stop": "the subject tried to stop sampling before taking a single sample. This lead to an error.",
  512. "final_choice": "the subject chose either the left or the right option as a final choice",
  513. },
  514. }
  515. events_json_dict["action"] = {
  516. "Description": "the concrete action that the subject performed for the action type",
  517. "Levels": {
  518. "0": "the subject picked the *left* option",
  519. "1": "the subject picked the *right* option",
  520. "2": "the subject decided to stop sampling - for action_type *stop* only",
  521. },
  522. }
  523. events_json_dict["outcome"] = {
  524. "Description": "the outcome that the subject received for their action. Numbers in the range 1 to 9.",
  525. }
  526. events_json_dict["response_time"] = {
  527. "Description": "the time it took the subject to respond after the onset of the event",
  528. "Units": "milliseconds",
  529. }
  530. events_json_dict["value"] = {
  531. "Description": "the TTL trigger value (=EEG marker value) associated with an event",
  532. "Levels": {
  533. trigger_dict["trig_begin_experiment"]: "beginning of the experiment",
  534. trigger_dict["trig_end_experiment"]: "end of the experiment",
  535. trigger_dict[
  536. "trig_new_trl"
  537. ]: "color of fixcross is changed to indicate start of new trial",
  538. trigger_dict[
  539. "trig_sample_onset"
  540. ]: "onset of new sample within a trial (fixcross changes to white color)",
  541. trigger_dict["trig_left_choice"]: "subject chose *left* during sampling",
  542. trigger_dict["trig_right_choice"]: "subject chose *right* during sampling",
  543. trigger_dict["trig_final_choice"]: "subject chose *stop* during sampling",
  544. trigger_dict[
  545. "trig_mask_out_l"
  546. ]: "a masked outcome is shown after sampling (left side)",
  547. trigger_dict[
  548. "trig_show_out_l"
  549. ]: "an outcome is revealed after sampling (left side)",
  550. trigger_dict[
  551. "trig_mask_out_r"
  552. ]: "a masked outcome is shown after sampling (right side)",
  553. trigger_dict[
  554. "trig_show_out_r"
  555. ]: "an outcome is revealed after sampling (right side)",
  556. trigger_dict[
  557. "trig_new_final_choice"
  558. ]: "color of fixcross is changed to indicate start of a final choice",
  559. trigger_dict[
  560. "trig_final_choice_onset"
  561. ]: "onset of new final choice at the end of trial (fixcross changes to white color)",
  562. trigger_dict[
  563. "trig_left_final_choice"
  564. ]: "subject chose *left* for final choice",
  565. trigger_dict[
  566. "trig_right_final_choice"
  567. ]: "subject chose *right* for final choice",
  568. trigger_dict[
  569. "trig_mask_final_out_l"
  570. ]: "a masked outcome is shown after final choice (left side)",
  571. trigger_dict[
  572. "trig_show_final_out_l"
  573. ]: "an outcome is revealed after final choice (left side)",
  574. trigger_dict[
  575. "trig_mask_final_out_r"
  576. ]: "a masked outcome is shown after final choice (right side)",
  577. trigger_dict[
  578. "trig_show_final_out_r"
  579. ]: "an outcome is revealed after final choice (right side)",
  580. trigger_dict[
  581. "trig_error"
  582. ]: "color of fixcross is changed to indicate an error (ignore all markers prior to this marker within this trial)",
  583. trigger_dict[
  584. "trig_forced_stop"
  585. ]: "subject took the maximum number of samples and wanted to take yet another one",
  586. trigger_dict[
  587. "trig_premature_stop"
  588. ]: "subject tried to make a final choice before taking at least one sample",
  589. trigger_dict["trig_block_feedback"]: "block feedback is displayed",
  590. },
  591. }
  592. events_json_dict["mag0_1"] = {
  593. "LongName": "magnitude 0_1",
  594. "Description": "the first of two possible magnitudes in outcomes for option 0",
  595. }
  596. events_json_dict["prob0_1"] = {
  597. "LongName": "probability 0_1",
  598. "Description": "the first of two possible probabilities in outcomes for option 0",
  599. }
  600. events_json_dict["mag0_2"] = {
  601. "LongName": "magnitude 0_2",
  602. "Description": "the second of two possible magnitudes in outcomes for option 0",
  603. }
  604. events_json_dict["prob0_2"] = {
  605. "LongName": "probability 0_2",
  606. "Description": "the second of two possible probabilities in outcomes for option 0",
  607. }
  608. events_json_dict["mag1_1"] = {
  609. "LongName": "magnitude 1_1",
  610. "Description": "the first of two possible magnitudes in outcomes for option 1",
  611. }
  612. events_json_dict["prob1_1"] = {
  613. "LongName": "probability 1_1",
  614. "Description": "the first of two possible probabilities in outcomes for option 1",
  615. }
  616. events_json_dict["mag1_2"] = {
  617. "LongName": "magnitude 1_2",
  618. "Description": "the second of two possible magnitudes in outcomes for option 1",
  619. }
  620. events_json_dict["prob1_2"] = {
  621. "LongName": "probability 1_2",
  622. "Description": "the second of two possible probabilities in outcomes for option 1",
  623. }
  624. events_json_dict["version"] = {
  625. "Description": "version of the experiment used for collecting this data."
  626. }
  627. events_json_dict["reset"] = {
  628. "Description": "boolean that describes whether of not to ignore events prior to this event in the current trial.",
  629. "Levels": {
  630. "0": "so far no error in this trial since the beginning or the last error",
  631. "1": "error committed: disregard all events prior to this event for the current trial.",
  632. },
  633. }
  634. events_json_dict["system_time_stamp"] = {
  635. "Description": "system time in microseconds as measured from an arbitrary starting point. This should be used to connect the event with the eyetracking data.",
  636. "Units": "microseconds",
  637. }
  638. # Keys in levels for "value" are bytes: we need to turn them into integers
  639. events_json_dict["value"]["Levels"] = {
  640. ord(key): val for key, val in events_json_dict["value"]["Levels"].items()
  641. }
  642. # return
  643. return events_json_dict
  644. def make_description_task_json():
  645. """Provide variable meanings for description task.
  646. This is heavily based on the sampling paradigm tasks. We overwrite a few
  647. of the descriptions to make more sense for the description task.
  648. """
  649. # Get the definitions from sampling paradigm
  650. events_json_dict = make_events_json_dict()
  651. # Overwrite some values
  652. events_json_dict["trial"][
  653. "Description"
  654. ] = "zero indexed trial counter, where a trial index points to the lottery setting that was used in this event by comparing with the trial column in the spactive task."
  655. # Remove all action types except "final choice"
  656. for level in ["sample", "stop", "forced_stop", "premature_stop"]:
  657. events_json_dict["action_type"]["Levels"].pop(level)
  658. # Remove possible action "2", indicating a stop: Stopping is not possible
  659. # in the descriptions task. Only "0"(=pick left) and "1"(=pick right)
  660. for level in ["2"]:
  661. events_json_dict["action"]["Levels"].pop(level)
  662. # Some trigger values are not occurring in the descriptions task
  663. trigger_dict = provide_trigger_dict()
  664. for level in [
  665. ord(trigger_dict["trig_sample_onset"]),
  666. ord(trigger_dict["trig_left_choice"]),
  667. ord(trigger_dict["trig_right_choice"]),
  668. ord(trigger_dict["trig_final_choice"]),
  669. ord(trigger_dict["trig_mask_out_l"]),
  670. ord(trigger_dict["trig_show_out_r"]),
  671. ord(trigger_dict["trig_new_final_choice"]),
  672. ord(trigger_dict["trig_forced_stop"]),
  673. ord(trigger_dict["trig_premature_stop"]),
  674. ]:
  675. events_json_dict["value"]["Levels"].pop(level)
  676. return events_json_dict
  677. def make_events_json(bids_root, overwrite):
  678. """Make events.json files."""
  679. # Prepare and write events JSON for all non-description tasks
  680. events_json = make_events_json_dict()
  681. tasks = ["ActiveFixed", "ActiveVariable", "YokedFixed", "YokedVariable"]
  682. for taskname in tasks:
  683. fname = op.join(bids_root, f"task-{taskname}_events.json")
  684. if not op.exists(fname) or overwrite:
  685. with open(fname, "w", encoding="utf-8") as fout:
  686. json.dump(events_json, fout, ensure_ascii=False, indent=4)
  687. # now a slightly adjusted one for description task
  688. events_json_descr = make_description_task_json()
  689. taskname = "description"
  690. fname = op.join(bids_root, f"task-{taskname}_events.json")
  691. if not op.exists(fname) or overwrite:
  692. with open(fname, "w", encoding="utf-8") as fout:
  693. json.dump(events_json_descr, fout, ensure_ascii=False, indent=4)
  694. # %% Making participants files
  695. def make_participants(bids_root, overwrite):
  696. """Make participants.tsv and participants.json files."""
  697. # Read participant data from the log files
  698. fname_template = op.join(bids_root, "sourcedata", "sub-{0:02}/log_{0}_active.txt")
  699. subj = []
  700. age = []
  701. sex = []
  702. subjects = range(1, 41)
  703. for isubj in subjects:
  704. fname = fname_template.format(isubj)
  705. with open(fname, "r") as fin:
  706. lines = fin.readlines()
  707. subj.append(f"sub-{isubj:02}")
  708. age.append(int(lines[1].strip().split(": ")[-1]))
  709. sex.append(lines[2].strip().split(": ")[-1][0].lower())
  710. # Some data can be filled from our knowledge about the experiment
  711. # See log files in sourcedata
  712. handedness = np.repeat(["r"], 40)
  713. seed = np.repeat(np.arange(1, 11), 4)
  714. # https://stackoverflow.com/a/33802213/5201771
  715. # pick n consecutive items every m items
  716. yoked_to = np.tile(np.arange(1, 41).reshape(-1, 4)[:, :2], (1, 2)).reshape(-1)
  717. yoked_to = [f"sub-{s:02}" for s in yoked_to]
  718. start_condition = np.tile(["active"] * 2 + ["yoked"] * 2, 10)
  719. stopping = np.tile(["fixed", "variable"], 20)
  720. data = {
  721. "participant_id": subj,
  722. "sex": sex,
  723. "age": age,
  724. "handedness": handedness,
  725. "seed": seed,
  726. "yoked_to": yoked_to,
  727. "start_condition": start_condition,
  728. "stopping": stopping,
  729. }
  730. participants_tsv = pd.DataFrame(data=data)
  731. # Write participants TSV
  732. fname = op.join(bids_root, "participants.tsv")
  733. if not op.exists(fname) or overwrite:
  734. participants_tsv.to_csv(fname, index=False, na_rep="n/a", sep="\t")
  735. # Prepare and write participants JSON
  736. participants_json = {
  737. "participant_id": {"Description": "Unique participant identifier."},
  738. "age": {"Description": "The age of the participant.", "Units": "years"},
  739. "sex": {
  740. "Description": "The biological sex of the participant as judged by the experimenter.",
  741. "Levels": {"m": "male", "f": "female"},
  742. },
  743. "handedness": {
  744. "Description": "The handedness of the participant as reported by the participant",
  745. "Levels": {"l": "left", "r": "right"},
  746. },
  747. "seed": {
  748. "Description": "The random seed used to initialize the experiment. Participants that share a seed, saw the same underlying distributions per trial."
  749. },
  750. "yoked_to": {
  751. "Description": "The participant_id to which the participant was yoked in the yoked sampling condition. Some participants were yoked to themselves (i.e., they saw a replay of their own active sampling condition)."
  752. },
  753. "start_condition": {
  754. "Description": "The experimental condition with which the participant started the overall experiment. If the experiment was started with the active sampling condition, the yoked sampling condition was done afterwards and vice versa. After the two sampling conditions, the description task and the berlin numeracy task followed (not counterbalanced).",
  755. "Levels": {
  756. "active": "active sampling condition",
  757. "yoked": "yoked sampling condition",
  758. },
  759. },
  760. "stopping": {
  761. "Description": "The between factor of the experiment. Some participants always had to draw 12 samples ('fixed'), others could draw between 1 and 19 samples ('variable').",
  762. "Levels": {
  763. "fixed": "fixed stopping after 12 samples",
  764. "variable": "variable stopping between 1 and 19 samples",
  765. },
  766. },
  767. }
  768. fname = op.join(bids_root, "participants.json")
  769. if not op.exists(fname) or overwrite:
  770. with open(fname, "w", encoding="utf-8") as fout:
  771. json.dump(participants_json, fout, ensure_ascii=False, indent=4)
  772. # %% Preparing and copying the eyetracking files
  773. def copy_eyetracking_files(bids_root, task_map, sub, just_json, overwrite):
  774. """Format eyetracking files.
  775. Parameters
  776. ----------
  777. bids_root : str
  778. Path to the root of the bids dir.
  779. sub : str
  780. The subject entity to work on, for example "sub-01".
  781. task_map : dict
  782. A mapping between old task names, and templates for the
  783. new task names.
  784. just_json : bool
  785. Whether or not to only touch the json files.
  786. overwrite : bool
  787. If True, overwrite existing files.
  788. """
  789. # Define relevant columns
  790. cols = [
  791. "device_time_stamp",
  792. "system_time_stamp",
  793. "left_gaze_point_on_display_area_x",
  794. "left_gaze_point_on_display_area_y",
  795. "left_gaze_point_validity",
  796. "left_pupil_diameter",
  797. "left_pupil_validity",
  798. "right_gaze_point_on_display_area_x",
  799. "right_gaze_point_on_display_area_y",
  800. "right_gaze_point_validity",
  801. "right_pupil_diameter",
  802. "right_pupil_validity",
  803. ]
  804. # Object for physio.json --> needs to be written once for each task and subject.
  805. # StartTime could be estimated by looking into the
  806. # events.tsv file and compare the `system_time_stamp` column with the same
  807. # column from the eyetracking files.
  808. physio_json = {
  809. "SamplingFrequency": 90, # Tobii 4C eyetracker is not "research grade", so this sfreq is not guaranteed to be stable apparently.
  810. "StartTime": "n/a",
  811. "Columns": cols,
  812. "Manufacturer": "Tobii Technology, Sweden",
  813. "ManfacturersModelName": "4C",
  814. "device_time_stamp": {
  815. "Description": "time stamp according to the eyetracker's internal clock",
  816. "Units": "µs",
  817. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazeData.html#details",
  818. },
  819. "system_time_stamp": {
  820. "Description": "time stamp according to the computer's internal clock",
  821. "Units": "µs",
  822. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazeData.html#details",
  823. },
  824. "left_gaze_point_on_display_area_x": {
  825. "Description": "X value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
  826. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
  827. },
  828. "left_gaze_point_on_display_area_y": {
  829. "Description": "Y value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
  830. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
  831. },
  832. "left_gaze_point_validity": {
  833. "Description": "validity of the left gaze point data",
  834. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#afce85e70ee3e1a53e0eae66cf2fecc30",
  835. "Levels": {"0": False, "1": True},
  836. },
  837. "left_pupil_diameter": {
  838. "Description": "diameter of the left pupil in millimeters",
  839. "Units": "mm",
  840. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
  841. },
  842. "left_pupil_validity": {
  843. "Description": "validity of the left pupil data",
  844. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
  845. "Levels": {"0": False, "1": True},
  846. },
  847. "right_gaze_point_on_display_area_x": {
  848. "Description": "X value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
  849. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
  850. },
  851. "right_gaze_point_on_display_area_y": {
  852. "Description": "Y value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
  853. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
  854. },
  855. "right_gaze_point_validity": {
  856. "Description": "validity of the right gaze point data",
  857. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#afce85e70ee3e1a53e0eae66cf2fecc30",
  858. "Levels": {"0": False, "1": True},
  859. },
  860. "right_pupil_diameter": {
  861. "Description": "diameter of the right pupil in millimeters",
  862. "Units": "mm",
  863. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
  864. },
  865. "right_pupil_validity": {
  866. "Description": "validity of the right pupil data",
  867. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
  868. "Levels": {"0": False, "1": True},
  869. },
  870. }
  871. for old_task, new_task in TASK_MAP.items():
  872. # Get file, and prepare new file name
  873. fname_old = f"{sub}_task-{old_task}_eyetracking.tsv"
  874. sub_id = int(sub[-2:])
  875. stop_policy = "Variable" if sub_id % 2 == 0 else "Fixed"
  876. new_task = new_task.format(stop_policy)
  877. fname_new = f"{sub}_task-{new_task}_recording-eyetracking_physio.tsv.gz"
  878. dest_dir = op.join(bids_root, sub, "eeg")
  879. os.makedirs(dest_dir, exist_ok=True)
  880. dest = op.join(dest_dir, fname_new)
  881. if op.exists(dest) and not overwrite:
  882. continue
  883. # Read data
  884. src = op.join(bids_root, "sourcedata", sub, "eyetracking", fname_old)
  885. df = pd.read_csv(src, sep="\t")
  886. # Convert str of tuple X Y gazepoint to two float columns
  887. for direction in ["left", "right"]:
  888. tmp = (
  889. df[f"{direction}_gaze_point_on_display_area"]
  890. .str.lstrip("(")
  891. .str.rstrip(")")
  892. .str.split(",", expand=True)
  893. )
  894. df[f"{direction}_gaze_point_on_display_area_x"] = (
  895. tmp[0].to_numpy().astype("float")
  896. )
  897. df[f"{direction}_gaze_point_on_display_area_y"] = (
  898. tmp[1].to_numpy().astype("float")
  899. )
  900. # Select only relevant columns and write
  901. df = df[cols]
  902. if not just_json:
  903. # skip writing if we only write (update) JSONs
  904. df.to_csv(
  905. dest, sep="\t", header=False, na_rep="n/a", index=False, compression="gzip"
  906. )
  907. # Write one physio.json per task - needed because each has a different StartTime
  908. # First, try to get StartTime
  909. events_fname = op.join(
  910. bids_root, sub, "eeg", f"{sub}_task-{new_task}_events.tsv"
  911. )
  912. if op.exists(events_fname):
  913. # We calculate StartTime as difference between the first
  914. # measured timestamps in seconds
  915. events_df = pd.read_csv(events_fname, sep="\t")
  916. first_event_timestamp = events_df["system_time_stamp"].to_list()[0]
  917. first_physio_timestamp = df["system_time_stamp"].to_list()[0]
  918. try:
  919. # timestamps must be int, and event is measured *after* physio, so
  920. # it must be bigger
  921. assert isinstance(first_event_timestamp, int)
  922. assert isinstance(first_physio_timestamp, int)
  923. assert first_event_timestamp > first_physio_timestamp
  924. starttime_seconds = (
  925. first_physio_timestamp - first_event_timestamp
  926. ) / 1e6
  927. assert starttime_seconds < 0
  928. physio_json["StartTime"] = starttime_seconds
  929. except AssertionError:
  930. if "sub-18_task-Active" in events_fname:
  931. # Eyetracking recording for sub-18 is broken and full of NaN
  932. physio_json["StartTime"] = 0.0
  933. else:
  934. print(
  935. f"Calculation of StartTime failed for {events_fname}:\n{first_event_timestamp}\n{first_physio_timestamp}"
  936. )
  937. else:
  938. print(
  939. f"Did not find events file, could not calculate StartTime for physio.json:\n{events_fname}"
  940. )
  941. # Now write
  942. fname = op.join(
  943. bids_root,
  944. sub,
  945. "eeg",
  946. f"{sub}_task-{new_task}_recording-eyetracking_physio.json",
  947. )
  948. if not op.exists(fname) or overwrite:
  949. with open(fname, "w", encoding="utf-8") as fout:
  950. json.dump(physio_json, fout, ensure_ascii=False, indent=4)
  951. # %% Coordsystem, electrodes, channels, eeg sidecar, and scans files
  952. def coord_elec_chan_eegjson_scans(bids_root, task_map, sub, just_json, overwrite):
  953. """Write/Convert several files to BIDS.
  954. Convert the following files:
  955. - electrodes.tsv --> per subj
  956. - coordsystem.json --> per subj
  957. - channels.tsv --> per subj/task
  958. - eeg.json --> per task
  959. Parameters
  960. ----------
  961. bids_root : str
  962. Path to the root of the bids dir.
  963. sub : str
  964. The subject entity to work on, for example "sub-01".
  965. task_map : dict
  966. A mapping between old task names, and templates for the
  967. new task names.
  968. just_json : bool
  969. This parameter is currently not implemented and passing it has no effect.
  970. overwrite : bool
  971. If True, overwrite existing files.
  972. """
  973. # Temporary directory to store all mne-bids outputs
  974. tmpdir = mkdtemp(prefix=f"sp_bids_tmp_{sub}_")
  975. # data needed for conversion
  976. df = pd.read_csv(op.join(bids_root, "participants.tsv"), sep="\t")
  977. # What stopping group did this sub belong to
  978. stopping = (
  979. df[df["participant_id"] == f"{sub}"]["stopping"].to_list()[0].capitalize()
  980. )
  981. # Get montage file
  982. fname_bvct = f"CapTrakResultFile_{sub}.bvct"
  983. fname_bvct = op.join(bids_root, "sourcedata", f"{sub}", "coords", fname_bvct)
  984. montage = read_dig_captrak(fname_bvct)
  985. # Get ground and reference positions separately, because they get dropped
  986. # at raw.set_montage() otherwise
  987. pos_gnd = montage.get_positions()["ch_pos"]["GND"]
  988. pos_ref = montage.get_positions()["ch_pos"]["REF"]
  989. # Convert to BIDS using mne-bids
  990. for oldtask, newtask_template in task_map.items():
  991. newtask = newtask_template.format(stopping)
  992. bids_path = mne_bids.BIDSPath(
  993. subject=f"{sub[-2:]}", task=newtask, root=tmpdir, datatype="eeg"
  994. )
  995. # Get EEG file
  996. fname_vhdr = f"{sub}_task-{oldtask}_eeg.vhdr"
  997. fname_vhdr = op.join(bids_root, "sourcedata", f"{sub}", "eeg", fname_vhdr)
  998. raw = read_raw_brainvision(fname_vhdr, preload=False, verbose=False)
  999. raw.set_channel_types(
  1000. {"HEOG": "eog", "VEOG": "eog", "ECG": "ecg"}, verbose=False
  1001. )
  1002. raw.set_montage(montage, verbose=False)
  1003. raw.info["line_freq"] = 50
  1004. # Potentially get bad channels from relative path
  1005. if "mpib_sp_eeg" in bids_root:
  1006. annotation_derivatives_path = "derivatives"
  1007. else:
  1008. annotation_derivatives_path = "code"
  1009. badch_file = op.join(
  1010. bids_root,
  1011. annotation_derivatives_path,
  1012. "annotation_derivatives",
  1013. f"{sub}",
  1014. f"{sub}_task-{newtask}_badchannels.txt",
  1015. )
  1016. if op.exists(badch_file):
  1017. raw.load_bad_channels(badch_file, force=False)
  1018. else:
  1019. print(f"\nDid not find {badch_file}\n")
  1020. # adjust high cutoff according to "True" high cutoff
  1021. # The saved "1000" was only a setting that gets auto overridden
  1022. # (verified via email to BrainProducts)
  1023. raw.info["lowpass"] = 450
  1024. # write to BIDS
  1025. mne_bids.write_raw_bids(raw, bids_path, overwrite=True, verbose=False)
  1026. # Copy wanted files from tmp to stable
  1027. # coordsystem.json
  1028. src = op.join(tmpdir, f"{sub}", "eeg", f"{sub}_coordsystem.json")
  1029. dest = op.join(bids_root, f"{sub}", "eeg", f"{sub}_coordsystem.json")
  1030. if not op.exists(dest) or overwrite:
  1031. shutil.copyfile(src, dest)
  1032. # eeg.json
  1033. src = op.join(tmpdir, f"{sub}", "eeg", f"{sub}_task-{newtask}_eeg.json")
  1034. dest = op.join(bids_root, f"task-{newtask}_eeg.json")
  1035. if not op.exists(dest) or overwrite:
  1036. shutil.copyfile(src, dest)
  1037. # electrodes.tsv
  1038. src = op.join(
  1039. tmpdir,
  1040. f"{sub}",
  1041. "eeg",
  1042. f"{sub}_electrodes.tsv",
  1043. )
  1044. dest = op.join(bids_root, f"{sub}", "eeg", f"{sub}_electrodes.tsv")
  1045. if not op.exists(dest) or overwrite:
  1046. # Add some info that mne-bids didn't add:
  1047. # the electrodes ECG, HEOG, VEOG are actually 3 *pairs* of electrodes
  1048. electrodes_df = pd.read_csv(src, sep="\t")
  1049. bipolar_elecs = electrodes_df.loc[
  1050. electrodes_df["name"].isin(["ECG", "HEOG", "VEOG"]), :
  1051. ].copy()
  1052. bipolar_elecs.loc[:, "name"] += "-"
  1053. electrodes_df.loc[
  1054. electrodes_df["name"].isin(["ECG", "HEOG", "VEOG"]), :
  1055. ] += "+"
  1056. electrodes_df = electrodes_df.append(bipolar_elecs, ignore_index=True)
  1057. # Reference and ground electrode are dropped by mne-bids. Add back here.
  1058. electrodes_df = electrodes_df.append(
  1059. (pd.DataFrame(["Gnd"] + list(pos_gnd) + [np.nan]).T).rename(
  1060. columns=dict(
  1061. zip(range(len(electrodes_df.columns)), electrodes_df.columns)
  1062. )
  1063. ),
  1064. ignore_index=True,
  1065. )
  1066. electrodes_df = electrodes_df.append(
  1067. (pd.DataFrame(["Ref"] + list(pos_ref) + [np.nan]).T).rename(
  1068. columns=dict(
  1069. zip(range(len(electrodes_df.columns)), electrodes_df.columns)
  1070. )
  1071. ),
  1072. ignore_index=True,
  1073. )
  1074. # try to fill impedance data for bipolar elecs, gnd, and ref
  1075. with open(fname_vhdr, "r") as fin:
  1076. lines = fin.readlines()
  1077. # impedances are the last couple of lines
  1078. for line in lines[-100:]:
  1079. line_split = line.split(":")
  1080. if len(line_split) != 2:
  1081. continue
  1082. ch, impedance = line_split
  1083. try:
  1084. impedance = int(impedance.strip())
  1085. except ValueError:
  1086. impedance = np.nan
  1087. if ch in [
  1088. "ECG+",
  1089. "ECG-",
  1090. "HEOG+",
  1091. "HEOG-",
  1092. "VEOG+",
  1093. "VEOG-",
  1094. "Gnd",
  1095. "Ref",
  1096. ]:
  1097. electrodes_df.loc[
  1098. electrodes_df["name"] == ch, "impedance"
  1099. ] = impedance
  1100. electrodes_df.to_csv(dest, index=False, na_rep="n/a", sep="\t")
  1101. # channels.tsv
  1102. src = op.join(
  1103. tmpdir,
  1104. f"{sub}",
  1105. "eeg",
  1106. f"{sub}_task-{newtask}_channels.tsv",
  1107. )
  1108. dest = op.join(
  1109. bids_root,
  1110. f"{sub}",
  1111. "eeg",
  1112. f"{sub}_task-{newtask}_channels.tsv",
  1113. )
  1114. if not op.exists(dest) or overwrite:
  1115. # Add some info that mne-bids didn't add
  1116. # specific status_description, and referencing schemes (especially for bipolar channels)
  1117. # change type of heog to heog and veog to veog ... (from generic eog)
  1118. channels_df = pd.read_csv(src, sep="\t")
  1119. channels_df.loc[
  1120. channels_df["status"] == "bad", "status_description"
  1121. ] = "bad as judged by visual inspection"
  1122. channels_df.insert(3, "reference", "FCz")
  1123. channels_df.loc[channels_df["name"] == "ECG", "reference"] = "ECG+, ECG-"
  1124. channels_df.loc[channels_df["name"] == "HEOG", "reference"] = "HEOG+, HEOG-"
  1125. channels_df.loc[channels_df["name"] == "VEOG", "reference"] = "VEOG+, VEOG-"
  1126. channels_df.loc[channels_df["name"] == "HEOG", "type"] = "HEOG"
  1127. channels_df.loc[channels_df["name"] == "VEOG", "type"] = "VEOG"
  1128. channels_df = channels_df[
  1129. [
  1130. "name",
  1131. "type",
  1132. "units",
  1133. "description",
  1134. "sampling_frequency",
  1135. "reference",
  1136. "low_cutoff",
  1137. "high_cutoff",
  1138. "status",
  1139. "status_description",
  1140. ]
  1141. ]
  1142. channels_df.to_csv(dest, index=False, na_rep="n/a", sep="\t")
  1143. # scans.tsv
  1144. src = op.join(
  1145. tmpdir,
  1146. f"{sub}",
  1147. f"{sub}_scans.tsv",
  1148. )
  1149. dest = op.join(
  1150. bids_root,
  1151. f"{sub}",
  1152. f"{sub}_scans.tsv",
  1153. )
  1154. if not op.exists(dest) or overwrite:
  1155. shutil.copyfile(src, dest)
  1156. # We are done, remove the temporary directory containing unneeded mne-bids outputs
  1157. shutil.rmtree(tmpdir)
  1158. # %% Add more information to EEG JSON sidecar files after they are written
  1159. def enrich_eeg_json(bids_root, overwrite):
  1160. """Add information to EEG JSON sidecar files."""
  1161. # find sidecars for AF, AV, YF, YV, description
  1162. sidecars = glob.glob(BIDS_ROOT + os.sep + "*_eeg.json")
  1163. assert len(sidecars) == 5
  1164. for sidecar in sidecars:
  1165. # Read the file provided by mne-bids
  1166. with open(sidecar, "r") as fin:
  1167. sidecar_dict = json.load(fin)
  1168. # Add information
  1169. sidecar_dict["InstitutionName"] = "Max Plack Institute for Human Development"
  1170. sidecar_dict["InstitutionAddress"] = "Lentzeallee 94, 14195 Berlin, Germany"
  1171. sidecar_dict["Manufacturer"] = "Brain Products"
  1172. sidecar_dict["ManufacturersModelName"] = "BrainAmp DC and BrainAmp ExG"
  1173. sidecar_dict[
  1174. "SoftwareVersions"
  1175. ] = "BrainVision Recorder Professional - V. 1.21.0303"
  1176. sidecar_dict[
  1177. "EEGReference"
  1178. ] = "For all EEG channels: FCz, for other channels, see channels.tsv"
  1179. sidecar_dict[
  1180. "EEGGround"
  1181. ] = "For all EEG channels: Fpz, for all other channels: ca. 10cm above navel on right side of belly."
  1182. sidecar_dict["CapManufacturer"] = "EasyCap"
  1183. sidecar_dict["CapManufacturersModelName"] = "actiCAP 64 Ch Standard-2"
  1184. sidecar_dict[
  1185. "Instructions"
  1186. ] = "Instructions can be found in the experiment code, here: https://doi.org/10.5281/zenodo.3354368"
  1187. sidecar_dict["HardwareFilters"] = {
  1188. "Highpass causal RC-filter": {
  1189. "Description": "To prevent signals drifts. This filter is specified with a time constant of 10 seconds, which translates to a cutoff frequency of approximately 0.0159Hz.",
  1190. "TimeConstant": "10s",
  1191. "CutoffFrequency": "0.0159Hz",
  1192. "RollOffSlopeAtCutoffFrequency": "6db/Oct",
  1193. },
  1194. "Lowpass causal Butterworth filter": {
  1195. "Description": "For anti-aliasing, because the BrainAmp DC always records at 5000Hz sampling frequency, and this sampling frequency then gets downsampled (or not) depending on user settings.",
  1196. "CutoffFrequency": "1000Hz",
  1197. "RollOffSlopeAtCutoffFrequency": "30db/Oct",
  1198. },
  1199. }
  1200. sidecar_dict["SoftwareFilters"] = {
  1201. "Lowpass causal Butterworth filter": {
  1202. "Description": "For anti-aliasing prior to downsampling from the default 5000Hz to 1000Hz, because in the case of this dataset, a sampling frequency of 1000Hz was selected in user settings.",
  1203. "CutoffFrequency": "450Hz",
  1204. "RollOffSlopeAtCutoffFrequency": "24db/Oct",
  1205. }
  1206. }
  1207. for key in ["EMGChannelCount", "MiscChannelCount", "TriggerChannelCount"]:
  1208. if key in sidecar_dict:
  1209. del sidecar_dict[key]
  1210. # Write enriched file back
  1211. with open(sidecar, "w", encoding="utf-8") as fout:
  1212. json.dump(sidecar_dict, fout, ensure_ascii=False, indent=4)
  1213. # %% Perform all formatting in parallel
if __name__ == "__main__":
    # Entry point: run the individual conversion steps. Steps that are not
    # currently needed are commented out (presumably already performed in an
    # earlier run -- TODO confirm before re-enabling).
    make_datacite_yml(BIDS_ROOT, OVERWRITE)
    # make_bids_validator_config(BIDS_ROOT, OVERWRITE)
    # make_bidsignore(BIDS_ROOT, OVERWRITE)
    make_README(BIDS_ROOT, OVERWRITE)
    # make_LICENSE(BIDS_ROOT, OVERWRITE)
    make_CHANGES(BIDS_ROOT, OVERWRITE)
    make_dataset_description(BIDS_ROOT, OVERWRITE)
    # make_phenotype(BIDS_ROOT, OVERWRITE)
    # make_participants(BIDS_ROOT, OVERWRITE)
    # make_events_json(BIDS_ROOT, OVERWRITE)
    # Per-subject steps run in parallel; all three per-subject functions share
    # the same argument shape: (bids_root, task_map, sub, just_json, overwrite)
    just_json = True
    inputs = [(BIDS_ROOT, TASK_MAP, f"sub-{sub:02}", just_json, OVERWRITE) for sub in SUBJECTS]
    with multiprocessing.Pool(NJOBS) as pool:
        # pool.starmap(copy_eeg_and_events_files, inputs)
        pool.starmap(copy_eyetracking_files, inputs)
        # pool.starmap(coord_elec_chan_eegjson_scans, inputs)
    # enrich_eeg_json(BIDS_ROOT, OVERWRITE)