12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442 |
- """Convert the sourcedata of the experiment to BIDS format.
- The script is to be run from a /code folder, nested in the BIDS folder that
- also contains the /sourcedata folder.
- BIDS-folder
- ├── code
- │ └── source_to_bids.py
- │ └── environment.yml
- └── sourcedata
- │ └── ...
- │ └── ...
- ...
- You need to have the following software dependencies installed for this code to work:
- - mne_bids < 0.8
- - mne < 0.23
- All remaining dependencies will be installed automatically with the packages above.
- This code is licensed under MIT (https://opensource.org/licenses/MIT):
- Copyright 2022 Stefan Appelhoff
- Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- """
- # %% Imports and constants
- import glob
- import json
- import multiprocessing
- import os
- import os.path as op
- import pathlib
- import shutil
- from tempfile import mkdtemp
- import requests
- import mne_bids
- import numpy as np
- import pandas as pd
- from mne.channels import read_dig_captrak
- from mne.io import read_raw_brainvision
- from mne_bids.copyfiles import copyfile_brainvision
- # Adjust this path to where the bids directory is stored
- home = os.path.expanduser("~")
- is_ds_to_publish = "mpib_sp_eeg" in str(pathlib.Path(__file__).parent.absolute())
- if ("stefanappelhoff" in home) and (not is_ds_to_publish):
- BIDS_ROOT = os.path.join("/", "home", "stefanappelhoff", "Desktop", "sp_data")
- elif ("appelhoff" in home) and (not is_ds_to_publish):
- BIDS_ROOT = os.path.join("/", "vol2", "appelhoff", "sp_data")
- elif ("appelhoff" in home) and is_ds_to_publish:
- BIDS_ROOT = os.path.join("/", "vol2", "appelhoff", "mpib_sp_eeg")
- else:
- raise RuntimeError("Could not determine BIDS_ROOT. Please add your own.")
- OVERWRITE = True
- SUBJECTS = range(1, 41)
- NJOBS = max(1, min(multiprocessing.cpu_count() - 2, 40))
- TASK_MAP = {
- "spactive": "Active{}",
- "sppassive": "Yoked{}",
- "description": "description",
- }
- # %% Function for meanings of triggers in the experiment
def provide_trigger_dict():
    """Provide a dictionary mapping str names to byte values [1]_.

    The trigger names below are listed in the order of their byte
    values, which start at ``bytes([1])`` and count upwards, so the
    mapping is derived from the list position instead of being
    hand-numbered.

    Returns
    -------
    trigger_dict : dict
        Mapping from trigger name (str) to TTL trigger value (bytes).

    References
    ----------
    .. [1] https://github.com/sappelhoff/sp_experiment/blob/master/sp_experiment/define_ttl_triggers.py # noqa: E501
    """
    trigger_names = [
        # At the beginning and end of the experiment ... take these triggers
        # to crop the meaningful EEG data. Make sure to include some time
        # BEFORE and AFTER the triggers so that filtering does not introduce
        # artifacts into important parts.
        "trig_begin_experiment",  # 1
        "trig_end_experiment",  # 2
        # Indication when a new trial is started
        "trig_new_trl",  # 3
        # Whenever a new sample within a trial is started (fixation stim)
        "trig_sample_onset",  # 4
        # Whenever a choice is being inquired during sampling
        "trig_left_choice",  # 5
        "trig_right_choice",  # 6
        "trig_final_choice",  # 7
        # When displaying outcomes during sampling
        "trig_mask_out_l",  # 8
        "trig_show_out_l",  # 9
        "trig_mask_out_r",  # 10
        "trig_show_out_r",  # 11
        # Indication when a final choice is started
        "trig_new_final_choice",  # 12
        # Whenever a final choice is started (fixation stim)
        "trig_final_choice_onset",  # 13
        # Inquiring actions during CHOICE
        "trig_left_final_choice",  # 14
        "trig_right_final_choice",  # 15
        # Displaying outcomes during CHOICE
        "trig_mask_final_out_l",  # 16
        "trig_show_final_out_l",  # 17
        "trig_mask_final_out_r",  # 18
        "trig_show_final_out_r",  # 19
        # trigger for ERROR, when a trial has to be reset
        # (ignore all markers prior to this marker within this trial)
        "trig_error",  # 20
        # If the subject sampled a maximum of steps and now wants to take yet
        # another one, we force stop and initiate a final choice
        "trig_forced_stop",  # 21
        # If subject tried to make a final choice before taking at least
        # one sample
        "trig_premature_stop",  # 22
        # Display the block feedback
        "trig_block_feedback",  # 23
    ]
    return {
        name: bytes([value])
        for value, name in enumerate(trigger_names, start=1)
    }
- # %% Making a datacite.yml file for GIN
def make_datacite_yml(bids_root, overwrite):
    """Make a datacite.yml file.

    This file is used by GIN (https://gin.g-node.org) to register a DOI
    for the dataset, following the DataCite Metadata Schema 4.1.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    # NOTE(review): in the last "references" entry below, the id ends in
    # 5929222 while the citation URL ends in 5929223 — presumably a concept
    # DOI vs. a version DOI, but confirm before publishing.
    txt = """# Metadata for DOI registration according to DataCite Metadata Schema 4.1.
# For detailed schema description see https://doi.org/10.5438/0014

## Required fields

# The main researchers involved. Include digital identifier (e.g., ORCID)
# if possible, including the prefix to indicate its type.
authors:
  -
    firstname: "Stefan"
    lastname: "Appelhoff"
    affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
    id: "ORCID:0000-0001-8002-0877"
  -
    firstname: "Ralph"
    lastname: "Hertwig"
    affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
    id: "ORCID:0000-0002-9908-9556"
  -
    firstname: "Bernhard"
    lastname: "Spitzer"
    affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
    id: "ORCID:0000-0001-9752-932X"

# A title to describe the published resource.
title: "The mpib_sp_eeg dataset"

# Additional information about the resource, e.g., a brief abstract.
description: |
  When acquiring information about choice alternatives, decision makers may have varying levels of control over which
  and how much information they sample before making a choice. How does subjective control over sampling affect the
  quality of experience-based decisions?
  This resource contains behavioral, eyetracking, and EEG data of 40 human participants performing a
  numerical sampling task in which the level of subjective control over sampling was systematically varied.
  The dataset is organized according to the Brain Imaging Data Structure (BIDS).

# List of keywords the resource should be associated with.
# Give as many keywords as possible, to make the resource findable.
keywords:
  - cognitive neuroscience
  - decisions from experience
  - DFE
  - sampling paradigm
  - sequential sampling
  - decision-making
  - control
  - EEG
  - electroencephalography
  - eyetracking
  - BIDS
  - Brain Imaging Data Structure

# License information for this resource. Please provide the license name and/or a link to the license.
# Please add also a corresponding LICENSE file to the repository.
license:
  name: "Open Data Commons Public Domain Dedication and License (PDDL) v1.0"
  url: "https://opendatacommons.org/licenses/pddl/1-0/"

## Optional Fields

# Funding information for this resource.
# Separate funder name and grant number by comma.
funding:
  - "Max Planck Institute for Human Development"

# Related publications. reftype might be: IsSupplementTo, IsDescribedBy, IsReferencedBy.
# Please provide digital identifier (e.g., DOI) if possible.
# Add a prefix to the ID, separated by a colon, to indicate the source.
# Supported sources are: DOI, arXiv, PMID
# In the citation field, please provide the full reference, including title, authors, journal etc.
references:
  -
    id: "doi:10.1101/2021.06.03.446960"
    reftype: "IsSupplementTo"
    citation: "Control over sampling boosts numerical evidence processing in human decisions from experience Stefan Appelhoff, Ralph Hertwig, Bernhard Spitzer bioRxiv 2021.06.03.446960"
  -
    id: "doi:10.5281/zenodo.3361717"
    reftype: "IsReferencedBy"
    citation: "Stefan Appelhoff. (2019, August 6). sappelhoff/sp_experiment: v1.0 (Version v1.0). Zenodo. http://doi.org/10.5281/zenodo.3361717"
  -
    id: "doi:10.5281/zenodo.5929222"
    reftype: "IsReferencedBy"
    citation: "Stefan Appelhoff. (2022). sappelhoff/sp_code: 1.0.0 (1.0.0). Zenodo. https://doi.org/10.5281/zenodo.5929223"

# Resource type. Default is Dataset, other possible values are Software, DataPaper, Image, Text.
resourcetype: Dataset

# Do not edit or remove the following line
templateversion: 1.2
"""
    fname = op.join(bids_root, "datacite.yml")
    if op.exists(fname) and not overwrite:
        return
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(txt)
- # %% Making a .bidsignore file
def make_bidsignore(bids_root, overwrite):
    """Write a .bidsignore file.

    Lists the files that the BIDS validator should skip
    (they are not part of the BIDS specification).

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    target = op.join(bids_root, ".bidsignore")
    if not op.exists(target) or overwrite:
        content = "README.md\ndatacite.yml\n"
        with open(target, "w", encoding="utf-8") as stream:
            stream.write(content)
- # %% Making a .bids-validator-config.json file to ignore some known warnings in the validator
def make_bids_validator_config(bids_root, overwrite):
    """Make a .bidsconfig.json file.

    Silences bids-validator warnings that do not apply to this dataset:

    - 101 (README_FILE_MISSING): our README is README.md, not README
    - 38 (INCONSISTENT_SUBJECTS): the study is a mixed design with a
      between factor, so subjects naturally differ in their files

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    config = {"ignore": [101, 38]}
    fname = op.join(bids_root, ".bids-validator-config.json")
    if op.exists(fname) and not overwrite:
        return
    with open(fname, "w", encoding="utf-8") as fout:
        json.dump(config, fout, ensure_ascii=False, indent=4)
- # %% Copying EEG files
def copy_eeg_and_events_files(bids_root, task_map, sub, just_json, overwrite):
    """Copy and rename the EEG and events files per subject.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    task_map : dict
        A mapping between old task names, and templates for the
        new task names.
    sub : str
        The subject entity to work on, for example "sub-01".
    just_json : bool
        Whether or not to only touch the json files.
    overwrite : bool
        If True, overwrite existing files.
    """
    # map from old to new
    for old_task, new_task_template in task_map.items():
        fname_old = f"{sub}_task-{old_task}_eeg.vhdr"
        # Even-numbered subjects were in the "Variable" stopping condition,
        # odd-numbered subjects in the "Fixed" one; this fills the task name
        # template, e.g. "Active{}" -> "ActiveVariable".
        sub_id = int(sub[-2:])
        stop_policy = "Variable" if sub_id % 2 == 0 else "Fixed"
        new_task = new_task_template.format(stop_policy)
        fname_new = f"{sub}_task-{new_task}_eeg.vhdr"
        src = op.join(bids_root, "sourcedata", sub, "eeg", fname_old)
        dest_dir = op.join(bids_root, sub, "eeg")
        os.makedirs(dest_dir, exist_ok=True)
        dest = op.join(dest_dir, fname_new)
        # Copy EEG data
        if not just_json:
            if op.exists(src) and (not op.exists(dest) or overwrite):
                # copyfile_brainvision also updates the internal links of the
                # BrainVision file triplet (.vhdr/.vmrk/.eeg)
                copyfile_brainvision(src, dest)
            # Copy and rename events.tsv
            # NOTE(review): assuming the events.tsv handling belongs inside
            # the "not just_json" branch (events files are not JSON) —
            # confirm against the original indentation.
            src = src.replace("_eeg.vhdr", "_events.tsv")
            dest = dest.replace("_eeg.vhdr", "_events.tsv")
            # For description task, we need to fix nan -> n/a
            if "task-description" in src:
                # NOTE(review): unlike the else-branch, this reads src without
                # an op.exists check and would raise FileNotFoundError for a
                # missing sourcedata file — confirm this is intended.
                tmpdf = pd.read_csv(src, sep="\t")
                if overwrite:
                    tmpdf.to_csv(dest, index=False, na_rep="n/a", sep="\t")
            else:
                # if not description, we can simply copy over
                if op.exists(src) and (not op.exists(dest) or overwrite):
                    shutil.copyfile(src, dest)
- # %% Making a README
def make_README(bids_root, overwrite):
    """Write a README.md file.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    txt = """# The `mpib_sp_eeg` dataset

This is the readme of the `mpib_sp_eeg` dataset. The short dataset name results from these three facts:

- the data was collected at the Max Planck Institute for Human Development (MPIB)
- the behavioral task was the "Sampling Paradigm" (SP)
- the dataset's main neuroimaging modality is electroencephalography data (EEG)

The data was collected in 2019 at the MPIB in Berlin by Stefan Appelhoff and colleagues.

The data is organized according to the Brain Imaging Data Structure, see: https://bids.neuroimaging.io

The dataset is managed with datalad, see: http://handbook.datalad.org/en/latest/index.html

## Download

1. Install datalad (http://handbook.datalad.org/en/latest/intro/installation.html)
2. Run the code below from the shell:
    1. first "clone" the dataset
    2. then navigate to the root of the dataset
    3. then use `datalad get <file you want>` to get the file contents for each file you want
       (you can also use `datalad get . -r` to get everything at once, but this may take some time)

```shell
datalad clone https://gin.g-node.org/sappelhoff/mpib_sp_eeg
cd mpib_sp_eeg
datalad get participants.tsv
```

## Preprint

A preprint is available on BioRxiv.

- BioRxiv: https://doi.org/10.1101/2021.06.03.446960

## Experimental presentation code

The code used for the experimental presentation can be found on GitHub and Zenodo.

- GitHub: https://github.com/sappelhoff/sp_experiment
- Zenodo: https://doi.org/10.5281/zenodo.3354368

## Analysis code

The code used for data analysis can be found on GitHub and on Zenodo.

- GitHub: https://github.com/sappelhoff/sp_code
- Zenodo: https://doi.org/10.5281/zenodo.5929222

## Contact

- [Stefan Appelhoff](mailto:appelhoff@mpib-berlin.mpg.de)

## License

The `source_to_bids.py` script in the `code/` directory is licensed under the MIT license.

This data is made available under the Public Domain Dedication and License v1.0
whose full text can be found at: http://opendatacommons.org/licenses/pddl/1.0/

See also this human readable summary: https://opendatacommons.org/licenses/pddl/summary/

For details, please see the [LICENSE](LICENSE) file.

## Using this dataset

If you use this dataset in your work, please consider citing it as well as the main references describing it.

## Additional information

- The eyetracking recording for sub-18 in the ActiveVariable task is broken ("sub-18/eeg/sub-18_task-ActiveVariable_recording-eyetracking_physio.tsv.gz").
- The eyetracking recording for sub-15 in the YokedFixed task for unknown reasons has timing issues ("sub-15/eeg/sub-15_task-YokedFixed_recording-eyetracking_physio.tsv.gz").
- All bipolar channels (ECG, HEOG, VEOG) were recorded with a ground electrode placed 10cm away from the navel on the participant's right side of the belly.
- The following describes the approximate locations of the ECG, HEOG, and VEOG electrodes:
    - ECG- between the 5th and 6th rib on the left chest.
    - ECG+ in the middle of the upper chest
    - HEOG- 1cm from the left outer canthus
    - HEOG+ 1cm from the right outer canthus
    - VEOG- 2cm below the left eye
    - VEOG+ 1cm above the left eyebrow
"""
    fname = op.join(bids_root, "README.md")
    if op.exists(fname) and not overwrite:
        return
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(txt)
- # %% Making a dataset_description.json
def make_dataset_description(bids_root, overwrite):
    """Write the BIDS dataset_description.json file.

    The file is only (re)written when it does not exist yet or when
    overwriting is requested.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    fname = op.join(bids_root, "dataset_description.json")
    if op.exists(fname) and not overwrite:
        return
    description = {
        "Name": "mpib_sp_eeg",
        "BIDSVersion": "1.6.0",
        "DatasetType": "raw",
        "License": "PDDL",
        "Authors": ["Stefan Appelhoff", "Ralph Hertwig", "Bernhard Spitzer"],
        "Acknowledgements": (
            "We thank Agnessa Karapetian, Clara Wicharz, Jann Wäscher, "
            "Yoonsang Lee, and Zhiqi Kang for help with data collection, "
            "Dirk Ostwald and Casper Kerrén for helpful discussions and "
            "feedback, and Susannah Goss for editorial assistance."
        ),
        "HowToAcknowledge": "Please cite https://doi.org/10.1101/2021.06.03.446960",
        "EthicsApprovals": [
            "The study was approved by the ethics committee of the Max Planck Institute for Human Development, Berlin, Germany."
        ],
        "ReferencesAndLinks": [
            "https://doi.org/10.1101/2021.06.03.446960",
            "https://doi.org/10.5281/zenodo.3354368",
            "https://doi.org/10.5281/zenodo.5929222",
        ],
        "DatasetDOI": "https://gin.g-node.org/sappelhoff/mpib_sp_eeg/",
    }
    with open(fname, "w", encoding="utf-8") as fout:
        json.dump(description, fout, ensure_ascii=False, indent=4)
        # terminate the file with a newline character
        fout.write("\n")
- # %% Make LICENSE
def make_LICENSE(bids_root, overwrite):
    """Make LICENSE file.

    Downloads the full text of the PDDL 1.0 license and writes it to
    ``<bids_root>/LICENSE``.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    fname = op.join(bids_root, "LICENSE")
    # Decide first whether anything needs to be written, so that the
    # network round trip is skipped entirely when the file already exists
    # and must not be overwritten.
    if op.exists(fname) and not overwrite:
        return
    response = requests.get("https://opendatacommons.org/licenses/pddl/pddl-10.txt")
    # Fail loudly on a bad HTTP status instead of silently writing an
    # error page into the LICENSE file.
    response.raise_for_status()
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(response.text)
- # %% Make CHANGES
def make_CHANGES(bids_root, overwrite):
    """Make CHANGES file.

    Writes the version history of the dataset to ``<bids_root>/CHANGES``.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    txt = """1.0.0 2021-05-25
    - Initial release

1.0.1 2021-06-04
    - Updated link to preprint in dataset_description.json and datacite.yml
    - Added Manufacturer and ManufacturersModelName to physio.json (Tobii 4C eyetracker)
    - Rephrased acknowledgements in dataset_description.json
    - Updated code/environment.yml

1.0.2 2022-02-01
    - Minor updates to README, source_to_bids.py, datacite.yml, dataset_description.json
      in order to add links to other resources, and minor wording fixes
    - Removed unneeded code/environment.yml
    - The `source_to_bids.py` script is now licensed under the MIT license
    - Make dataset available under PDDL
"""
    fname = op.join(bids_root, "CHANGES")
    if not op.exists(fname) or overwrite:
        with open(fname, "w", encoding="utf-8") as fout:
            fout.write(txt)
- # %% Make phenotype
def make_phenotype(bids_root, overwrite):
    """Make phenotype directory for BNT data.

    Writes the participants' answers to the four Berlin Numeracy Test
    (BNT) questions, plus per-question "correct" indicator columns, to
    ``<bids_root>/phenotype/berlin_numeracy_test.tsv``, together with a
    JSON data dictionary describing each column.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite existing files.
    """
    # Make phenotype directory.
    # NOTE: this (and the TSV fname below) previously used the module-level
    # BIDS_ROOT constant, silently ignoring the bids_root parameter —
    # fixed to use the parameter consistently.
    os.makedirs(op.join(bids_root, "phenotype"), exist_ok=True)
    # the phenotype data was read by a human from the handwritten PDFs
    # in the /sourcedata and recorded here.
    # NOTE: When participants answered with floats, the answer was rounded to integer.
    # When they answered in percentage and a count was needed, ...
    # that count was calculated using the percentage.
    # fmt: off
    data = {
        "participant_id": [f"sub-{subj:02}" for subj in range(1, 41)],
        "q1": [25, 35, 25, 76, 25, 25, 25, 25, 25, 25, 3, 10, 25, 25, 25, 10, 25, 25, 10, 40,
               25, 25, 25, 40, 10, 5, 13, 25, 25, 25, 25, 25, 30, 40, 30, 20, 25, 40, 20, 10],
        "q2": [20, 20, 20, 42, 20, 20, 23, 35, 23, 20, 35, 20, 33, 35, 46, 21, 22, 25, 47, 23,
               20, 20, 25, 35, 23, 30, 35, 19, 20, 26, 20, 20, 35, 20, 12, 23, 21, 35, 20, 35],
        "q3": [50, 5, 80, 35, 50, 20, 50, 50, 50, 50, 4, 4, 50, 20, np.nan, 5, 8, 7, 50, 5, 50,
               80, 8, 30, 4, 80, 2, 23, 9, 80, 15, 80, 95, 10, 20, 5, 95, 80, 4, 30],
        "q4": [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40, 30,
               30, 30, 40, 30, 33, 30, np.nan, 30, 30, 30, 30, 200, 10, 33, 30, 30, 30, 50, 30],
    }
    # fmt: on
    # Fill correct or not based on the true answers.
    # A comparison with np.nan is always False, so missing answers are
    # counted as incorrect.
    true_answers = {"q1": 25, "q2": 20, "q3": 50, "q4": 30}
    for question, correct in true_answers.items():
        data[f"{question}_correct"] = (np.array(data[question]) == correct).astype(
            int
        )
    # Make dataframe and save as TSV
    phenotype_tsv = pd.DataFrame(data=data)
    fname = op.join(bids_root, "phenotype", "berlin_numeracy_test.tsv")
    if not op.exists(fname) or overwrite:
        phenotype_tsv.to_csv(fname, index=False, na_rep="n/a", sep="\t")
    # Prepare the JSON file
    # NOTE(review): the LongName/Description fields are shifted relative to
    # the column names (q1 -> "question two", ..., q4 -> "question one");
    # presumably the printed questionnaire's order differed from the column
    # order — confirm against the paper version before changing anything.
    phenotype_json = {
        "MeasurementToolMetadata": {
            "Description": "Berlin Numeracy Test - Pen and Paper Version",
            "TermURL": "http://www.riskliteracy.org/researchers/",
        },
        "participant_id": {
            "LongName": "participant identification",
            "Description": "identification number of the participant",
        },
        "q1": {
            "LongName": "question two",
            "Description": "Out of 1,000 people in a small town 500 are members of a choir. Out of these 500 members in the choir 100 are men. Out of the 500 inhabitants that are not in the choir 300 are men. What is the probability that a randomly drawn man is a member of the choir? (please indicate the probability in percent).",
        },
        "q1_correct": {
            "LongName": "question two correct",
            "Description": "was question two answered correctly with 25?",
            "Levels": {0: False, 1: True},
        },
        "q2": {
            "LongName": "question three",
            "Description": "Imagine we are throwing a loaded die (6 sides). The probability that the die shows a 6 is twice as high as the probability of each of the other numbers. On average, out of these 70 throws, how many times would the die show the number 6?",
        },
        "q2_correct": {
            "LongName": "question three correct",
            "Description": "was question three answered correctly with 20?",
            "Levels": {0: False, 1: True},
        },
        "q3": {
            "LongName": "question four",
            "Description": "In a forest 20% of mushrooms are red, 50% brown and 30% white. A red mushroom is poisonous with a probability of 20%. A mushroom that is not red is poisonous with a probability of 5%. What is the probability that a poisonous mushroom in the forest is red?",
        },
        "q3_correct": {
            "LongName": "question four correct",
            "Description": "was question four answered correctly with 50?",
            "Levels": {0: False, 1: True},
        },
        "q4": {
            "LongName": "question one",
            "Description": "Imagine we are throwing a five-sided die 50 times. On average, out of these 50 throws how many times would this five-sided die show an odd number (1, 3 or 5)?",
        },
        "q4_correct": {
            "LongName": "question one correct",
            "Description": "was question one answered correctly with 30?",
            "Levels": {0: False, 1: True},
        },
    }
    fname = op.join(bids_root, "phenotype", "berlin_numeracy_test.json")
    if not op.exists(fname) or overwrite:
        with open(fname, "w", encoding="utf-8") as fout:
            json.dump(phenotype_json, fout, ensure_ascii=False, indent=4)
- # %% Make events.json file
- # Descriptions are taken from here:
- # https://github.com/sappelhoff/sp_experiment/blob/6e50de2ecde4b8d13f267cb5eff5451578372a89/sp_experiment/define_variable_meanings.py#L12-L258
- def make_events_json_dict():
- """Provide a dict to describe all collected variables."""
- # Get the trigger values
- trigger_dict = provide_trigger_dict()
- events_json_dict = dict()
- # Add stimulus presentation information
- events_json_dict["StimulusPresentation"] = {
- "OperatingSystem": "Windows 10 - Version 1903",
- "SoftwareName": "PsychoPy",
- "SoftwareRRID": "SCR_006571",
- "SoftwareVersion": "3.0.0",
- "Code": "doi:10.5281/zenodo.3354368"
- }
- # Start populating the dict
- events_json_dict["onset"] = {
- "Description": "onset of the event",
- "Units": "seconds",
- }
- events_json_dict["duration"] = {
- "Description": "duration of the event",
- "Units": "seconds",
- }
- events_json_dict["trial"] = {
- "Description": "zero indexed trial counter, where a trial is a sequence of steps that ends with a final choice."
- }
- events_json_dict["action_type"] = {
- "Description": "type of the action that the subject performed at this event within a trial",
- "Levels": {
- "sample": "the subject sampled either the left or the right option",
- "stop": "the subject decided to stop sampling the options and instead use the next action for a final choice",
- "forced_stop": "the subject took a maximum of samples and wanted to take another one, so we force stopped in this turn",
- "premature_stop": "the subject tried to stop sampling before taking a single sample. This lead to an error.",
- "final_choice": "the subject chose either the left or the right option as a final choice",
- },
- }
- events_json_dict["action"] = {
- "Description": "the concrete action that the subject performed for the action type",
- "Levels": {
- "0": "the subject picked the *left* option",
- "1": "the subject picked the *right* option",
- "2": "the subject decided to stop sampling - for action_type *stop* only",
- },
- }
- events_json_dict["outcome"] = {
- "Description": "the outcome that the subject received for their action. Numbers in the range 1 to 9.",
- }
- events_json_dict["response_time"] = {
- "Description": "the time it took the subject to respond after the onset of the event",
- "Units": "milliseconds",
- }
- events_json_dict["value"] = {
- "Description": "the TTL trigger value (=EEG marker value) associated with an event",
- "Levels": {
- trigger_dict["trig_begin_experiment"]: "beginning of the experiment",
- trigger_dict["trig_end_experiment"]: "end of the experiment",
- trigger_dict[
- "trig_new_trl"
- ]: "color of fixcross is changed to indicate start of new trial",
- trigger_dict[
- "trig_sample_onset"
- ]: "onset of new sample within a trial (fixcross changes to white color)",
- trigger_dict["trig_left_choice"]: "subject chose *left* during sampling",
- trigger_dict["trig_right_choice"]: "subject chose *right* during sampling",
- trigger_dict["trig_final_choice"]: "subject chose *stop* during sampling",
- trigger_dict[
- "trig_mask_out_l"
- ]: "a masked outcome is shown after sampling (left side)",
- trigger_dict[
- "trig_show_out_l"
- ]: "an outcome is revealed after sampling (left side)",
- trigger_dict[
- "trig_mask_out_r"
- ]: "a masked outcome is shown after sampling (right side)",
- trigger_dict[
- "trig_show_out_r"
- ]: "an outcome is revealed after sampling (right side)",
- trigger_dict[
- "trig_new_final_choice"
- ]: "color of fixcross is changed to indicate start of a final choice",
- trigger_dict[
- "trig_final_choice_onset"
- ]: "onset of new final choice at the end of trial (fixcross changes to white color)",
- trigger_dict[
- "trig_left_final_choice"
- ]: "subject chose *left* for final choice",
- trigger_dict[
- "trig_right_final_choice"
- ]: "subject chose *right* for final choice",
- trigger_dict[
- "trig_mask_final_out_l"
- ]: "a masked outcome is shown after final choice (left side)",
- trigger_dict[
- "trig_show_final_out_l"
- ]: "an outcome is revealed after final choice (left side)",
- trigger_dict[
- "trig_mask_final_out_r"
- ]: "a masked outcome is shown after final choice (right side)",
- trigger_dict[
- "trig_show_final_out_r"
- ]: "an outcome is revealed after final choice (right side)",
- trigger_dict[
- "trig_error"
- ]: "color of fixcross is changed to indicate an error (ignore all markers prior to this marker within this trial)",
- trigger_dict[
- "trig_forced_stop"
- ]: "subject took the maximum number of samples and wanted to take yet another one",
- trigger_dict[
- "trig_premature_stop"
- ]: "subject tried to make a final choice before taking at least one sample",
- trigger_dict["trig_block_feedback"]: "block feedback is displayed",
- },
- }
- events_json_dict["mag0_1"] = {
- "LongName": "magnitude 0_1",
- "Description": "the first of two possible magnitudes in outcomes for option 0",
- }
- events_json_dict["prob0_1"] = {
- "LongName": "probability 0_1",
- "Description": "the first of two possible probabilities in outcomes for option 0",
- }
- events_json_dict["mag0_2"] = {
- "LongName": "magnitude 0_2",
- "Description": "the second of two possible magnitudes in outcomes for option 0",
- }
- events_json_dict["prob0_2"] = {
- "LongName": "probability 0_2",
- "Description": "the second of two possible probabilities in outcomes for option 0",
- }
- events_json_dict["mag1_1"] = {
- "LongName": "magnitude 1_1",
- "Description": "the first of two possible magnitudes in outcomes for option 1",
- }
- events_json_dict["prob1_1"] = {
- "LongName": "probability 1_1",
- "Description": "the first of two possible probabilities in outcomes for option 1",
- }
- events_json_dict["mag1_2"] = {
- "LongName": "magnitude 1_2",
- "Description": "the second of two possible magnitudes in outcomes for option 1",
- }
- events_json_dict["prob1_2"] = {
- "LongName": "probability 1_2",
- "Description": "the second of two possible probabilities in outcomes for option 1",
- }
- events_json_dict["version"] = {
- "Description": "version of the experiment used for collecting this data."
- }
- events_json_dict["reset"] = {
- "Description": "boolean that describes whether of not to ignore events prior to this event in the current trial.",
- "Levels": {
- "0": "so far no error in this trial since the beginning or the last error",
- "1": "error committed: disregard all events prior to this event for the current trial.",
- },
- }
- events_json_dict["system_time_stamp"] = {
- "Description": "system time in microseconds as measured from an arbitrary starting point. This should be used to connect the event with the eyetracking data.",
- "Units": "microseconds",
- }
- # Keys in levels for "value" are bytes: we need to turn them into integers
- events_json_dict["value"]["Levels"] = {
- ord(key): val for key, val in events_json_dict["value"]["Levels"].items()
- }
- # return
- return events_json_dict
def make_description_task_json():
    """Provide variable meanings for description task.

    This is heavily based on the sampling paradigm tasks. We overwrite a few
    of the descriptions to make more sense for the description task.
    """
    # Start from the sampling paradigm definitions and adjust them below.
    events_json_dict = make_events_json_dict()

    # The trial counter means something different here: it points back to
    # the lottery settings used in the active sampling task.
    events_json_dict["trial"]["Description"] = (
        "zero indexed trial counter, where a trial index points to the lottery setting that was used in this event by comparing with the trial column in the spactive task."
    )

    # Only "final choice" remains as an action type in the description task.
    obsolete_action_types = ("sample", "stop", "forced_stop", "premature_stop")
    for key in obsolete_action_types:
        del events_json_dict["action_type"]["Levels"][key]

    # Action "2" (=stop) cannot occur: stopping is not possible in the
    # descriptions task. Only "0" (=pick left) and "1" (=pick right) remain.
    del events_json_dict["action"]["Levels"]["2"]

    # Drop trigger values that never occur in the descriptions task.
    trigger_dict = provide_trigger_dict()
    unused_triggers = (
        "trig_sample_onset",
        "trig_left_choice",
        "trig_right_choice",
        "trig_final_choice",
        "trig_mask_out_l",
        "trig_show_out_r",
        "trig_new_final_choice",
        "trig_forced_stop",
        "trig_premature_stop",
    )
    for name in unused_triggers:
        del events_json_dict["value"]["Levels"][ord(trigger_dict[name])]

    return events_json_dict
def make_events_json(bids_root, overwrite):
    """Make events.json files."""

    def _dump(fname, payload):
        # Write `payload` as JSON, unless the file exists and we must not
        # overwrite it.
        if op.exists(fname) and not overwrite:
            return
        with open(fname, "w", encoding="utf-8") as fout:
            json.dump(payload, fout, ensure_ascii=False, indent=4)

    # One shared events JSON for all non-description tasks ...
    events_json = make_events_json_dict()
    for taskname in ("ActiveFixed", "ActiveVariable", "YokedFixed", "YokedVariable"):
        _dump(op.join(bids_root, f"task-{taskname}_events.json"), events_json)

    # ... and a slightly adjusted one for the description task.
    _dump(
        op.join(bids_root, "task-description_events.json"),
        make_description_task_json(),
    )
- # %% Making participants files
def make_participants(bids_root, overwrite):
    """Make participants.tsv and participants.json files."""
    # Parse each participant's age and sex from their "active" task log file.
    fname_template = op.join(bids_root, "sourcedata", "sub-{0:02}/log_{0}_active.txt")
    subj = []
    age = []
    sex = []
    for subject_number in range(1, 41):
        with open(fname_template.format(subject_number), "r") as fin:
            log_lines = fin.readlines()
        subj.append(f"sub-{subject_number:02}")
        # Log layout: second line ends in ": <age>", third line in ": <sex>";
        # only the first letter of the sex entry is kept (lowercased).
        age.append(int(log_lines[1].strip().split(": ")[-1]))
        sex.append(log_lines[2].strip().split(": ")[-1][0].lower())

    # Some data can be filled from our knowledge about the experiment
    # See log files in sourcedata
    handedness = np.repeat(["r"], 40)
    seed = np.repeat(np.arange(1, 11), 4)
    # https://stackoverflow.com/a/33802213/5201771
    # pick n consecutive items every m items
    yoked_to = np.tile(np.arange(1, 41).reshape(-1, 4)[:, :2], (1, 2)).reshape(-1)
    yoked_to = [f"sub-{s:02}" for s in yoked_to]
    start_condition = np.tile(["active"] * 2 + ["yoked"] * 2, 10)
    stopping = np.tile(["fixed", "variable"], 20)

    participants_tsv = pd.DataFrame(
        data={
            "participant_id": subj,
            "sex": sex,
            "age": age,
            "handedness": handedness,
            "seed": seed,
            "yoked_to": yoked_to,
            "start_condition": start_condition,
            "stopping": stopping,
        }
    )

    # Write participants TSV
    tsv_fname = op.join(bids_root, "participants.tsv")
    if overwrite or not op.exists(tsv_fname):
        participants_tsv.to_csv(tsv_fname, index=False, na_rep="n/a", sep="\t")

    # Prepare and write participants JSON
    participants_json = {
        "participant_id": {"Description": "Unique participant identifier."},
        "age": {"Description": "The age of the participant.", "Units": "years"},
        "sex": {
            "Description": "The biological sex of the participant as judged by the experimenter.",
            "Levels": {"m": "male", "f": "female"},
        },
        "handedness": {
            "Description": "The handedness of the participant as reported by the participant",
            "Levels": {"l": "left", "r": "right"},
        },
        "seed": {
            "Description": "The random seed used to initialize the experiment. Participants that share a seed, saw the same underlying distributions per trial."
        },
        "yoked_to": {
            "Description": "The participant_id to which the participant was yoked in the yoked sampling condition. Some participants were yoked to themselves (i.e., they saw a replay of their own active sampling condition)."
        },
        "start_condition": {
            "Description": "The experimental condition with which the participant started the overall experiment. If the experiment was started with the active sampling condition, the yoked sampling condition was done afterwards and vice versa. After the two sampling conditions, the description task and the berlin numeracy task followed (not counterbalanced).",
            "Levels": {
                "active": "active sampling condition",
                "yoked": "yoked sampling condition",
            },
        },
        "stopping": {
            "Description": "The between factor of the experiment. Some participants always had to draw 12 samples ('fixed'), others could draw between 1 and 19 samples ('variable').",
            "Levels": {
                "fixed": "fixed stopping after 12 samples",
                "variable": "variable stopping between 1 and 19 samples",
            },
        },
    }
    json_fname = op.join(bids_root, "participants.json")
    if overwrite or not op.exists(json_fname):
        with open(json_fname, "w", encoding="utf-8") as fout:
            json.dump(participants_json, fout, ensure_ascii=False, indent=4)
- # %% Preparing and copying the eyetracking files
def copy_eyetracking_files(bids_root, task_map, sub, just_json, overwrite):
    """Format eyetracking files.

    Convert the raw Tobii eyetracking TSV files from ``sourcedata`` into a
    ``*_recording-eyetracking_physio.tsv.gz`` file per task, each with a
    JSON sidecar whose StartTime is estimated from the task's events.tsv.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    sub : str
        The subject entity to work on, for example "sub-01".
    task_map : dict
        A mapping between old task names, and templates for the
        new task names.
    just_json : bool
        Whether or not to only touch the json files.
    overwrite : bool
        If True, overwrite existing files.
    """
    # Define relevant columns
    cols = [
        "device_time_stamp",
        "system_time_stamp",
        "left_gaze_point_on_display_area_x",
        "left_gaze_point_on_display_area_y",
        "left_gaze_point_validity",
        "left_pupil_diameter",
        "left_pupil_validity",
        "right_gaze_point_on_display_area_x",
        "right_gaze_point_on_display_area_y",
        "right_gaze_point_validity",
        "right_pupil_diameter",
        "right_pupil_validity",
    ]
    # Object for physio.json --> needs to be written once for each task and subject.
    # StartTime could be estimated by looking into the
    # events.tsv file and compare the `system_time_stamp` column with the same
    # column from the eyetracking files.
    physio_json = {
        "SamplingFrequency": 90,  # Tobii 4C eyetracker is not "research grade", so this sfreq is not guaranteed to be stable apparently.
        "StartTime": "n/a",
        "Columns": cols,
        "Manufacturer": "Tobii Technology, Sweden",
        # BUGFIX: this key was misspelled "ManfacturersModelName" before,
        # which is not a valid BIDS field name.
        "ManufacturersModelName": "4C",
        "device_time_stamp": {
            "Description": "time stamp according to the eyetracker's internal clock",
            "Units": "µs",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazeData.html#details",
        },
        "system_time_stamp": {
            "Description": "time stamp according to the computer's internal clock",
            "Units": "µs",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazeData.html#details",
        },
        "left_gaze_point_on_display_area_x": {
            "Description": "X value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
        },
        "left_gaze_point_on_display_area_y": {
            "Description": "Y value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
        },
        "left_gaze_point_validity": {
            "Description": "validity of the left gaze point data",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#afce85e70ee3e1a53e0eae66cf2fecc30",
            "Levels": {"0": False, "1": True},
        },
        "left_pupil_diameter": {
            "Description": "diameter of the left pupil in millimeters",
            "Units": "mm",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
        },
        "left_pupil_validity": {
            "Description": "validity of the left pupil data",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
            "Levels": {"0": False, "1": True},
        },
        "right_gaze_point_on_display_area_x": {
            "Description": "X value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
        },
        "right_gaze_point_on_display_area_y": {
            "Description": "Y value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
        },
        "right_gaze_point_validity": {
            "Description": "validity of the right gaze point data",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#afce85e70ee3e1a53e0eae66cf2fecc30",
            "Levels": {"0": False, "1": True},
        },
        "right_pupil_diameter": {
            "Description": "diameter of the right pupil in millimeters",
            "Units": "mm",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
        },
        "right_pupil_validity": {
            "Description": "validity of the right pupil data",
            "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
            "Levels": {"0": False, "1": True},
        },
    }
    # BUGFIX: iterate over the `task_map` parameter. Previously the
    # module-level TASK_MAP was used here, silently ignoring the argument
    # (the sibling function coord_elec_chan_eegjson_scans uses the
    # parameter correctly).
    for old_task, new_task in task_map.items():
        # ROBUSTNESS: reset StartTime for each task so a value computed for
        # a previous task cannot leak into this task's sidecar if the
        # calculation below fails.
        physio_json["StartTime"] = "n/a"
        # Get file, and prepare new file name
        fname_old = f"{sub}_task-{old_task}_eyetracking.tsv"
        sub_id = int(sub[-2:])
        # Even-numbered subjects are in the "variable" stopping group,
        # odd-numbered subjects in the "fixed" group (see make_participants).
        stop_policy = "Variable" if sub_id % 2 == 0 else "Fixed"
        new_task = new_task.format(stop_policy)
        fname_new = f"{sub}_task-{new_task}_recording-eyetracking_physio.tsv.gz"
        dest_dir = op.join(bids_root, sub, "eeg")
        os.makedirs(dest_dir, exist_ok=True)
        dest = op.join(dest_dir, fname_new)
        if op.exists(dest) and not overwrite:
            continue
        # Read data
        src = op.join(bids_root, "sourcedata", sub, "eyetracking", fname_old)
        df = pd.read_csv(src, sep="\t")
        # Convert str of tuple X Y gazepoint to two float columns
        for direction in ["left", "right"]:
            tmp = (
                df[f"{direction}_gaze_point_on_display_area"]
                .str.lstrip("(")
                .str.rstrip(")")
                .str.split(",", expand=True)
            )
            df[f"{direction}_gaze_point_on_display_area_x"] = (
                tmp[0].to_numpy().astype("float")
            )
            df[f"{direction}_gaze_point_on_display_area_y"] = (
                tmp[1].to_numpy().astype("float")
            )
        # Select only relevant columns and write
        df = df[cols]
        if not just_json:
            # skip writing if we only write (update) JSONs
            df.to_csv(
                dest, sep="\t", header=False, na_rep="n/a", index=False, compression="gzip"
            )
        # Write one physio.json per task - needed because each has a different StartTime
        # First, try to get StartTime
        events_fname = op.join(
            bids_root, sub, "eeg", f"{sub}_task-{new_task}_events.tsv"
        )
        if op.exists(events_fname):
            # We calculate StartTime as difference between the first
            # measured timestamps in seconds
            events_df = pd.read_csv(events_fname, sep="\t")
            first_event_timestamp = events_df["system_time_stamp"].to_list()[0]
            first_physio_timestamp = df["system_time_stamp"].to_list()[0]
            try:
                # timestamps must be int, and event is measured *after* physio, so
                # it must be bigger
                assert isinstance(first_event_timestamp, int)
                assert isinstance(first_physio_timestamp, int)
                assert first_event_timestamp > first_physio_timestamp
                starttime_seconds = (
                    first_physio_timestamp - first_event_timestamp
                ) / 1e6
                assert starttime_seconds < 0
                physio_json["StartTime"] = starttime_seconds
            except AssertionError:
                if "sub-18_task-Active" in events_fname:
                    # Eyetracking recording for sub-18 is broken and full of NaN
                    physio_json["StartTime"] = 0.0
                else:
                    print(
                        f"Calculation of StartTime failed for {events_fname}:\n{first_event_timestamp}\n{first_physio_timestamp}"
                    )
        else:
            print(
                f"Did not find events file, could not calculate StartTime for physio.json:\n{events_fname}"
            )
        # Now write
        fname = op.join(
            bids_root,
            sub,
            "eeg",
            f"{sub}_task-{new_task}_recording-eyetracking_physio.json",
        )
        if not op.exists(fname) or overwrite:
            with open(fname, "w", encoding="utf-8") as fout:
                json.dump(physio_json, fout, ensure_ascii=False, indent=4)
- # %% Coordsystem, electrodes, channels, eeg sidecar, and scans files
def coord_elec_chan_eegjson_scans(bids_root, task_map, sub, just_json, overwrite):
    """Write/Convert several files to BIDS.

    Convert the following files:

    - electrodes.tsv --> per subj
    - coordsystem.json --> per subj
    - channels.tsv --> per subj/task
    - eeg.json --> per task

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    sub : str
        The subject entity to work on, for example "sub-01".
    task_map : dict
        A mapping between old task names, and templates for the
        new task names.
    just_json : bool
        This parameter is currently not implemented and passing it has no effect.
    overwrite : bool
        If True, overwrite existing files.
    """
    # Temporary directory to store all mne-bids outputs; wanted files are
    # copied out of it below, the rest is discarded at the end.
    tmpdir = mkdtemp(prefix=f"sp_bids_tmp_{sub}_")
    # data needed for conversion
    df = pd.read_csv(op.join(bids_root, "participants.tsv"), sep="\t")
    # What stopping group did this sub belong to
    stopping = (
        df[df["participant_id"] == f"{sub}"]["stopping"].to_list()[0].capitalize()
    )
    # Get montage file
    fname_bvct = f"CapTrakResultFile_{sub}.bvct"
    fname_bvct = op.join(bids_root, "sourcedata", f"{sub}", "coords", fname_bvct)
    montage = read_dig_captrak(fname_bvct)
    # Get ground and reference positions separately, because they get dropped
    # at raw.set_montage() otherwise
    pos_gnd = montage.get_positions()["ch_pos"]["GND"]
    pos_ref = montage.get_positions()["ch_pos"]["REF"]
    # Convert to BIDS using mne-bids
    for oldtask, newtask_template in task_map.items():
        newtask = newtask_template.format(stopping)
        bids_path = mne_bids.BIDSPath(
            subject=f"{sub[-2:]}", task=newtask, root=tmpdir, datatype="eeg"
        )
        # Get EEG file
        fname_vhdr = f"{sub}_task-{oldtask}_eeg.vhdr"
        fname_vhdr = op.join(bids_root, "sourcedata", f"{sub}", "eeg", fname_vhdr)
        raw = read_raw_brainvision(fname_vhdr, preload=False, verbose=False)
        raw.set_channel_types(
            {"HEOG": "eog", "VEOG": "eog", "ECG": "ecg"}, verbose=False
        )
        raw.set_montage(montage, verbose=False)
        raw.info["line_freq"] = 50
        # Potentially get bad channels from relative path
        if "mpib_sp_eeg" in bids_root:
            annotation_derivatives_path = "derivatives"
        else:
            annotation_derivatives_path = "code"
        badch_file = op.join(
            bids_root,
            annotation_derivatives_path,
            "annotation_derivatives",
            f"{sub}",
            f"{sub}_task-{newtask}_badchannels.txt",
        )
        if op.exists(badch_file):
            raw.load_bad_channels(badch_file, force=False)
        else:
            print(f"\nDid not find {badch_file}\n")
        # adjust high cutoff according to "True" high cutoff
        # The saved "1000" was only a setting that gets auto overridden
        # (verified via email to BrainProducts)
        raw.info["lowpass"] = 450
        # write to BIDS
        mne_bids.write_raw_bids(raw, bids_path, overwrite=True, verbose=False)
        # Copy wanted files from tmp to stable
        # coordsystem.json
        src = op.join(tmpdir, f"{sub}", "eeg", f"{sub}_coordsystem.json")
        dest = op.join(bids_root, f"{sub}", "eeg", f"{sub}_coordsystem.json")
        if not op.exists(dest) or overwrite:
            shutil.copyfile(src, dest)
        # eeg.json
        src = op.join(tmpdir, f"{sub}", "eeg", f"{sub}_task-{newtask}_eeg.json")
        dest = op.join(bids_root, f"task-{newtask}_eeg.json")
        if not op.exists(dest) or overwrite:
            shutil.copyfile(src, dest)
        # electrodes.tsv
        src = op.join(
            tmpdir,
            f"{sub}",
            "eeg",
            f"{sub}_electrodes.tsv",
        )
        dest = op.join(bids_root, f"{sub}", "eeg", f"{sub}_electrodes.tsv")
        if not op.exists(dest) or overwrite:
            # Add some info that mne-bids didn't add:
            # the electrodes ECG, HEOG, VEOG are actually 3 *pairs* of electrodes
            electrodes_df = pd.read_csv(src, sep="\t")
            bipolar_elecs = electrodes_df.loc[
                electrodes_df["name"].isin(["ECG", "HEOG", "VEOG"]), :
            ].copy()
            bipolar_elecs.loc[:, "name"] += "-"
            # BUGFIX: append "+" to the *name* column only. Previously the
            # "+" was applied to every column of the selected rows
            # (`.loc[mask, :] += "+"`), which fails on the float coordinate
            # columns and does not match the "-" handling above.
            electrodes_df.loc[
                electrodes_df["name"].isin(["ECG", "HEOG", "VEOG"]), "name"
            ] += "+"
            electrodes_df = electrodes_df.append(bipolar_elecs, ignore_index=True)
            # Reference and ground electrode are dropped by mne-bids. Add back here.
            electrodes_df = electrodes_df.append(
                (pd.DataFrame(["Gnd"] + list(pos_gnd) + [np.nan]).T).rename(
                    columns=dict(
                        zip(range(len(electrodes_df.columns)), electrodes_df.columns)
                    )
                ),
                ignore_index=True,
            )
            electrodes_df = electrodes_df.append(
                (pd.DataFrame(["Ref"] + list(pos_ref) + [np.nan]).T).rename(
                    columns=dict(
                        zip(range(len(electrodes_df.columns)), electrodes_df.columns)
                    )
                ),
                ignore_index=True,
            )
            # try to fill impedance data for bipolar elecs, gnd, and ref
            with open(fname_vhdr, "r") as fin:
                lines = fin.readlines()
            # impedances are the last couple of lines
            for line in lines[-100:]:
                line_split = line.split(":")
                if len(line_split) != 2:
                    continue
                ch, impedance = line_split
                try:
                    impedance = int(impedance.strip())
                except ValueError:
                    impedance = np.nan
                if ch in [
                    "ECG+",
                    "ECG-",
                    "HEOG+",
                    "HEOG-",
                    "VEOG+",
                    "VEOG-",
                    "Gnd",
                    "Ref",
                ]:
                    electrodes_df.loc[
                        electrodes_df["name"] == ch, "impedance"
                    ] = impedance
            electrodes_df.to_csv(dest, index=False, na_rep="n/a", sep="\t")
        # channels.tsv
        src = op.join(
            tmpdir,
            f"{sub}",
            "eeg",
            f"{sub}_task-{newtask}_channels.tsv",
        )
        dest = op.join(
            bids_root,
            f"{sub}",
            "eeg",
            f"{sub}_task-{newtask}_channels.tsv",
        )
        if not op.exists(dest) or overwrite:
            # Add some info that mne-bids didn't add
            # specific status_description, and referencing schemes (especially for bipolar channels)
            # change type of heog to heog and veog to veog ... (from generic eog)
            channels_df = pd.read_csv(src, sep="\t")
            channels_df.loc[
                channels_df["status"] == "bad", "status_description"
            ] = "bad as judged by visual inspection"
            channels_df.insert(3, "reference", "FCz")
            channels_df.loc[channels_df["name"] == "ECG", "reference"] = "ECG+, ECG-"
            channels_df.loc[channels_df["name"] == "HEOG", "reference"] = "HEOG+, HEOG-"
            channels_df.loc[channels_df["name"] == "VEOG", "reference"] = "VEOG+, VEOG-"
            channels_df.loc[channels_df["name"] == "HEOG", "type"] = "HEOG"
            channels_df.loc[channels_df["name"] == "VEOG", "type"] = "VEOG"
            channels_df = channels_df[
                [
                    "name",
                    "type",
                    "units",
                    "description",
                    "sampling_frequency",
                    "reference",
                    "low_cutoff",
                    "high_cutoff",
                    "status",
                    "status_description",
                ]
            ]
            channels_df.to_csv(dest, index=False, na_rep="n/a", sep="\t")
        # scans.tsv
        src = op.join(
            tmpdir,
            f"{sub}",
            f"{sub}_scans.tsv",
        )
        dest = op.join(
            bids_root,
            f"{sub}",
            f"{sub}_scans.tsv",
        )
        if not op.exists(dest) or overwrite:
            shutil.copyfile(src, dest)
    # We are done, remove the temporary directory containing unneeded mne-bids outputs
    shutil.rmtree(tmpdir)
- # %% Add more information to EEG JSON sidecar files after they are written
def enrich_eeg_json(bids_root, overwrite):
    """Add information to EEG JSON sidecar files.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        Accepted for API symmetry with the other make_*/enrich_* functions.
        NOTE(review): the sidecars are always rewritten in place here —
        confirm whether `overwrite` should gate that.
    """
    # find sidecars for AF, AV, YF, YV, description
    # BUGFIX: use the `bids_root` parameter; previously the module-level
    # BIDS_ROOT was used, silently ignoring the argument.
    sidecars = glob.glob(op.join(bids_root, "*_eeg.json"))
    assert len(sidecars) == 5
    for sidecar in sidecars:
        # Read the file provided by mne-bids
        with open(sidecar, "r") as fin:
            sidecar_dict = json.load(fin)
        # Add information
        # BUGFIX: "Max Plack" --> "Max Planck" (typo in institution name)
        sidecar_dict["InstitutionName"] = "Max Planck Institute for Human Development"
        sidecar_dict["InstitutionAddress"] = "Lentzeallee 94, 14195 Berlin, Germany"
        sidecar_dict["Manufacturer"] = "Brain Products"
        sidecar_dict["ManufacturersModelName"] = "BrainAmp DC and BrainAmp ExG"
        sidecar_dict[
            "SoftwareVersions"
        ] = "BrainVision Recorder Professional - V. 1.21.0303"
        sidecar_dict[
            "EEGReference"
        ] = "For all EEG channels: FCz, for other channels, see channels.tsv"
        sidecar_dict[
            "EEGGround"
        ] = "For all EEG channels: Fpz, for all other channels: ca. 10cm above navel on right side of belly."
        sidecar_dict["CapManufacturer"] = "EasyCap"
        sidecar_dict["CapManufacturersModelName"] = "actiCAP 64 Ch Standard-2"
        sidecar_dict[
            "Instructions"
        ] = "Instructions can be found in the experiment code, here: https://doi.org/10.5281/zenodo.3354368"
        sidecar_dict["HardwareFilters"] = {
            "Highpass causal RC-filter": {
                "Description": "To prevent signals drifts. This filter is specified with a time constant of 10 seconds, which translates to a cutoff frequency of approximately 0.0159Hz.",
                "TimeConstant": "10s",
                "CutoffFrequency": "0.0159Hz",
                "RollOffSlopeAtCutoffFrequency": "6db/Oct",
            },
            "Lowpass causal Butterworth filter": {
                "Description": "For anti-aliasing, because the BrainAmp DC always records at 5000Hz sampling frequency, and this sampling frequency then gets downsampled (or not) depending on user settings.",
                "CutoffFrequency": "1000Hz",
                "RollOffSlopeAtCutoffFrequency": "30db/Oct",
            },
        }
        sidecar_dict["SoftwareFilters"] = {
            "Lowpass causal Butterworth filter": {
                "Description": "For anti-aliasing prior to downsampling from the default 5000Hz to 1000Hz, because in the case of this dataset, a sampling frequency of 1000Hz was selected in user settings.",
                "CutoffFrequency": "450Hz",
                "RollOffSlopeAtCutoffFrequency": "24db/Oct",
            }
        }
        # Remove channel-count fields that do not apply to this dataset.
        for key in ["EMGChannelCount", "MiscChannelCount", "TriggerChannelCount"]:
            if key in sidecar_dict:
                del sidecar_dict[key]
        # Write enriched file back
        with open(sidecar, "w", encoding="utf-8") as fout:
            json.dump(sidecar_dict, fout, ensure_ascii=False, indent=4)
- # %% Perform all formatting in parallel
if __name__ == "__main__":
    # NOTE: several steps are commented out deliberately. This script is
    # re-run selectively, enabling only the steps that currently need
    # (re-)running; the commented lines document the full pipeline order.
    make_datacite_yml(BIDS_ROOT, OVERWRITE)
    # make_bids_validator_config(BIDS_ROOT, OVERWRITE)
    # make_bidsignore(BIDS_ROOT, OVERWRITE)
    make_README(BIDS_ROOT, OVERWRITE)
    # make_LICENSE(BIDS_ROOT, OVERWRITE)
    make_CHANGES(BIDS_ROOT, OVERWRITE)
    make_dataset_description(BIDS_ROOT, OVERWRITE)
    # make_phenotype(BIDS_ROOT, OVERWRITE)
    # make_participants(BIDS_ROOT, OVERWRITE)
    # make_events_json(BIDS_ROOT, OVERWRITE)
    # Only (re-)write JSON sidecars, not the (large) data files.
    just_json = True
    # One argument tuple per subject for the per-subject steps below.
    inputs = [(BIDS_ROOT, TASK_MAP, f"sub-{sub:02}", just_json, OVERWRITE) for sub in SUBJECTS]
    # Run the per-subject steps in parallel over NJOBS worker processes.
    with multiprocessing.Pool(NJOBS) as pool:
        # pool.starmap(copy_eeg_and_events_files, inputs)
        pool.starmap(copy_eyetracking_files, inputs)
        # pool.starmap(coord_elec_chan_eegjson_scans, inputs)
    # enrich_eeg_json(BIDS_ROOT, OVERWRITE)
|