source_to_bids.py 60 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442
  1. """Convert the sourcedata of the experiment to BIDS format.
  2. The script is to be run from a /code folder, nested in the BIDS folder that
  3. also contains the /sourcedata folder.
  4. BIDS-folder
  5. ├── code
  6. │   └── source_to_bids.py
  7. │   └── environment.yml
  8. └── sourcedata
  9. │   └── ...
  10. │   └── ...
  11. ...
  12. You need to have the following software dependencies installed for this code to work:
  13. - mne_bids < 0.8
  14. - mne < 0.23
  15. All remaining dependencies will be installed automatically with the packages above.
  16. This code is licensed under MIT (https://opensource.org/licenses/MIT):
  17. Copyright 2022 Stefan Appelhoff
  18. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
  19. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
  20. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. """
  22. # %% Imports and constants
  23. import glob
  24. import json
  25. import multiprocessing
  26. import os
  27. import os.path as op
  28. import pathlib
  29. import shutil
  30. from tempfile import mkdtemp
  31. import requests
  32. import mne_bids
  33. import numpy as np
  34. import pandas as pd
  35. from mne.channels import read_dig_captrak
  36. from mne.io import read_raw_brainvision
  37. from mne_bids.copyfiles import copyfile_brainvision
# Adjust this path to where the bids directory is stored
home = os.path.expanduser("~")
# The published dataset lives in a folder called "mpib_sp_eeg"; when this
# script runs from inside it, write outputs there instead of the work dir.
is_ds_to_publish = "mpib_sp_eeg" in str(pathlib.Path(__file__).parent.absolute())
if ("stefanappelhoff" in home) and (not is_ds_to_publish):
    BIDS_ROOT = os.path.join("/", "home", "stefanappelhoff", "Desktop", "sp_data")
elif ("appelhoff" in home) and (not is_ds_to_publish):
    BIDS_ROOT = os.path.join("/", "vol2", "appelhoff", "sp_data")
elif ("appelhoff" in home) and is_ds_to_publish:
    BIDS_ROOT = os.path.join("/", "vol2", "appelhoff", "mpib_sp_eeg")
else:
    # Unknown machine: fail early rather than writing to a wrong location.
    raise RuntimeError("Could not determine BIDS_ROOT. Please add your own.")
# Whether existing output files may be replaced.
OVERWRITE = True
# Subject numbers 1..40 (40 participants).
SUBJECTS = range(1, 41)
# Parallel workers: leave two cores free, at least 1, at most 40.
NJOBS = max(1, min(multiprocessing.cpu_count() - 2, 40))
# Map from old (sourcedata) task names to templates for the new BIDS task
# names; "{}" is filled with the per-subject stop policy ("Fixed"/"Variable").
TASK_MAP = {
    "spactive": "Active{}",
    "sppassive": "Yoked{}",
    "description": "description",
}
  57. # %% Function for meanings of triggers in the experiment
  58. def provide_trigger_dict():
  59. """Provide a dictionnary mapping str names to byte values [1]_.
  60. References
  61. ----------
  62. .. [1] https://github.com/sappelhoff/sp_experiment/blob/master/sp_experiment/define_ttl_triggers.py # noqa: E501
  63. """
  64. trigger_dict = dict()
  65. # At the beginning and end of the experiment ... take these triggers to
  66. # crop the meaningful EEG data. Make sure to include some time BEFORE and
  67. # AFTER the triggers so that filtering does not introduce artifacts into
  68. # important parts.
  69. trigger_dict["trig_begin_experiment"] = bytes([1])
  70. trigger_dict["trig_end_experiment"] = bytes([2])
  71. # Indication when a new trial is started
  72. trigger_dict["trig_new_trl"] = bytes([3])
  73. # Wenever a new sample within a trial is started (fixation stim)
  74. trigger_dict["trig_sample_onset"] = bytes([4])
  75. # Whenever a choice is being inquired during sampling
  76. trigger_dict["trig_left_choice"] = bytes([5])
  77. trigger_dict["trig_right_choice"] = bytes([6])
  78. trigger_dict["trig_final_choice"] = bytes([7])
  79. # When displaying outcomes during sampling
  80. trigger_dict["trig_mask_out_l"] = bytes([8])
  81. trigger_dict["trig_show_out_l"] = bytes([9])
  82. trigger_dict["trig_mask_out_r"] = bytes([10])
  83. trigger_dict["trig_show_out_r"] = bytes([11])
  84. # Indication when a final choice is started
  85. trigger_dict["trig_new_final_choice"] = bytes([12])
  86. # Whenever a final choice is started (fixation stim)
  87. trigger_dict["trig_final_choice_onset"] = bytes([13])
  88. # Inquiring actions during CHOICE
  89. trigger_dict["trig_left_final_choice"] = bytes([14])
  90. trigger_dict["trig_right_final_choice"] = bytes([15])
  91. # Displaying outcomes during CHOICE
  92. trigger_dict["trig_mask_final_out_l"] = bytes([16])
  93. trigger_dict["trig_show_final_out_l"] = bytes([17])
  94. trigger_dict["trig_mask_final_out_r"] = bytes([18])
  95. trigger_dict["trig_show_final_out_r"] = bytes([19])
  96. # trigger for ERROR, when a trial has to be reset
  97. # (ignore all markers prior to this marker within this trial)
  98. trigger_dict["trig_error"] = bytes([20])
  99. # If the subject sampled a maximum of steps and now wants to take yet
  100. # another one, we force stop and initiate a final choice
  101. trigger_dict["trig_forced_stop"] = bytes([21])
  102. # If subject tried to make a final choice before taking at least one sample
  103. trigger_dict["trig_premature_stop"] = bytes([22])
  104. # Display the block feedback
  105. trigger_dict["trig_block_feedback"] = bytes([23])
  106. return trigger_dict
  107. # %% Making a datacite.yml file for GIN
def make_datacite_yml(bids_root, overwrite):
    """Make a datacite.yml file.

    Write DataCite metadata (used for DOI registration on GIN) to
    ``<bids_root>/datacite.yml``. An existing file is kept unless
    ``overwrite`` is True.

    NOTE(review): the nesting indentation of the embedded YAML below looks
    like it may have been lost when this file was copied -- verify against
    the published dataset's datacite.yml before relying on it.

    Parameters
    ----------
    bids_root : str
        Path to the root of the bids dir.
    overwrite : bool
        If True, overwrite an existing file.
    """
    txt = """# Metadata for DOI registration according to DataCite Metadata Schema 4.1.
# For detailed schema description see https://doi.org/10.5438/0014
## Required fields
# The main researchers involved. Include digital identifier (e.g., ORCID)
# if possible, including the prefix to indicate its type.
authors:
-
firstname: "Stefan"
lastname: "Appelhoff"
affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
id: "ORCID:0000-0001-8002-0877"
-
firstname: "Ralph"
lastname: "Hertwig"
affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
id: "ORCID:0000-0002-9908-9556"
-
firstname: "Bernhard"
lastname: "Spitzer"
affiliation: "Center for Adaptive Rationality, Max Planck Institute for Human Development, Berlin, Germany"
id: "ORCID:0000-0001-9752-932X"
# A title to describe the published resource.
title: "The mpib_sp_eeg dataset"
# Additional information about the resource, e.g., a brief abstract.
description: |
When acquiring information about choice alternatives, decision makers may have varying levels of control over which
and how much information they sample before making a choice. How does subjective control over sampling affect the
quality of experience-based decisions?
This resource contains behavioral, eyetracking, and EEG data of 40 human participants performing a
numerical sampling task in which the level of subjective control over sampling was systematically varied.
The dataset is organized according to the Brain Imaging Data Structure (BIDS).
# List of keywords the resource should be associated with.
# Give as many keywords as possible, to make the resource findable.
keywords:
- cognitive neuroscience
- decisions from experience
- DFE
- sampling paradigm
- sequential sampling
- decision-making
- control
- EEG
- electroencephalography
- eyetracking
- BIDS
- Brain Imaging Data Structure
# License information for this resource. Please provide the license name and/or a link to the license.
# Please add also a corresponding LICENSE file to the repository.
license:
name: "Open Data Commons Public Domain Dedication and License (PDDL) v1.0"
url: "https://opendatacommons.org/licenses/pddl/1-0/"
## Optional Fields
# Funding information for this resource.
# Separate funder name and grant number by comma.
funding:
- "Max Planck Institute for Human Development"
# Related publications. reftype might be: IsSupplementTo, IsDescribedBy, IsReferencedBy.
# Please provide digital identifier (e.g., DOI) if possible.
# Add a prefix to the ID, separated by a colon, to indicate the source.
# Supported sources are: DOI, arXiv, PMID
# In the citation field, please provide the full reference, including title, authors, journal etc.
references:
-
id: "doi:10.1101/2021.06.03.446960"
reftype: "IsSupplementTo"
citation: "Control over sampling boosts numerical evidence processing in human decisions from experience Stefan Appelhoff, Ralph Hertwig, Bernhard Spitzer bioRxiv 2021.06.03.446960"
-
id: "doi:10.5281/zenodo.3361717"
reftype: "IsReferencedBy"
citation: "Stefan Appelhoff. (2019, August 6). sappelhoff/sp_experiment: v1.0 (Version v1.0). Zenodo. http://doi.org/10.5281/zenodo.3361717"
-
id: "doi:10.5281/zenodo.5929222"
reftype: "IsReferencedBy"
citation: "Stefan Appelhoff. (2022). sappelhoff/sp_code: 1.0.0 (1.0.0). Zenodo. https://doi.org/10.5281/zenodo.5929223"
# Resource type. Default is Dataset, other possible values are Software, DataPaper, Image, Text.
resourcetype: Dataset
# Do not edit or remove the following line
templateversion: 1.2
"""
    fname = op.join(bids_root, "datacite.yml")
    # Keep an existing file unless overwriting was requested.
    if op.exists(fname) and not overwrite:
        return
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(txt)
  194. # %% Making a .bidsignore file
  195. def make_bidsignore(bids_root, overwrite):
  196. """Make a .bidsignore file."""
  197. txt = """README.md
  198. datacite.yml
  199. """
  200. fname = op.join(bids_root, ".bidsignore")
  201. if op.exists(fname) and not overwrite:
  202. return
  203. with open(fname, "w", encoding="utf-8") as fout:
  204. fout.write(txt)
  205. # %% Making a .bids-validator-config.json file to ignore some known warnings in the validator
  206. def make_bids_validator_config(bids_root, overwrite):
  207. """Make a .bidsconfig.json file."""
  208. # fmt: off
  209. # switch off the following warnings, because they don't make sense for this dataset.
  210. # README is README.md
  211. # Subjects are naturally inconsistent, because the study is a mixed design with a between factor
  212. bids_validator_config_json = {
  213. "ignore": [
  214. 101, # [WARN] The recommended file /README is missing. See Section 03 (Modality agnostic files) of the BIDS specification. (code: 101 - README_FILE_MISSING)
  215. 38, # [WARN] Not all subjects contain the same files. Each subject should contain the same number of files with the same naming unless some files are known to be missing. (code: 38 - INCONSISTENT_SUBJECTS)
  216. ]}
  217. # fmt: on
  218. fname = op.join(bids_root, ".bids-validator-config.json")
  219. if not op.exists(fname) or overwrite:
  220. with open(fname, "w", encoding="utf-8") as fout:
  221. json.dump(bids_validator_config_json, fout, ensure_ascii=False, indent=4)
  222. # %% Copying EEG files
  223. def copy_eeg_and_events_files(bids_root, task_map, sub, just_json, overwrite):
  224. """Copy and rename the EEG and events files per subject.
  225. Parameters
  226. ----------
  227. bids_root : str
  228. Path to the root of the bids dir.
  229. sub : str
  230. The subject entity to work on, for example "sub-01".
  231. task_map : dict
  232. A mapping between old task names, and templates for the
  233. new task names.
  234. just_json : bool
  235. Whether or not to only touch the json files.
  236. overwrite : bool
  237. If True, overwrite existing files.
  238. """
  239. # map from old to new
  240. for old_task, new_task_template in task_map.items():
  241. fname_old = f"{sub}_task-{old_task}_eeg.vhdr"
  242. sub_id = int(sub[-2:])
  243. stop_policy = "Variable" if sub_id % 2 == 0 else "Fixed"
  244. new_task = new_task_template.format(stop_policy)
  245. fname_new = f"{sub}_task-{new_task}_eeg.vhdr"
  246. src = op.join(bids_root, "sourcedata", sub, "eeg", fname_old)
  247. dest_dir = op.join(bids_root, sub, "eeg")
  248. os.makedirs(dest_dir, exist_ok=True)
  249. dest = op.join(dest_dir, fname_new)
  250. # Copy EEG data
  251. if not just_json:
  252. if op.exists(src) and (not op.exists(dest) or overwrite):
  253. copyfile_brainvision(src, dest)
  254. # Copy and rename events.tsv
  255. src = src.replace("_eeg.vhdr", "_events.tsv")
  256. dest = dest.replace("_eeg.vhdr", "_events.tsv")
  257. # For description task, we need to fix nan -> n/a
  258. if "task-description" in src:
  259. tmpdf = pd.read_csv(src, sep="\t")
  260. if overwrite:
  261. tmpdf.to_csv(dest, index=False, na_rep="n/a", sep="\t")
  262. else:
  263. # if not description, we can simply copy over
  264. if op.exists(src) and (not op.exists(dest) or overwrite):
  265. shutil.copyfile(src, dest)
  266. # %% Making a README
def make_README(bids_root, overwrite):
    """Write a README.md file.

    Write the dataset README to ``<bids_root>/README.md``, keeping an
    existing file unless ``overwrite`` is True.

    NOTE(review): the nested-list indentation of the embedded markdown below
    looks like it may have been lost when this file was copied -- verify
    against the published dataset's README.md before relying on it.
    """
    txt = """# The `mpib_sp_eeg` dataset
This is the readme of the `mpib_sp_eeg` dataset. The short dataset name results from these three facts:
- the data was collected at the Max Planck Institute for Human Development (MPIB)
- the behavioral task was the "Sampling Paradigm" (SP)
- the dataset's main neuroimaging modality is electroencephalography data (EEG)
The data was collected in 2019 at the MPIB in Berlin by Stefan Appelhoff and colleagues.
The data is organized according to the Brain Imaging Data Structure, see: https://bids.neuroimaging.io
The dataset is managed with datalad, see: http://handbook.datalad.org/en/latest/index.html
## Download
1. Install datalad (http://handbook.datalad.org/en/latest/intro/installation.html)
2. Run the code below from the shell:
1. first "clone" the dataset
2. then navigate to the root of the dataset
3. then use `datalad get <file you want>` to get the file contents for each file you want
(you can also use `datalad get . -r` to get everything at once, but this may take some time)
```shell
datalad clone https://gin.g-node.org/sappelhoff/mpib_sp_eeg
cd mpib_sp_eeg
datalad get participants.tsv
```
## Preprint
A preprint is available on BioRxiv.
- BioRxiv: https://doi.org/10.1101/2021.06.03.446960
## Experimental presentation code
The code used for the experimental presentation can be found on GitHub and Zenodo.
- GitHub: https://github.com/sappelhoff/sp_experiment
- Zenodo: https://doi.org/10.5281/zenodo.3354368
## Analysis code
The code used for data analysis can be found on GitHub and on Zenodo.
- GitHub: https://github.com/sappelhoff/sp_code
- Zenodo: https://doi.org/10.5281/zenodo.5929222
## Contact
- [Stefan Appelhoff](mailto:appelhoff@mpib-berlin.mpg.de)
## License
The `source_to_bids.py` script in the `code/` directory is licensed under the MIT license.
This data is made available under the Public Domain Dedication and License v1.0
whose full text can be found at: http://opendatacommons.org/licenses/pddl/1.0/
See also this human readable summary: https://opendatacommons.org/licenses/pddl/summary/
For details, please see the [LICENSE](LICENSE) file.
## Using this dataset
If you use this dataset in your work, please consider citing it as well as the main references describing it.
## Additional information
- The eyetracking recording for sub-18 in the ActiveVariable task is broken ("sub-18/eeg/sub-18_task-ActiveVariable_recording-eyetracking_physio.tsv.gz").
- The eyetracking recording for sub-15 in the YokedFixed task for unknown reasons has timing issues ("sub-15/eeg/sub-15_task-YokedFixed_recording-eyetracking_physio.tsv.gz").
- All bipolar channels (ECG, HEOG, VEOG) were recorded with a ground electrode placed 10cm away from the navel on the participant's right side of the belly.
- The following describes the approximate locations of the ECG, HEOG, and VEOG electrodes:
- ECG- between the 5th and 6th rib on the left chest.
- ECG+ in the middle of the upper chest
- HEOG- 1cm from the left outer canthus
- HEOG+ 1cm from the right outer canthus
- VEOG- 2cm below the left eye
- VEOG+ 1cm above the left eyebrow
"""
    fname = op.join(bids_root, "README.md")
    # Keep an existing file unless overwriting was requested.
    if op.exists(fname) and not overwrite:
        return
    with open(fname, "w", encoding="utf-8") as fout:
        fout.write(txt)
  327. # %% Making a dataset_description.json
  328. def make_dataset_description(bids_root, overwrite):
  329. """Make a dataset_description.json."""
  330. # Prepare and write participants JSON
  331. dataset_description_json = {
  332. "Name": "mpib_sp_eeg",
  333. "BIDSVersion": "1.6.0",
  334. "DatasetType": "raw",
  335. "License": "PDDL",
  336. "Authors": [
  337. "Stefan Appelhoff",
  338. "Ralph Hertwig",
  339. "Bernhard Spitzer",
  340. ],
  341. "Acknowledgements": "We thank Agnessa Karapetian, Clara Wicharz, Jann Wäscher, Yoonsang Lee, and Zhiqi Kang for help with data collection, Dirk Ostwald and Casper Kerrén for helpful discussions and feedback, and Susannah Goss for editorial assistance.",
  342. "HowToAcknowledge": "Please cite https://doi.org/10.1101/2021.06.03.446960",
  343. "EthicsApprovals": [
  344. "The study was approved by the ethics committee of the Max Planck Institute for Human Development, Berlin, Germany."
  345. ],
  346. "ReferencesAndLinks": [
  347. "https://doi.org/10.1101/2021.06.03.446960",
  348. "https://doi.org/10.5281/zenodo.3354368",
  349. "https://doi.org/10.5281/zenodo.5929222"
  350. ],
  351. "DatasetDOI": "https://gin.g-node.org/sappelhoff/mpib_sp_eeg/",
  352. }
  353. fname = op.join(bids_root, "dataset_description.json")
  354. if not op.exists(fname) or overwrite:
  355. with open(fname, "w", encoding="utf-8") as fout:
  356. json.dump(dataset_description_json, fout, ensure_ascii=False, indent=4)
  357. fout.write("\n")
  358. # %% Make LICENSE
  359. def make_LICENSE(bids_root, overwrite):
  360. """Make LICENSE file."""
  361. response = requests.get("https://opendatacommons.org/licenses/pddl/pddl-10.txt")
  362. fname = op.join(bids_root, "LICENSE")
  363. if not op.exists(fname) or overwrite:
  364. with open(fname, "w", encoding="utf-8") as fout:
  365. fout.write(response.text)
  366. # %% Make CHANGES
def make_CHANGES(bids_root, overwrite):
    """Make CHANGES file.

    Write the dataset changelog to ``<bids_root>/CHANGES``, keeping an
    existing file unless ``overwrite`` is True.

    NOTE(review): the continuation-line indentation of the changelog text
    below may have been lost when this file was copied -- verify against
    the published dataset's CHANGES file.
    """
    txt = """1.0.0 2021-05-25
- Initial release
1.0.1 2021-06-04
- Updated link to preprint in dataset_description.json and datacite.yml
- Added Manufacturer and ManufacturersModelName to physio.json (Tobii 4C eyetracker)
- Rephrased acknowledgements in dataset_description.json
- Updated code/environment.yml
1.0.2 2022-02-01
- Minor updates to README, source_to_bids.py, datacite.yml, dataset_description.json
in order to add links to other resources, and minor wording fixes
- Removed unneeded code/environment.yml
- The `source_to_bids.py` script is now licensed under the MIT license
- Make dataset available under PDDL
"""
    fname = op.join(bids_root, "CHANGES")
    # Only write when missing or when overwriting was requested.
    if not op.exists(fname) or overwrite:
        with open(fname, "w", encoding="utf-8") as fout:
            fout.write(txt)
  387. # %% Make phenotype
  388. def make_phenotype(bids_root, overwrite):
  389. """Make phenotype directory for BNT data."""
  390. # Make phenotype directory
  391. os.makedirs(op.join(BIDS_ROOT, "phenotype"), exist_ok=True)
  392. # the phenotypo data was read by a human from the handwritten PDFs
  393. # in the /sourcedata and recorded here.
  394. # NOTE: When participants answered with floats, the answer was rounded to integer.
  395. # When they answered in percentage and a count was needed, ...
  396. # that count was calculated using the percentage.
  397. # fmt: off
  398. data = {
  399. "participant_id": [f'sub-{subj:02}' for subj in range(1, 41)],
  400. "q1": [25, 35, 25, 76, 25, 25, 25, 25, 25, 25, 3, 10, 25, 25, 25, 10, 25, 25, 10, 40,
  401. 25, 25, 25, 40, 10, 5, 13, 25, 25, 25, 25, 25, 30, 40, 30, 20, 25, 40, 20, 10],
  402. "q2": [20, 20, 20, 42, 20, 20, 23, 35, 23, 20, 35, 20, 33, 35, 46, 21, 22, 25, 47, 23,
  403. 20, 20, 25, 35, 23, 30, 35, 19, 20, 26, 20, 20, 35, 20, 12, 23, 21, 35, 20, 35],
  404. "q3": [50, 5, 80, 35, 50, 20, 50, 50, 50, 50, 4, 4, 50, 20, np.nan, 5, 8, 7, 50, 5, 50,
  405. 80, 8, 30, 4, 80, 2, 23, 9, 80, 15, 80, 95, 10, 20, 5, 95, 80, 4, 30],
  406. "q4": [30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 40, 30,
  407. 30, 30, 40, 30, 33, 30, np.nan, 30, 30, 30, 30, 200, 10, 33, 30, 30, 30, 50, 30],
  408. }
  409. # fmt: on
  410. # Fill correct or not based on the true answers
  411. q1_correct = 25
  412. q2_correct = 20
  413. q3_correct = 50
  414. q4_correct = 30
  415. for question, correct in zip(
  416. ["q1", "q2", "q3", "q4"], [q1_correct, q2_correct, q3_correct, q4_correct]
  417. ):
  418. data[f"{question}_correct"] = (np.array(data[f"{question}"]) == correct).astype(
  419. int
  420. )
  421. # Make dataframe and save as TSV
  422. phenotype_tsv = pd.DataFrame(data=data)
  423. fname = op.join(BIDS_ROOT, "phenotype", "berlin_numeracy_test.tsv")
  424. if not op.exists(fname) or overwrite:
  425. phenotype_tsv.to_csv(fname, index=False, na_rep="n/a", sep="\t")
  426. # Prepare the JSON file
  427. phenotype_json = {
  428. "MeasurementToolMetadata": {
  429. "Description": "Berlin Numeracy Test - Pen and Paper Version",
  430. "TermURL": "http://www.riskliteracy.org/researchers/",
  431. },
  432. "participant_id": {
  433. "LongName": "participant identification",
  434. "Description": "identification number of the participant",
  435. },
  436. "q1": {
  437. "LongName": "question two",
  438. "Description": "Out of 1,000 people in a small town 500 are members of a choir. Out of these 500 members in the choir 100 are men. Out of the 500 inhabitants that are not in the choir 300 are men. What is the probability that a randomly drawn man is a member of the choir? (please indicate the probability in percent).",
  439. },
  440. "q1_correct": {
  441. "LongName": "question two correct",
  442. "Description": "was question two answered correctly with 25?",
  443. "Levels": {0: False, 1: True},
  444. },
  445. "q2": {
  446. "LongName": "question three",
  447. "Description": "Imagine we are throwing a loaded die (6 sides). The probability that the die shows a 6 is twice as high as the probability of each of the other numbers. On average, out of these 70 throws, how many times would the die show the number 6?",
  448. },
  449. "q2_correct": {
  450. "LongName": "question three correct",
  451. "Description": "was question three answered correctly with 20?",
  452. "Levels": {0: False, 1: True},
  453. },
  454. "q3": {
  455. "LongName": "question four",
  456. "Description": "In a forest 20% of mushrooms are red, 50% brown and 30% white. A red mushroom is poisonous with a probability of 20%. A mushroom that is not red is poisonous with a probability of 5%. What is the probability that a poisonous mushroom in the forest is red?",
  457. },
  458. "q3_correct": {
  459. "LongName": "question four correct",
  460. "Description": "was question four answered correctly with 50?",
  461. "Levels": {0: False, 1: True},
  462. },
  463. "q4": {
  464. "LongName": "question one",
  465. "Description": "Imagine we are throwing a five-sided die 50 times. On average, out of these 50 throws how many times would this five-sided die show an odd number (1, 3 or 5)?",
  466. },
  467. "q4_correct": {
  468. "LongName": "question one correct",
  469. "Description": "was question one answered correctly with 30?",
  470. "Levels": {0: False, 1: True},
  471. },
  472. }
  473. fname = op.join(bids_root, "phenotype", "berlin_numeracy_test.json")
  474. if not op.exists(fname) or overwrite:
  475. with open(fname, "w", encoding="utf-8") as fout:
  476. json.dump(phenotype_json, fout, ensure_ascii=False, indent=4)
  477. # %% Make events.json file
  478. # Descriptions are taken from here:
  479. # https://github.com/sappelhoff/sp_experiment/blob/6e50de2ecde4b8d13f267cb5eff5451578372a89/sp_experiment/define_variable_meanings.py#L12-L258
  480. def make_events_json_dict():
  481. """Provide a dict to describe all collected variables."""
  482. # Get the trigger values
  483. trigger_dict = provide_trigger_dict()
  484. events_json_dict = dict()
  485. # Add stimulus presentation information
  486. events_json_dict["StimulusPresentation"] = {
  487. "OperatingSystem": "Windows 10 - Version 1903",
  488. "SoftwareName": "PsychoPy",
  489. "SoftwareRRID": "SCR_006571",
  490. "SoftwareVersion": "3.0.0",
  491. "Code": "doi:10.5281/zenodo.3354368"
  492. }
  493. # Start populating the dict
  494. events_json_dict["onset"] = {
  495. "Description": "onset of the event",
  496. "Units": "seconds",
  497. }
  498. events_json_dict["duration"] = {
  499. "Description": "duration of the event",
  500. "Units": "seconds",
  501. }
  502. events_json_dict["trial"] = {
  503. "Description": "zero indexed trial counter, where a trial is a sequence of steps that ends with a final choice."
  504. }
  505. events_json_dict["action_type"] = {
  506. "Description": "type of the action that the subject performed at this event within a trial",
  507. "Levels": {
  508. "sample": "the subject sampled either the left or the right option",
  509. "stop": "the subject decided to stop sampling the options and instead use the next action for a final choice",
  510. "forced_stop": "the subject took a maximum of samples and wanted to take another one, so we force stopped in this turn",
  511. "premature_stop": "the subject tried to stop sampling before taking a single sample. This lead to an error.",
  512. "final_choice": "the subject chose either the left or the right option as a final choice",
  513. },
  514. }
  515. events_json_dict["action"] = {
  516. "Description": "the concrete action that the subject performed for the action type",
  517. "Levels": {
  518. "0": "the subject picked the *left* option",
  519. "1": "the subject picked the *right* option",
  520. "2": "the subject decided to stop sampling - for action_type *stop* only",
  521. },
  522. }
  523. events_json_dict["outcome"] = {
  524. "Description": "the outcome that the subject received for their action. Numbers in the range 1 to 9.",
  525. }
  526. events_json_dict["response_time"] = {
  527. "Description": "the time it took the subject to respond after the onset of the event",
  528. "Units": "milliseconds",
  529. }
  530. events_json_dict["value"] = {
  531. "Description": "the TTL trigger value (=EEG marker value) associated with an event",
  532. "Levels": {
  533. trigger_dict["trig_begin_experiment"]: "beginning of the experiment",
  534. trigger_dict["trig_end_experiment"]: "end of the experiment",
  535. trigger_dict[
  536. "trig_new_trl"
  537. ]: "color of fixcross is changed to indicate start of new trial",
  538. trigger_dict[
  539. "trig_sample_onset"
  540. ]: "onset of new sample within a trial (fixcross changes to white color)",
  541. trigger_dict["trig_left_choice"]: "subject chose *left* during sampling",
  542. trigger_dict["trig_right_choice"]: "subject chose *right* during sampling",
  543. trigger_dict["trig_final_choice"]: "subject chose *stop* during sampling",
  544. trigger_dict[
  545. "trig_mask_out_l"
  546. ]: "a masked outcome is shown after sampling (left side)",
  547. trigger_dict[
  548. "trig_show_out_l"
  549. ]: "an outcome is revealed after sampling (left side)",
  550. trigger_dict[
  551. "trig_mask_out_r"
  552. ]: "a masked outcome is shown after sampling (right side)",
  553. trigger_dict[
  554. "trig_show_out_r"
  555. ]: "an outcome is revealed after sampling (right side)",
  556. trigger_dict[
  557. "trig_new_final_choice"
  558. ]: "color of fixcross is changed to indicate start of a final choice",
  559. trigger_dict[
  560. "trig_final_choice_onset"
  561. ]: "onset of new final choice at the end of trial (fixcross changes to white color)",
  562. trigger_dict[
  563. "trig_left_final_choice"
  564. ]: "subject chose *left* for final choice",
  565. trigger_dict[
  566. "trig_right_final_choice"
  567. ]: "subject chose *right* for final choice",
  568. trigger_dict[
  569. "trig_mask_final_out_l"
  570. ]: "a masked outcome is shown after final choice (left side)",
  571. trigger_dict[
  572. "trig_show_final_out_l"
  573. ]: "an outcome is revealed after final choice (left side)",
  574. trigger_dict[
  575. "trig_mask_final_out_r"
  576. ]: "a masked outcome is shown after final choice (right side)",
  577. trigger_dict[
  578. "trig_show_final_out_r"
  579. ]: "an outcome is revealed after final choice (right side)",
  580. trigger_dict[
  581. "trig_error"
  582. ]: "color of fixcross is changed to indicate an error (ignore all markers prior to this marker within this trial)",
  583. trigger_dict[
  584. "trig_forced_stop"
  585. ]: "subject took the maximum number of samples and wanted to take yet another one",
  586. trigger_dict[
  587. "trig_premature_stop"
  588. ]: "subject tried to make a final choice before taking at least one sample",
  589. trigger_dict["trig_block_feedback"]: "block feedback is displayed",
  590. },
  591. }
  592. events_json_dict["mag0_1"] = {
  593. "LongName": "magnitude 0_1",
  594. "Description": "the first of two possible magnitudes in outcomes for option 0",
  595. }
  596. events_json_dict["prob0_1"] = {
  597. "LongName": "probability 0_1",
  598. "Description": "the first of two possible probabilities in outcomes for option 0",
  599. }
  600. events_json_dict["mag0_2"] = {
  601. "LongName": "magnitude 0_2",
  602. "Description": "the second of two possible magnitudes in outcomes for option 0",
  603. }
  604. events_json_dict["prob0_2"] = {
  605. "LongName": "probability 0_2",
  606. "Description": "the second of two possible probabilities in outcomes for option 0",
  607. }
  608. events_json_dict["mag1_1"] = {
  609. "LongName": "magnitude 1_1",
  610. "Description": "the first of two possible magnitudes in outcomes for option 1",
  611. }
  612. events_json_dict["prob1_1"] = {
  613. "LongName": "probability 1_1",
  614. "Description": "the first of two possible probabilities in outcomes for option 1",
  615. }
  616. events_json_dict["mag1_2"] = {
  617. "LongName": "magnitude 1_2",
  618. "Description": "the second of two possible magnitudes in outcomes for option 1",
  619. }
  620. events_json_dict["prob1_2"] = {
  621. "LongName": "probability 1_2",
  622. "Description": "the second of two possible probabilities in outcomes for option 1",
  623. }
  624. events_json_dict["version"] = {
  625. "Description": "version of the experiment used for collecting this data."
  626. }
  627. events_json_dict["reset"] = {
  628. "Description": "boolean that describes whether of not to ignore events prior to this event in the current trial.",
  629. "Levels": {
  630. "0": "so far no error in this trial since the beginning or the last error",
  631. "1": "error committed: disregard all events prior to this event for the current trial.",
  632. },
  633. }
  634. events_json_dict["system_time_stamp"] = {
  635. "Description": "system time in microseconds as measured from an arbitrary starting point. This should be used to connect the event with the eyetracking data.",
  636. "Units": "microseconds",
  637. }
  638. # Keys in levels for "value" are bytes: we need to turn them into integers
  639. events_json_dict["value"]["Levels"] = {
  640. ord(key): val for key, val in events_json_dict["value"]["Levels"].items()
  641. }
  642. # return
  643. return events_json_dict
  644. def make_description_task_json():
  645. """Provide variable meanings for description task.
  646. This is heavily based on the sampling paradigm tasks. We overwrite a few
  647. of the descriptions to make more sense for the description task.
  648. """
  649. # Get the definitions from sampling paradigm
  650. events_json_dict = make_events_json_dict()
  651. # Overwrite some values
  652. events_json_dict["trial"][
  653. "Description"
  654. ] = "zero indexed trial counter, where a trial index points to the lottery setting that was used in this event by comparing with the trial column in the spactive task."
  655. # Remove all action types except "final choice"
  656. for level in ["sample", "stop", "forced_stop", "premature_stop"]:
  657. events_json_dict["action_type"]["Levels"].pop(level)
  658. # Remove possible action "2", indicating a stop: Stopping is not possible
  659. # in the descriptions task. Only "0"(=pick left) and "1"(=pick right)
  660. for level in ["2"]:
  661. events_json_dict["action"]["Levels"].pop(level)
  662. # Some trigger values are not occurring in the descriptions task
  663. trigger_dict = provide_trigger_dict()
  664. for level in [
  665. ord(trigger_dict["trig_sample_onset"]),
  666. ord(trigger_dict["trig_left_choice"]),
  667. ord(trigger_dict["trig_right_choice"]),
  668. ord(trigger_dict["trig_final_choice"]),
  669. ord(trigger_dict["trig_mask_out_l"]),
  670. ord(trigger_dict["trig_show_out_r"]),
  671. ord(trigger_dict["trig_new_final_choice"]),
  672. ord(trigger_dict["trig_forced_stop"]),
  673. ord(trigger_dict["trig_premature_stop"]),
  674. ]:
  675. events_json_dict["value"]["Levels"].pop(level)
  676. return events_json_dict
  677. def make_events_json(bids_root, overwrite):
  678. """Make events.json files."""
  679. # Prepare and write events JSON for all non-description tasks
  680. events_json = make_events_json_dict()
  681. tasks = ["ActiveFixed", "ActiveVariable", "YokedFixed", "YokedVariable"]
  682. for taskname in tasks:
  683. fname = op.join(bids_root, f"task-{taskname}_events.json")
  684. if not op.exists(fname) or overwrite:
  685. with open(fname, "w", encoding="utf-8") as fout:
  686. json.dump(events_json, fout, ensure_ascii=False, indent=4)
  687. # now a slightly adjusted one for description task
  688. events_json_descr = make_description_task_json()
  689. taskname = "description"
  690. fname = op.join(bids_root, f"task-{taskname}_events.json")
  691. if not op.exists(fname) or overwrite:
  692. with open(fname, "w", encoding="utf-8") as fout:
  693. json.dump(events_json_descr, fout, ensure_ascii=False, indent=4)
  694. # %% Making participants files
  695. def make_participants(bids_root, overwrite):
  696. """Make participants.tsv and participants.json files."""
  697. # Read participant data from the log files
  698. fname_template = op.join(bids_root, "sourcedata", "sub-{0:02}/log_{0}_active.txt")
  699. subj = []
  700. age = []
  701. sex = []
  702. subjects = range(1, 41)
  703. for isubj in subjects:
  704. fname = fname_template.format(isubj)
  705. with open(fname, "r") as fin:
  706. lines = fin.readlines()
  707. subj.append(f"sub-{isubj:02}")
  708. age.append(int(lines[1].strip().split(": ")[-1]))
  709. sex.append(lines[2].strip().split(": ")[-1][0].lower())
  710. # Some data can be filled from our knowledge about the experiment
  711. # See log files in sourcedata
  712. handedness = np.repeat(["r"], 40)
  713. seed = np.repeat(np.arange(1, 11), 4)
  714. # https://stackoverflow.com/a/33802213/5201771
  715. # pick n consecutive items every m items
  716. yoked_to = np.tile(np.arange(1, 41).reshape(-1, 4)[:, :2], (1, 2)).reshape(-1)
  717. yoked_to = [f"sub-{s:02}" for s in yoked_to]
  718. start_condition = np.tile(["active"] * 2 + ["yoked"] * 2, 10)
  719. stopping = np.tile(["fixed", "variable"], 20)
  720. data = {
  721. "participant_id": subj,
  722. "sex": sex,
  723. "age": age,
  724. "handedness": handedness,
  725. "seed": seed,
  726. "yoked_to": yoked_to,
  727. "start_condition": start_condition,
  728. "stopping": stopping,
  729. }
  730. participants_tsv = pd.DataFrame(data=data)
  731. # Write participants TSV
  732. fname = op.join(bids_root, "participants.tsv")
  733. if not op.exists(fname) or overwrite:
  734. participants_tsv.to_csv(fname, index=False, na_rep="n/a", sep="\t")
  735. # Prepare and write participants JSON
  736. participants_json = {
  737. "participant_id": {"Description": "Unique participant identifier."},
  738. "age": {"Description": "The age of the participant.", "Units": "years"},
  739. "sex": {
  740. "Description": "The biological sex of the participant as judged by the experimenter.",
  741. "Levels": {"m": "male", "f": "female"},
  742. },
  743. "handedness": {
  744. "Description": "The handedness of the participant as reported by the participant",
  745. "Levels": {"l": "left", "r": "right"},
  746. },
  747. "seed": {
  748. "Description": "The random seed used to initialize the experiment. Participants that share a seed, saw the same underlying distributions per trial."
  749. },
  750. "yoked_to": {
  751. "Description": "The participant_id to which the participant was yoked in the yoked sampling condition. Some participants were yoked to themselves (i.e., they saw a replay of their own active sampling condition)."
  752. },
  753. "start_condition": {
  754. "Description": "The experimental condition with which the participant started the overall experiment. If the experiment was started with the active sampling condition, the yoked sampling condition was done afterwards and vice versa. After the two sampling conditions, the description task and the berlin numeracy task followed (not counterbalanced).",
  755. "Levels": {
  756. "active": "active sampling condition",
  757. "yoked": "yoked sampling condition",
  758. },
  759. },
  760. "stopping": {
  761. "Description": "The between factor of the experiment. Some participants always had to draw 12 samples ('fixed'), others could draw between 1 and 19 samples ('variable').",
  762. "Levels": {
  763. "fixed": "fixed stopping after 12 samples",
  764. "variable": "variable stopping between 1 and 19 samples",
  765. },
  766. },
  767. }
  768. fname = op.join(bids_root, "participants.json")
  769. if not op.exists(fname) or overwrite:
  770. with open(fname, "w", encoding="utf-8") as fout:
  771. json.dump(participants_json, fout, ensure_ascii=False, indent=4)
  772. # %% Preparing and copying the eyetracking files
  773. def copy_eyetracking_files(bids_root, task_map, sub, just_json, overwrite):
  774. """Format eyetracking files.
  775. Parameters
  776. ----------
  777. bids_root : str
  778. Path to the root of the bids dir.
  779. sub : str
  780. The subject entity to work on, for example "sub-01".
  781. task_map : dict
  782. A mapping between old task names, and templates for the
  783. new task names.
  784. just_json : bool
  785. Whether or not to only touch the json files.
  786. overwrite : bool
  787. If True, overwrite existing files.
  788. """
  789. # Define relevant columns
  790. cols = [
  791. "device_time_stamp",
  792. "system_time_stamp",
  793. "left_gaze_point_on_display_area_x",
  794. "left_gaze_point_on_display_area_y",
  795. "left_gaze_point_validity",
  796. "left_pupil_diameter",
  797. "left_pupil_validity",
  798. "right_gaze_point_on_display_area_x",
  799. "right_gaze_point_on_display_area_y",
  800. "right_gaze_point_validity",
  801. "right_pupil_diameter",
  802. "right_pupil_validity",
  803. ]
  804. # Object for physio.json --> needs to be written once for each task and subject.
  805. # StartTime could be estimated by looking into the
  806. # events.tsv file and compare the `system_time_stamp` column with the same
  807. # column from the eyetracking files.
  808. physio_json = {
  809. "SamplingFrequency": 90, # Tobii 4C eyetracker is not "research grade", so this sfreq is not guaranteed to be stable apparently.
  810. "StartTime": "n/a",
  811. "Columns": cols,
  812. "Manufacturer": "Tobii Technology, Sweden",
  813. "ManfacturersModelName": "4C",
  814. "device_time_stamp": {
  815. "Description": "time stamp according to the eyetracker's internal clock",
  816. "Units": "µs",
  817. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazeData.html#details",
  818. },
  819. "system_time_stamp": {
  820. "Description": "time stamp according to the computer's internal clock",
  821. "Units": "µs",
  822. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazeData.html#details",
  823. },
  824. "left_gaze_point_on_display_area_x": {
  825. "Description": "X value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
  826. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
  827. },
  828. "left_gaze_point_on_display_area_y": {
  829. "Description": "Y value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
  830. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
  831. },
  832. "left_gaze_point_validity": {
  833. "Description": "validity of the left gaze point data",
  834. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#afce85e70ee3e1a53e0eae66cf2fecc30",
  835. "Levels": {"0": False, "1": True},
  836. },
  837. "left_pupil_diameter": {
  838. "Description": "diameter of the left pupil in millimeters",
  839. "Units": "mm",
  840. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
  841. },
  842. "left_pupil_validity": {
  843. "Description": "validity of the left pupil data",
  844. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
  845. "Levels": {"0": False, "1": True},
  846. },
  847. "right_gaze_point_on_display_area_x": {
  848. "Description": "X value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
  849. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
  850. },
  851. "right_gaze_point_on_display_area_y": {
  852. "Description": "Y value for normalized gaze point position in 2D on the active display area as an (x, y) tuple: bottom left of screen is (0, 1), top right of screen is (1, 0)",
  853. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#ab13c40e69e0e5e086efcd0186b31073d",
  854. },
  855. "right_gaze_point_validity": {
  856. "Description": "validity of the right gaze point data",
  857. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1GazePoint.html#afce85e70ee3e1a53e0eae66cf2fecc30",
  858. "Levels": {"0": False, "1": True},
  859. },
  860. "right_pupil_diameter": {
  861. "Description": "diameter of the right pupil in millimeters",
  862. "Units": "mm",
  863. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
  864. },
  865. "right_pupil_validity": {
  866. "Description": "validity of the right pupil data",
  867. "TermURL": "http://devtobiipro.azurewebsites.net/tobii.research/python/reference/1.8.0.21-alpha-gc085c9ab/classtobii__research_1_1PupilData.html#a4a4e504b7d20952925b0f4fcebd3160f",
  868. "Levels": {"0": False, "1": True},
  869. },
  870. }
  871. for old_task, new_task in TASK_MAP.items():
  872. # Get file, and prepare new file name
  873. fname_old = f"{sub}_task-{old_task}_eyetracking.tsv"
  874. sub_id = int(sub[-2:])
  875. stop_policy = "Variable" if sub_id % 2 == 0 else "Fixed"
  876. new_task = new_task.format(stop_policy)
  877. fname_new = f"{sub}_task-{new_task}_recording-eyetracking_physio.tsv.gz"
  878. dest_dir = op.join(bids_root, sub, "eeg")
  879. os.makedirs(dest_dir, exist_ok=True)
  880. dest = op.join(dest_dir, fname_new)
  881. if op.exists(dest) and not overwrite:
  882. continue
  883. # Read data
  884. src = op.join(bids_root, "sourcedata", sub, "eyetracking", fname_old)
  885. df = pd.read_csv(src, sep="\t")
  886. # Convert str of tuple X Y gazepoint to two float columns
  887. for direction in ["left", "right"]:
  888. tmp = (
  889. df[f"{direction}_gaze_point_on_display_area"]
  890. .str.lstrip("(")
  891. .str.rstrip(")")
  892. .str.split(",", expand=True)
  893. )
  894. df[f"{direction}_gaze_point_on_display_area_x"] = (
  895. tmp[0].to_numpy().astype("float")
  896. )
  897. df[f"{direction}_gaze_point_on_display_area_y"] = (
  898. tmp[1].to_numpy().astype("float")
  899. )
  900. # Select only relevant columns and write
  901. df = df[cols]
  902. if not just_json:
  903. # skip writing if we only write (update) JSONs
  904. df.to_csv(
  905. dest, sep="\t", header=False, na_rep="n/a", index=False, compression="gzip"
  906. )
  907. # Write one physio.json per task - needed because each has a different StartTime
  908. # First, try to get StartTime
  909. events_fname = op.join(
  910. bids_root, sub, "eeg", f"{sub}_task-{new_task}_events.tsv"
  911. )
  912. if op.exists(events_fname):
  913. # We calculate StartTime as difference between the first
  914. # measured timestamps in seconds
  915. events_df = pd.read_csv(events_fname, sep="\t")
  916. first_event_timestamp = events_df["system_time_stamp"].to_list()[0]
  917. first_physio_timestamp = df["system_time_stamp"].to_list()[0]
  918. try:
  919. # timestamps must be int, and event is measured *after* physio, so
  920. # it must be bigger
  921. assert isinstance(first_event_timestamp, int)
  922. assert isinstance(first_physio_timestamp, int)
  923. assert first_event_timestamp > first_physio_timestamp
  924. starttime_seconds = (
  925. first_physio_timestamp - first_event_timestamp
  926. ) / 1e6
  927. assert starttime_seconds < 0
  928. physio_json["StartTime"] = starttime_seconds
  929. except AssertionError:
  930. if "sub-18_task-Active" in events_fname:
  931. # Eyetracking recording for sub-18 is broken and full of NaN
  932. physio_json["StartTime"] = 0.0
  933. else:
  934. print(
  935. f"Calculation of StartTime failed for {events_fname}:\n{first_event_timestamp}\n{first_physio_timestamp}"
  936. )
  937. else:
  938. print(
  939. f"Did not find events file, could not calculate StartTime for physio.json:\n{events_fname}"
  940. )
  941. # Now write
  942. fname = op.join(
  943. bids_root,
  944. sub,
  945. "eeg",
  946. f"{sub}_task-{new_task}_recording-eyetracking_physio.json",
  947. )
  948. if not op.exists(fname) or overwrite:
  949. with open(fname, "w", encoding="utf-8") as fout:
  950. json.dump(physio_json, fout, ensure_ascii=False, indent=4)
  951. # %% Coordsystem, electrodes, channels, eeg sidecar, and scans files
  952. def coord_elec_chan_eegjson_scans(bids_root, task_map, sub, just_json, overwrite):
  953. """Write/Convert several files to BIDS.
  954. Convert the following files:
  955. - electrodes.tsv --> per subj
  956. - coordsystem.json --> per subj
  957. - channels.tsv --> per subj/task
  958. - eeg.json --> per task
  959. Parameters
  960. ----------
  961. bids_root : str
  962. Path to the root of the bids dir.
  963. sub : str
  964. The subject entity to work on, for example "sub-01".
  965. task_map : dict
  966. A mapping between old task names, and templates for the
  967. new task names.
  968. just_json : bool
  969. This parameter is currently not implemented and passing it has no effect.
  970. overwrite : bool
  971. If True, overwrite existing files.
  972. """
  973. # Temporary directory to store all mne-bids outputs
  974. tmpdir = mkdtemp(prefix=f"sp_bids_tmp_{sub}_")
  975. # data needed for conversion
  976. df = pd.read_csv(op.join(bids_root, "participants.tsv"), sep="\t")
  977. # What stopping group did this sub belong to
  978. stopping = (
  979. df[df["participant_id"] == f"{sub}"]["stopping"].to_list()[0].capitalize()
  980. )
  981. # Get montage file
  982. fname_bvct = f"CapTrakResultFile_{sub}.bvct"
  983. fname_bvct = op.join(bids_root, "sourcedata", f"{sub}", "coords", fname_bvct)
  984. montage = read_dig_captrak(fname_bvct)
  985. # Get ground and reference positions separately, because they get dropped
  986. # at raw.set_montage() otherwise
  987. pos_gnd = montage.get_positions()["ch_pos"]["GND"]
  988. pos_ref = montage.get_positions()["ch_pos"]["REF"]
  989. # Convert to BIDS using mne-bids
  990. for oldtask, newtask_template in task_map.items():
  991. newtask = newtask_template.format(stopping)
  992. bids_path = mne_bids.BIDSPath(
  993. subject=f"{sub[-2:]}", task=newtask, root=tmpdir, datatype="eeg"
  994. )
  995. # Get EEG file
  996. fname_vhdr = f"{sub}_task-{oldtask}_eeg.vhdr"
  997. fname_vhdr = op.join(bids_root, "sourcedata", f"{sub}", "eeg", fname_vhdr)
  998. raw = read_raw_brainvision(fname_vhdr, preload=False, verbose=False)
  999. raw.set_channel_types(
  1000. {"HEOG": "eog", "VEOG": "eog", "ECG": "ecg"}, verbose=False
  1001. )
  1002. raw.set_montage(montage, verbose=False)
  1003. raw.info["line_freq"] = 50
  1004. # Potentially get bad channels from relative path
  1005. if "mpib_sp_eeg" in bids_root:
  1006. annotation_derivatives_path = "derivatives"
  1007. else:
  1008. annotation_derivatives_path = "code"
  1009. badch_file = op.join(
  1010. bids_root,
  1011. annotation_derivatives_path,
  1012. "annotation_derivatives",
  1013. f"{sub}",
  1014. f"{sub}_task-{newtask}_badchannels.txt",
  1015. )
  1016. if op.exists(badch_file):
  1017. raw.load_bad_channels(badch_file, force=False)
  1018. else:
  1019. print(f"\nDid not find {badch_file}\n")
  1020. # adjust high cutoff according to "True" high cutoff
  1021. # The saved "1000" was only a setting that gets auto overridden
  1022. # (verified via email to BrainProducts)
  1023. raw.info["lowpass"] = 450
  1024. # write to BIDS
  1025. mne_bids.write_raw_bids(raw, bids_path, overwrite=True, verbose=False)
  1026. # Copy wanted files from tmp to stable
  1027. # coordsystem.json
  1028. src = op.join(tmpdir, f"{sub}", "eeg", f"{sub}_coordsystem.json")
  1029. dest = op.join(bids_root, f"{sub}", "eeg", f"{sub}_coordsystem.json")
  1030. if not op.exists(dest) or overwrite:
  1031. shutil.copyfile(src, dest)
  1032. # eeg.json
  1033. src = op.join(tmpdir, f"{sub}", "eeg", f"{sub}_task-{newtask}_eeg.json")
  1034. dest = op.join(bids_root, f"task-{newtask}_eeg.json")
  1035. if not op.exists(dest) or overwrite:
  1036. shutil.copyfile(src, dest)
  1037. # electrodes.tsv
  1038. src = op.join(
  1039. tmpdir,
  1040. f"{sub}",
  1041. "eeg",
  1042. f"{sub}_electrodes.tsv",
  1043. )
  1044. dest = op.join(bids_root, f"{sub}", "eeg", f"{sub}_electrodes.tsv")
  1045. if not op.exists(dest) or overwrite:
  1046. # Add some info that mne-bids didn't add:
  1047. # the electrodes ECG, HEOG, VEOG are actually 3 *pairs* of electrodes
  1048. electrodes_df = pd.read_csv(src, sep="\t")
  1049. bipolar_elecs = electrodes_df.loc[
  1050. electrodes_df["name"].isin(["ECG", "HEOG", "VEOG"]), :
  1051. ].copy()
  1052. bipolar_elecs.loc[:, "name"] += "-"
  1053. electrodes_df.loc[
  1054. electrodes_df["name"].isin(["ECG", "HEOG", "VEOG"]), :
  1055. ] += "+"
  1056. electrodes_df = electrodes_df.append(bipolar_elecs, ignore_index=True)
  1057. # Reference and ground electrode are dropped by mne-bids. Add back here.
  1058. electrodes_df = electrodes_df.append(
  1059. (pd.DataFrame(["Gnd"] + list(pos_gnd) + [np.nan]).T).rename(
  1060. columns=dict(
  1061. zip(range(len(electrodes_df.columns)), electrodes_df.columns)
  1062. )
  1063. ),
  1064. ignore_index=True,
  1065. )
  1066. electrodes_df = electrodes_df.append(
  1067. (pd.DataFrame(["Ref"] + list(pos_ref) + [np.nan]).T).rename(
  1068. columns=dict(
  1069. zip(range(len(electrodes_df.columns)), electrodes_df.columns)
  1070. )
  1071. ),
  1072. ignore_index=True,
  1073. )
  1074. # try to fill impedance data for bipolar elecs, gnd, and ref
  1075. with open(fname_vhdr, "r") as fin:
  1076. lines = fin.readlines()
  1077. # impedances are the last couple of lines
  1078. for line in lines[-100:]:
  1079. line_split = line.split(":")
  1080. if len(line_split) != 2:
  1081. continue
  1082. ch, impedance = line_split
  1083. try:
  1084. impedance = int(impedance.strip())
  1085. except ValueError:
  1086. impedance = np.nan
  1087. if ch in [
  1088. "ECG+",
  1089. "ECG-",
  1090. "HEOG+",
  1091. "HEOG-",
  1092. "VEOG+",
  1093. "VEOG-",
  1094. "Gnd",
  1095. "Ref",
  1096. ]:
  1097. electrodes_df.loc[
  1098. electrodes_df["name"] == ch, "impedance"
  1099. ] = impedance
  1100. electrodes_df.to_csv(dest, index=False, na_rep="n/a", sep="\t")
  1101. # channels.tsv
  1102. src = op.join(
  1103. tmpdir,
  1104. f"{sub}",
  1105. "eeg",
  1106. f"{sub}_task-{newtask}_channels.tsv",
  1107. )
  1108. dest = op.join(
  1109. bids_root,
  1110. f"{sub}",
  1111. "eeg",
  1112. f"{sub}_task-{newtask}_channels.tsv",
  1113. )
  1114. if not op.exists(dest) or overwrite:
  1115. # Add some info that mne-bids didn't add
  1116. # specific status_description, and referencing schemes (especially for bipolar channels)
  1117. # change type of heog to heog and veog to veog ... (from generic eog)
  1118. channels_df = pd.read_csv(src, sep="\t")
  1119. channels_df.loc[
  1120. channels_df["status"] == "bad", "status_description"
  1121. ] = "bad as judged by visual inspection"
  1122. channels_df.insert(3, "reference", "FCz")
  1123. channels_df.loc[channels_df["name"] == "ECG", "reference"] = "ECG+, ECG-"
  1124. channels_df.loc[channels_df["name"] == "HEOG", "reference"] = "HEOG+, HEOG-"
  1125. channels_df.loc[channels_df["name"] == "VEOG", "reference"] = "VEOG+, VEOG-"
  1126. channels_df.loc[channels_df["name"] == "HEOG", "type"] = "HEOG"
  1127. channels_df.loc[channels_df["name"] == "VEOG", "type"] = "VEOG"
  1128. channels_df = channels_df[
  1129. [
  1130. "name",
  1131. "type",
  1132. "units",
  1133. "description",
  1134. "sampling_frequency",
  1135. "reference",
  1136. "low_cutoff",
  1137. "high_cutoff",
  1138. "status",
  1139. "status_description",
  1140. ]
  1141. ]
  1142. channels_df.to_csv(dest, index=False, na_rep="n/a", sep="\t")
  1143. # scans.tsv
  1144. src = op.join(
  1145. tmpdir,
  1146. f"{sub}",
  1147. f"{sub}_scans.tsv",
  1148. )
  1149. dest = op.join(
  1150. bids_root,
  1151. f"{sub}",
  1152. f"{sub}_scans.tsv",
  1153. )
  1154. if not op.exists(dest) or overwrite:
  1155. shutil.copyfile(src, dest)
  1156. # We are done, remove the temporary directory containing unneeded mne-bids outputs
  1157. shutil.rmtree(tmpdir)
  1158. # %% Add more information to EEG JSON sidecar files after they are written
  1159. def enrich_eeg_json(bids_root, overwrite):
  1160. """Add information to EEG JSON sidecar files."""
  1161. # find sidecars for AF, AV, YF, YV, description
  1162. sidecars = glob.glob(BIDS_ROOT + os.sep + "*_eeg.json")
  1163. assert len(sidecars) == 5
  1164. for sidecar in sidecars:
  1165. # Read the file provided by mne-bids
  1166. with open(sidecar, "r") as fin:
  1167. sidecar_dict = json.load(fin)
  1168. # Add information
  1169. sidecar_dict["InstitutionName"] = "Max Plack Institute for Human Development"
  1170. sidecar_dict["InstitutionAddress"] = "Lentzeallee 94, 14195 Berlin, Germany"
  1171. sidecar_dict["Manufacturer"] = "Brain Products"
  1172. sidecar_dict["ManufacturersModelName"] = "BrainAmp DC and BrainAmp ExG"
  1173. sidecar_dict[
  1174. "SoftwareVersions"
  1175. ] = "BrainVision Recorder Professional - V. 1.21.0303"
  1176. sidecar_dict[
  1177. "EEGReference"
  1178. ] = "For all EEG channels: FCz, for other channels, see channels.tsv"
  1179. sidecar_dict[
  1180. "EEGGround"
  1181. ] = "For all EEG channels: Fpz, for all other channels: ca. 10cm above navel on right side of belly."
  1182. sidecar_dict["CapManufacturer"] = "EasyCap"
  1183. sidecar_dict["CapManufacturersModelName"] = "actiCAP 64 Ch Standard-2"
  1184. sidecar_dict[
  1185. "Instructions"
  1186. ] = "Instructions can be found in the experiment code, here: https://doi.org/10.5281/zenodo.3354368"
  1187. sidecar_dict["HardwareFilters"] = {
  1188. "Highpass causal RC-filter": {
  1189. "Description": "To prevent signals drifts. This filter is specified with a time constant of 10 seconds, which translates to a cutoff frequency of approximately 0.0159Hz.",
  1190. "TimeConstant": "10s",
  1191. "CutoffFrequency": "0.0159Hz",
  1192. "RollOffSlopeAtCutoffFrequency": "6db/Oct",
  1193. },
  1194. "Lowpass causal Butterworth filter": {
  1195. "Description": "For anti-aliasing, because the BrainAmp DC always records at 5000Hz sampling frequency, and this sampling frequency then gets downsampled (or not) depending on user settings.",
  1196. "CutoffFrequency": "1000Hz",
  1197. "RollOffSlopeAtCutoffFrequency": "30db/Oct",
  1198. },
  1199. }
  1200. sidecar_dict["SoftwareFilters"] = {
  1201. "Lowpass causal Butterworth filter": {
  1202. "Description": "For anti-aliasing prior to downsampling from the default 5000Hz to 1000Hz, because in the case of this dataset, a sampling frequency of 1000Hz was selected in user settings.",
  1203. "CutoffFrequency": "450Hz",
  1204. "RollOffSlopeAtCutoffFrequency": "24db/Oct",
  1205. }
  1206. }
  1207. for key in ["EMGChannelCount", "MiscChannelCount", "TriggerChannelCount"]:
  1208. if key in sidecar_dict:
  1209. del sidecar_dict[key]
  1210. # Write enriched file back
  1211. with open(sidecar, "w", encoding="utf-8") as fout:
  1212. json.dump(sidecar_dict, fout, ensure_ascii=False, indent=4)
  1213. # %% Perform all formatting in parallel
if __name__ == "__main__":
    # Entry point: run the individual conversion steps. Steps that are not
    # currently needed are commented out (presumably already performed in an
    # earlier run -- TODO confirm before re-enabling).
    make_datacite_yml(BIDS_ROOT, OVERWRITE)
    # make_bids_validator_config(BIDS_ROOT, OVERWRITE)
    # make_bidsignore(BIDS_ROOT, OVERWRITE)
    make_README(BIDS_ROOT, OVERWRITE)
    # make_LICENSE(BIDS_ROOT, OVERWRITE)
    make_CHANGES(BIDS_ROOT, OVERWRITE)
    make_dataset_description(BIDS_ROOT, OVERWRITE)
    # make_phenotype(BIDS_ROOT, OVERWRITE)
    # make_participants(BIDS_ROOT, OVERWRITE)
    # make_events_json(BIDS_ROOT, OVERWRITE)
    # Per-subject steps run in parallel; all three per-subject functions share
    # the same argument shape: (bids_root, task_map, sub, just_json, overwrite)
    just_json = True
    inputs = [(BIDS_ROOT, TASK_MAP, f"sub-{sub:02}", just_json, OVERWRITE) for sub in SUBJECTS]
    with multiprocessing.Pool(NJOBS) as pool:
        # pool.starmap(copy_eeg_and_events_files, inputs)
        pool.starmap(copy_eyetracking_files, inputs)
        # pool.starmap(coord_elec_chan_eegjson_scans, inputs)
    # enrich_eeg_json(BIDS_ROOT, OVERWRITE)