JiriVanek
/
DataConversionToNIX


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
							"""
mnetonix.py

Usage:
  python mnetonix.py [--split-data] [--split-stimuli] <datafile> <montage>

Arguments:
  datafile   Either an EDF file or a BrainVision header file (vhdr).
  montage    Any format montage file supported by MNE.

Flags:
  --split-data      If specified, each channel of raw data is stored in its own
                    separate DataArray.

  --split-stimuli   If specified, each stimulus type (identified by its label)
                    is stored in a separate MultiTag (one MultiTag per
                    stimulus type).


(Requires Python 3)

Command line script for reading EDF and BrainVision files using MNE
(mne-python) and storing the data and metadata into a NIX file.  Supports
reading montage files for recording channel locations.

To include in a script, call the 'write_raw_mne()' and provide a NIX filename
and MNE Raw structure as arguments.

NIX Format layout
=================

Data
----
Raw Data are stored in either a single 2-dimensional DataArray or a collection
of DataArrays (one per recording channel).  The latter makes tagging easier
since MultiTag positions and extents don't need to specify every channel they
reference.  However, creating multiple DataArrays makes file sizes much
bigger.

Stimuli
-------
MNE provides stimulus information through the Raw.annotations dictionary.
Onsets correspond to the 'positions' array and durations correspond to the
'extents' array of the "Stimuli" MultiTag.  If stimulus information is split
by label, each MultiTag uses the label as its name.

Metadata
--------
MNE collects metadata into a (nested) dictionary (Raw.info).  All non-empty
keys are converted into Properties in NIX.  The nested structure of the
dictionary is replicated in NIX by creating child Sections, starting with one
root section with name "Info".

Some extra metadata is kept in the '_raw_extras' private member when loading
from EDF files.  This seems to be missing from the 'Info' dictionary in order
to keep it anonymous (missing keys are 'subject_info', 'meas_date', 'file_id',
and 'meas_id').  The '_raw_extras' are also stored in the NIX file in a
separate Section with name "Extras".

"""
import sys
import os
from collections.abc import Iterable, Mapping
import mne
import matplotlib.pyplot as plt
import numpy as np
import nixio as nix


DATA_BLOCK_NAME = "EEG Data Block"
DATA_BLOCK_TYPE = "Recording"
RAW_DATA_GROUP_NAME = "Raw Data Group"
RAW_DATA_GROUP_TYPE = "EEG Channels"
RAW_DATA_TYPE = "Raw Data"


def plot_channel(data_array, index):
    signal = data_array[index]
    tdim = data_array.dimensions[1]
    datadim = data_array.dimensions[0]

    plt.plot(tdim.ticks, signal, label=datadim.labels[index])
    xlabel = f"({tdim.unit})"
    plt.xlabel(xlabel)
    ylabel = f"{datadim.labels[index]} ({data_array.unit})"
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()


def create_md_tree(section, values, block):
    if values is None:
        return
    for k, v in values.items():
        if v is None:
            continue
        if isinstance(v, Iterable):
            if not len(v):
                continue
            ndim = np.ndim(v)
            if ndim > 1:
                da = block.create_data_array(k, "Multidimensional Metadata",
                                             data=v)
                for _ in range(ndim):
                    da.append_set_dimension()
                prop = section.create_property(k, da.id)
                prop.type = str(v.__class__)
                da.metadata = section
                continue
            # check element type
            if isinstance(v, Mapping):
                # Create a new Section to hold the metadata found in the
                # dictionary
                subsec = section.create_section(k, str(v.__class__))
                create_md_tree(subsec, v, block)
                continue
            if isinstance(v[0], Mapping):
                # Create a new subsection to hold each nested dictionary as
                # sub-subsections
                subsec = section.create_section(k, str(v.__class__))
                for idx, subd in enumerate(v):
                    subsubsec = subsec.create_section(f"{k}-{idx}",
                                                      str(subd.__class__))
                    create_md_tree(subsubsec, subd, block)
                continue

        try:
            prop = section.create_property(k, v)
        except TypeError:
            # inconsistent iterable types: upgrade to floats
            prop = section.create_property(k, [float(vi) for vi in v])
        prop.type = str(v.__class__)


def write_single_da(mneraw, block):
    # data and times
    data = mneraw.get_data()
    time = mneraw.times

    nchan = mneraw.info["nchan"]
    print(f"Found {nchan} channels with {mneraw.n_times} samples per channel")

    da = block.create_data_array("EEG Data", RAW_DATA_TYPE, data=data)
    block.groups[RAW_DATA_GROUP_NAME].data_arrays.append(da)
    da.unit = "V"

    for dimlen in data.shape:
        if dimlen == nchan:
            # channel labels: SetDimension
            da.append_set_dimension(labels=mneraw.ch_names)
        elif dimlen == mneraw.n_times:
            # times: RangeDimension
            # NOTE: EDF always uses seconds
            da.append_range_dimension(ticks=time, label="time", unit="s")


def write_multi_da(mneraw, block):
    data = mneraw.get_data()
    time = mneraw.times

    nchan = mneraw.info["nchan"]
    channames = mneraw.ch_names

    print(f"Found {nchan} channels with {mneraw.n_times} samples per channel")

    # find the channel dimension to iterate over it
    for idx, dimlen in enumerate(data.shape):
        if dimlen == nchan:
            chanidx = idx
            break
    else:
        raise RuntimeError("Could not find data dimension that matches number "
                           "of channels")

    for idx, chandata in enumerate(np.rollaxis(data, chanidx)):
        chname = channames[idx]
        da = block.create_data_array(chname, RAW_DATA_TYPE, data=chandata)
        block.groups[RAW_DATA_GROUP_NAME].data_arrays.append(da)
        da.unit = "V"

        # times: RangeDimension
        # NOTE: EDF always uses seconds
        da.append_range_dimension(ticks=time, label="time", unit="s")


def separate_stimulus_types(stimuli):
    # separate stimuli based on label
    stimdict = dict()
    for label, onset, duration in zip(stimuli.description,
                                      stimuli.onset,
                                      stimuli.duration):
        if label not in stimdict:
            stimdict[label] = [(label, onset, duration)]
        else:
            stimdict[label].append((label, onset, duration))
    return stimdict


def write_stim_tags(mneraw, block, split):
    stimuli = mneraw.annotations

    if split:
        stimtuples = separate_stimulus_types(stimuli)
        for label, st in stimtuples.items():
            label = label.replace("/", "|")
            create_stimulus_multi_tag(st, block, mneraw, mtagname=label)
    else:
        stimtuples = [(l, o, d) for l, o, d in zip(stimuli.description,
                                                   stimuli.onset,
                                                   stimuli.duration)]
        create_stimulus_multi_tag(stimtuples, block, mneraw)


def create_stimulus_multi_tag(stimtuples, block, mneraw, mtagname="Stimuli"):
    # check dimensionality of data
    datashape = block.groups[RAW_DATA_GROUP_NAME].data_arrays[0].shape

    labels = [st[0] for st in stimtuples]
    onsets = [st[1] for st in stimtuples]
    durations = [st[2] for st in stimtuples]

    ndim = len(datashape)
    if ndim == 1:
        positions = onsets
        extents = durations
    else:
        channelextent = mneraw.info["nchan"] - 1
        positions = [(0, p) for p in onsets]
        extents = [(channelextent, e) for e in durations]

    posda = block.create_data_array(f"{mtagname} onset", "Stimuli Positions",
                                    data=positions)
    posda.append_set_dimension(labels=labels)

    extda = block.create_data_array(f"{mtagname} durations", "Stimuli Extents",
                                    data=extents)
    extda.append_set_dimension(labels=labels)

    for _ in range(ndim-1):
        # extra set dimensions for any extra data dimensions (beyond the first)
        posda.append_set_dimension()
        extda.append_set_dimension()

    stimmtag = block.create_multi_tag(mtagname, "EEG Stimuli",
                                      positions=posda)
    stimmtag.extents = extda
    block.groups[RAW_DATA_GROUP_NAME].multi_tags.append(stimmtag)

    for da in block.groups[RAW_DATA_GROUP_NAME].data_arrays:
        if da.type == RAW_DATA_TYPE:
            stimmtag.references.append(da)


def write_raw_mne(nfname, mneraw,
                  split_data_channels=False, split_stimuli=False):
    """
    Writes the provided Raw MNE structure to a NIX file with the given name.

    :param nfname: Name for the NIX file to write to. Existing file will be
    overwritten.
    :param mneraw: An MNE Raw structure (any mne.io.BaseRaw subclass).
    :param split_data_channels: If True, each raw data channel will be stored
    in a separate DataArray.
    :param split_stimuli: If True, stimuli will be split into separate
    MultiTags based on the stimulus type (label).
    :rtype: None
    """
    mneinfo = mneraw.info
    extrainfo = mneraw._raw_extras

    # Create NIX file
    nf = nix.File(nfname, nix.FileMode.Overwrite)

    # Write Data to NIX
    block = nf.create_block(DATA_BLOCK_NAME, DATA_BLOCK_TYPE,
                            compression=nix.Compression.DeflateNormal)
    block.create_group(RAW_DATA_GROUP_NAME, RAW_DATA_GROUP_TYPE)

    if split_data_channels:
        write_multi_da(mneraw, block)
    else:
        write_single_da(mneraw, block)

    if mneraw.annotations:
        write_stim_tags(mneraw, block, split_stimuli)

    # Write metadata to NIX
    # info dictionary
    infomd = nf.create_section("Info", "File metadata")
    create_md_tree(infomd, mneinfo, block)
    # extras
    if len(extrainfo) > 1:
        for idx, emd_i in enumerate(extrainfo):
            extrasmd = nf.create_section(f"Extras-{idx}",
                                         "Raw Extras metadata")
            create_md_tree(extrasmd, emd_i, block)
    elif extrainfo:
        extrasmd = nf.create_section("Extras", "Raw Extras metadata")
        create_md_tree(extrasmd, extrainfo[0], block)

    # all done
    nf.close()
    print(f"Created NIX file at '{nfname}'")
    print("Done")


def main():
    args = sys.argv
    if len(args) < 2:
        print("Please provide either a BrainVision vhdr or "
              "an EDF filename as the first argument")
        sys.exit(1)

    splitdata = False
    if "--split-data" in args:
        splitdata = True
        args.remove("--split-data")

    splitstim = False
    if "--split-stimuli" in args:
        splitstim = True
        args.remove("--split-stimuli")

    datafilename = args[1]
    montage = None
    if len(args) > 2:
        montage = args[2]
        montage = os.path.abspath(montage)
    root, ext = os.path.splitext(datafilename)
    nfname = root + os.path.extsep + "nix"
    if ext.casefold() == ".edf".casefold():
        mneraw = mne.io.read_raw_edf(datafilename, montage=montage,
                                     preload=True, stim_channel=False)
    elif ext.casefold() == ".vhdr".casefold():
        mneraw = mne.io.read_raw_brainvision(datafilename, montage=montage,
                                             preload=True, stim_channel=False)
    else:
        raise RuntimeError(f"Unknown extension '{ext}'")
    print(f"Converting '{datafilename}' to NIX")
    if splitdata:
        print("  Creating one DataArray per channel")
    if splitstim:
        print("  Creating one MultiTag for each stimulus type")

    write_raw_mne(nfname, mneraw, splitdata, splitstim)

    mneraw.close()


if __name__ == "__main__":
    main()