- """
- Class for reading output files from NEST simulations
- ( http://www.nest-simulator.org/ ).
- Tested with NEST2.10.0
- Depends on: numpy, quantities
- Supported: Read
- Authors: Julia Sprenger, Maximilian Schmidt, Johanna Senk
- """
import os.path
import warnings
from datetime import datetime

import numpy as np
import quantities as pq

from neo.io.baseio import BaseIO
from neo.core import Block, Segment, SpikeTrain, AnalogSignal

value_type_dict = {'V': pq.mV,
                   'I': pq.pA,
                   'g': pq.CompoundUnit("10^-9*S"),
                   'no type': pq.dimensionless}


class NestIO(BaseIO):
    """
    Class for reading NEST output files. GDF files for the spike data and DAT
    files for analog signals are possible.

    Usage:
        >>> from neo.io.nestio import NestIO

        >>> files = ['membrane_voltages-1261-0.dat',
        ...          'spikes-1258-0.gdf']
        >>> r = NestIO(filenames=files)
        >>> seg = r.read_segment(gid_list=[], t_start=400 * pq.ms,
        ...                      t_stop=600 * pq.ms,
        ...                      id_column_gdf=0, time_column_gdf=1,
        ...                      id_column_dat=0, time_column_dat=1,
        ...                      value_columns_dat=2)
    """

    is_readable = True  # class supports reading, but not writing
    is_writable = False

    supported_objects = [SpikeTrain, AnalogSignal, Segment, Block]
    readable_objects = [SpikeTrain, AnalogSignal, Segment, Block]

    has_header = False
    is_streameable = False

    write_params = None  # writing is not supported

    name = 'nest'
    extensions = ['gdf', 'dat']
    mode = 'file'

    def __init__(self, filenames=None):
        """
        Parameters
        ----------
        filenames: string or list of strings, default=None
            The filename or list of filenames to load.
        """
        if isinstance(filenames, str):
            filenames = [filenames]

        self.filenames = filenames
        self.avail_formats = {}
        self.avail_IOs = {}

        for filename in filenames:
            path, ext = os.path.splitext(filename)
            ext = ext.strip('.')
            if ext in self.extensions:
                if ext in self.avail_IOs:
                    raise ValueError('Received multiple files with "%s" '
                                     'extension. Can only load a single file '
                                     'of this type.' % ext)
                self.avail_IOs[ext] = ColumnIO(filename)
                self.avail_formats[ext] = path

    def __read_analogsignals(self, gid_list, time_unit, t_start=None,
                             t_stop=None, sampling_period=None,
                             id_column=0, time_column=1,
                             value_columns=2, value_types=None,
                             value_units=None):
        """
        Internal function called by read_analogsignal() and read_segment().
        """
        if 'dat' not in self.avail_formats:
            raise ValueError('Cannot load analogsignals. No DAT file '
                             'provided.')

        # checking gid input parameters
        gid_list, id_column = self._check_input_gids(gid_list, id_column)
        # checking time input parameters
        t_start, t_stop = self._check_input_times(t_start, t_stop,
                                                  mandatory=False)

        # checking value input parameters
        (value_columns, value_types, value_units) = \
            self._check_input_values_parameters(value_columns, value_types,
                                                value_units)

        # defining standard column order for internal usage
        # [id_column, time_column, value_column1, value_column2, ...]
        column_ids = [id_column, time_column] + value_columns
        for i, cid in enumerate(column_ids):
            if cid is None:
                column_ids[i] = -1

        # assert that no single column is assigned twice
        column_list = [id_column, time_column] + value_columns
        column_list_no_None = [c for c in column_list if c is not None]
        if len(np.unique(column_list_no_None)) < len(column_list_no_None):
            raise ValueError(
                'One or more columns have been specified to contain '
                'the same data. Columns were specified to %s.'
                '' % column_list_no_None)

        # extracting condition and sorting parameters for raw data loading
        (condition, condition_column,
         sorting_column) = self._get_conditions_and_sorting(id_column,
                                                            time_column,
                                                            gid_list,
                                                            t_start,
                                                            t_stop)
        # loading raw data columns
        data = self.avail_IOs['dat'].get_columns(
            column_ids=column_ids,
            condition=condition,
            condition_column=condition_column,
            sorting_columns=sorting_column)

        sampling_period = self._check_input_sampling_period(sampling_period,
                                                            time_column,
                                                            time_unit,
                                                            data)
        analogsignal_list = []

        # extracting complete gid list for anasig generation
        if (gid_list == []) and id_column is not None:
            gid_list = np.unique(data[:, id_column])

        # generate analogsignals for each neuron ID
        for i in gid_list:
            selected_ids = self._get_selected_ids(
                i, id_column, time_column, t_start, t_stop, time_unit,
                data)

            # extract starting time of analogsignal
            if (time_column is not None) and data.size:
                anasig_start_time = data[selected_ids[0], 1] * time_unit
            else:
                # set t_start equal to sampling_period because NEST starts
                # recording only after 1 sampling_period
                anasig_start_time = 1. * sampling_period

            # create one analogsignal per value column requested
            for v_id, value_column in enumerate(value_columns):
                signal = data[
                    selected_ids[0]:selected_ids[1], value_column]

                # create AnalogSignal objects and annotate them with
                # the neuron ID
                analogsignal_list.append(AnalogSignal(
                    signal * value_units[v_id],
                    sampling_period=sampling_period,
                    t_start=anasig_start_time,
                    id=i,
                    type=value_types[v_id]))
                # check for correct length of analogsignal
                assert (analogsignal_list[-1].t_stop
                        == anasig_start_time + len(signal) * sampling_period)
        return analogsignal_list
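
    # Note on signal timing (illustrative, with hypothetical numbers): NEST
    # writes the first sample one sampling period after recording starts, so
    # a signal of N samples with period dt spans [t0, t0 + N*dt). For
    # example, 3 samples at dt = 0.1 ms with t0 = 0.1 ms give
    # t_stop = 0.1 ms + 3 * 0.1 ms = 0.4 ms, which the assertion above
    # verifies.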

    def __read_spiketrains(self, gdf_id_list, time_unit,
                           t_start, t_stop, id_column,
                           time_column, **args):
        """
        Internal function for reading multiple spiketrains at once.
        This function is called by read_spiketrain() and read_segment().
        """
        if 'gdf' not in self.avail_IOs:
            raise ValueError('Cannot load spiketrains. No GDF file provided.')

        # assert that the file contains spike times
        if time_column is None:
            raise ValueError('Time column is None. No spike times to '
                             'be read in.')

        gdf_id_list, id_column = self._check_input_gids(gdf_id_list, id_column)

        t_start, t_stop = self._check_input_times(t_start, t_stop,
                                                  mandatory=True)

        # assert that no single column is assigned twice
        if id_column == time_column:
            raise ValueError('One or more columns have been specified to '
                             'contain the same data.')

        # defining standard column order for internal usage
        # [id_column, time_column, value_column1, value_column2, ...]
        column_ids = [id_column, time_column]
        for i, cid in enumerate(column_ids):
            if cid is None:
                column_ids[i] = -1

        (condition, condition_column, sorting_column) = \
            self._get_conditions_and_sorting(id_column, time_column,
                                             gdf_id_list, t_start, t_stop)

        data = self.avail_IOs['gdf'].get_columns(
            column_ids=column_ids,
            condition=condition,
            condition_column=condition_column,
            sorting_columns=sorting_column)

        # create a list of SpikeTrains for all neuron IDs in gdf_id_list
        # assign spike times to neuron IDs if id_column is given
        if id_column is not None:
            if gdf_id_list == []:
                gdf_id_list = np.unique(data[:, id_column])

            spiketrain_list = []
            for nid in gdf_id_list:
                selected_ids = self._get_selected_ids(nid, id_column,
                                                      time_column, t_start,
                                                      t_stop, time_unit, data)
                times = data[selected_ids[0]:selected_ids[1], time_column]
                spiketrain_list.append(SpikeTrain(
                    times, units=time_unit,
                    t_start=t_start, t_stop=t_stop,
                    id=nid, **args))

        # if id_column is not given, all spike times are collected in one
        # spike train with id=None
        else:
            train = data[:, time_column]
            spiketrain_list = [SpikeTrain(train, units=time_unit,
                                          t_start=t_start, t_stop=t_stop,
                                          id=None, **args)]
        return spiketrain_list

    def _check_input_times(self, t_start, t_stop, mandatory=True):
        """
        Checks input times for existence and sets default values if
        necessary.

        t_start: pq.quantity.Quantity, start time of the time range to load.
        t_stop: pq.quantity.Quantity, stop time of the time range to load.
        mandatory: bool, if True times can not be None and an error will be
                   raised. if False, time values of None will be replaced by
                   -infinity or infinity, respectively. default: True.
        """
        if t_stop is None:
            if mandatory:
                raise ValueError('No t_stop specified.')
            else:
                t_stop = np.inf * pq.s
        if t_start is None:
            if mandatory:
                raise ValueError('No t_start specified.')
            else:
                t_start = -np.inf * pq.s

        for time in (t_start, t_stop):
            if not isinstance(time, pq.quantity.Quantity):
                raise TypeError('Time value (%s) is not a quantity.' % time)
        return t_start, t_stop
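
    # Illustrative behaviour sketch (hypothetical call): with mandatory=False
    # missing bounds are widened to cover the whole recording,
    #   _check_input_times(None, None, mandatory=False)
    #   -> (-inf * pq.s, inf * pq.s)
    # while mandatory=True raises a ValueError for either missing bound.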

    def _check_input_values_parameters(self, value_columns, value_types,
                                       value_units):
        """
        Checks value parameters for consistency.

        value_columns: int, column id containing the value to load.
        value_types: list of strings, type of values.
        value_units: list of units of the value columns.

        Returns
        adjusted list of [value_columns, value_types, value_units]
        """
        if value_columns is None:
            raise ValueError('No value column provided.')
        if isinstance(value_columns, int):
            value_columns = [value_columns]
        if value_types is None:
            value_types = ['no type'] * len(value_columns)
        elif isinstance(value_types, str):
            value_types = [value_types]

        # translating value types into units as far as possible
        if value_units is None:
            short_value_types = [vtype.split('_')[0] for vtype in value_types]
            if not all([svt in value_type_dict for svt in short_value_types]):
                raise ValueError('Cannot interpret value types '
                                 '"%s"' % value_types)
            value_units = [value_type_dict[svt] for svt in short_value_types]

        # checking for same number of value types, units and columns
        if not (len(value_types) == len(value_units) == len(value_columns)):
            raise ValueError('Length of value types, units and columns does '
                             'not match (%i,%i,%i)' % (len(value_types),
                                                       len(value_units),
                                                       len(value_columns)))
        if not all([isinstance(vunit, pq.UnitQuantity) for vunit in
                    value_units]):
            raise ValueError('No value unit or standard value type specified.')

        return value_columns, value_types, value_units
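
    # Illustrative sketch: value types are mapped to units via the prefix
    # before the first underscore, using value_type_dict, e.g.
    #   'V_m' -> 'V' -> pq.mV
    #   'g_ex' -> 'g' -> pq.CompoundUnit("10^-9*S")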

    def _check_input_gids(self, gid_list, id_column):
        """
        Checks gid values and column for consistency.

        gid_list: list of int or None, gid to load.
        id_column: int, id of the column containing the gids.

        Returns
        adjusted list of [gid_list, id_column].
        """
        if gid_list is None:
            gid_list = [gid_list]

        if None in gid_list and id_column is not None:
            raise ValueError('No neuron IDs specified but file contains '
                             'neuron IDs in column %s. Specify empty list to '
                             'retrieve spiketrains of all neurons.'
                             '' % str(id_column))

        if gid_list != [None] and id_column is None:
            raise ValueError('Specified neuron IDs to be %s, but no ID column '
                             'specified.' % gid_list)
        return gid_list, id_column

    def _check_input_sampling_period(self, sampling_period, time_column,
                                     time_unit, data):
        """
        Checks sampling period, times and time unit for consistency.

        sampling_period: pq.quantity.Quantity, sampling period of data to
                         load.
        time_column: int, column id of times in data to load.
        time_unit: pq.quantity.Quantity, unit of time used in the data to
                   load.
        data: numpy array, the data to be loaded / interpreted.

        Returns
        pq.quantities.Quantity object, the updated sampling period.
        """
        if sampling_period is None:
            if time_column is not None:
                data_sampling = np.unique(
                    np.diff(sorted(np.unique(data[:, 1]))))
                if len(data_sampling) > 1:
                    raise ValueError('Different sampling distances found in '
                                     'data set (%s)' % data_sampling)
                else:
                    dt = data_sampling[0]
            else:
                raise ValueError('Cannot estimate sampling period without a '
                                 'time column id.')
            sampling_period = pq.CompoundUnit(str(dt) + '*'
                                              + time_unit.units.u_symbol)
        elif not isinstance(sampling_period, pq.UnitQuantity):
            raise ValueError("sampling_period is not specified as a unit.")
        return sampling_period
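
    # Illustrative sketch (hypothetical values): for time stamps recorded in
    # ms on a 0.1 ms grid, the period is inferred from the unique differences,
    #   np.diff(sorted(np.unique(times))) -> array([0.1])
    #   sampling_period = pq.CompoundUnit('0.1*ms')
    # Mixed step sizes in the time column raise a ValueError instead.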

    def _get_conditions_and_sorting(self, id_column, time_column, gid_list,
                                    t_start, t_stop):
        """
        Calculates the condition, condition_column and sorting_column based on
        other parameters supplied for loading the data.

        id_column: int, id of the column containing gids.
        time_column: int, id of the column containing times.
        gid_list: list of int, gid to be loaded.
        t_start: pq.quantity.Quantity, start of the time range to be loaded.
        t_stop: pq.quantity.Quantity, stop of the time range to be loaded.

        Returns
        updated [condition, condition_column, sorting_column].
        """
        condition, condition_column = None, None
        sorting_column = []
        curr_id = 0
        if ((gid_list != [None]) and (gid_list is not None)):
            if gid_list != []:
                def condition(x):
                    return x in gid_list

                condition_column = id_column
            sorting_column.append(curr_id)  # Sorting according to gids first
            curr_id += 1
        if time_column is not None:
            sorting_column.append(curr_id)  # Sorting according to time
            curr_id += 1
        elif t_start != -np.inf and t_stop != np.inf:
            warnings.warn('Ignoring t_start and t_stop parameters, because no '
                          'time column id is provided.')
        if sorting_column == []:
            sorting_column = None
        else:
            sorting_column = sorting_column[::-1]
        return condition, condition_column, sorting_column
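
    # Illustrative sketch (hypothetical call): requesting gids [3, 5] from
    # data with an id column and a time column yields
    #   condition        -> a predicate that is True for x in [3, 5]
    #   condition_column -> the id column
    #   sorting_column   -> [1, 0], i.e. increasing sorting priority: time
    #                       first, then gid, so rows end up grouped by gid
    #                       and time-sorted within each gid.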

    def _get_selected_ids(self, gid, id_column, time_column, t_start, t_stop,
                          time_unit, data):
        """
        Calculates the data range to load depending on the selected gid
        and the provided time range (t_start, t_stop).

        gid: int, gid to be loaded.
        id_column: int, id of the column containing gids.
        time_column: int, id of the column containing times.
        t_start: pq.quantity.Quantity, start of the time range to load.
        t_stop: pq.quantity.Quantity, stop of the time range to load.
        time_unit: pq.quantity.Quantity, time unit of the data to load.
        data: numpy array, data to load.

        Returns
        numpy array of [start_row, stop_row] indices delimiting the rows of
        data that belong to the requested gid and time range.
        """
        gid_ids = np.array([0, data.shape[0]])
        if id_column is not None:
            gid_ids = np.array([np.searchsorted(data[:, 0], gid, side='left'),
                                np.searchsorted(data[:, 0], gid,
                                                side='right')])
        gid_data = data[gid_ids[0]:gid_ids[1], :]

        # select only requested time range
        id_shifts = np.array([0, 0])
        if time_column is not None:
            id_shifts[0] = np.searchsorted(gid_data[:, 1],
                                           t_start.rescale(
                                               time_unit).magnitude,
                                           side='left')
            id_shifts[1] = (np.searchsorted(gid_data[:, 1],
                                            t_stop.rescale(
                                                time_unit).magnitude,
                                            side='left') - gid_data.shape[0])

        selected_ids = gid_ids + id_shifts
        return selected_ids
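
    # Illustrative sketch (hypothetical values): with the id column sorted,
    # np.searchsorted yields the contiguous row block of one gid, e.g.
    #   ids = [1, 1, 2, 2, 2, 3], gid = 2
    #   searchsorted(ids, 2, 'left') -> 2, searchsorted(ids, 2, 'right') -> 5
    # so rows 2:5 belong to gid 2; the time-based shifts then narrow this
    # block to [t_start, t_stop).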

    def read_block(self, gid_list=None, time_unit=pq.ms, t_start=None,
                   t_stop=None, sampling_period=None, id_column_dat=0,
                   time_column_dat=1, value_columns_dat=2,
                   id_column_gdf=0, time_column_gdf=1, value_types=None,
                   value_units=None, lazy=False):
        assert not lazy, 'This IO does not support lazy loading'
        seg = self.read_segment(gid_list, time_unit, t_start,
                                t_stop, sampling_period, id_column_dat,
                                time_column_dat, value_columns_dat,
                                id_column_gdf, time_column_gdf, value_types,
                                value_units)
        blk = Block(file_origin=seg.file_origin,
                    file_datetime=seg.file_datetime)
        blk.segments.append(seg)
        seg.block = blk
        return blk

    def read_segment(self, gid_list=None, time_unit=pq.ms, t_start=None,
                     t_stop=None, sampling_period=None, id_column_dat=0,
                     time_column_dat=1, value_columns_dat=2,
                     id_column_gdf=0, time_column_gdf=1, value_types=None,
                     value_units=None, lazy=False):
        """
        Reads a Segment which contains SpikeTrain(s) with specified neuron IDs
        from the GDF data.

        Parameters
        ----------
        gid_list : list, default: None
            A list of GDF IDs of which to return SpikeTrain(s). gid_list must
            be specified if the GDF file contains neuron IDs, the default None
            then raises an error. Specify an empty list [] to retrieve the
            spike trains of all neurons. May also be given as a 2-tuple
            (start, stop), which is expanded to the inclusive ID range.
        time_unit : Quantity (time), optional, default: quantities.ms
            The time unit of recorded time stamps in DAT as well as GDF files.
        t_start : Quantity (time), optional, default: 0 * pq.ms
            Start time of SpikeTrain.
        t_stop : Quantity (time), default: None
            Stop time of SpikeTrain. t_stop must be specified, the default
            None raises an error.
        sampling_period : Quantity (time), optional, default: None
            Sampling period of the recorded data.
        id_column_dat : int, optional, default: 0
            Column index of neuron IDs in the DAT file.
        time_column_dat : int, optional, default: 1
            Column index of time stamps in the DAT file.
        value_columns_dat : int, optional, default: 2
            Column index of the analog values recorded in the DAT file.
        id_column_gdf : int, optional, default: 0
            Column index of neuron IDs in the GDF file.
        time_column_gdf : int, optional, default: 1
            Column index of time stamps in the GDF file.
        value_types : str, optional, default: None
            NEST data type of the analog values recorded,
            e.g. 'V_m', 'I', 'g_e'.
        value_units : Quantity (amplitude), default: None
            The physical unit of the recorded signal values.
        lazy : bool, optional, default: False
            Lazy loading is not supported.

        Returns
        -------
        seg : Segment
            The Segment contains one SpikeTrain and one AnalogSignal for
            each ID in gid_list.
        """
        assert not lazy, 'This IO does not support lazy loading'
        if isinstance(gid_list, tuple):
            if gid_list[0] > gid_list[1]:
                raise ValueError('The second entry in gid_list must be '
                                 'greater than or equal to the first entry.')
            gid_list = range(gid_list[0], gid_list[1] + 1)

        # __read_xxx() needs a list of IDs
        if gid_list is None:
            gid_list = [None]

        # create an empty Segment
        seg = Segment(file_origin=",".join(self.filenames))
        seg.file_datetime = datetime.fromtimestamp(
            os.stat(self.filenames[0]).st_mtime)
        # todo: rather than take the first file for the timestamp, we should
        # take the oldest; in practice, there won't be much difference

        # Load analogsignals and attach to Segment
        if 'dat' in self.avail_formats:
            seg.analogsignals = self.__read_analogsignals(
                gid_list,
                time_unit,
                t_start,
                t_stop,
                sampling_period=sampling_period,
                id_column=id_column_dat,
                time_column=time_column_dat,
                value_columns=value_columns_dat,
                value_types=value_types,
                value_units=value_units)
        if 'gdf' in self.avail_formats:
            seg.spiketrains = self.__read_spiketrains(
                gid_list,
                time_unit,
                t_start,
                t_stop,
                id_column=id_column_gdf,
                time_column=time_column_gdf)

        return seg
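
    # Illustrative usage sketch (hypothetical file name): gid_list may be a
    # 2-tuple, which is expanded to the inclusive ID range, e.g.
    #   io = NestIO(filenames='spikes-1258-0.gdf')
    #   seg = io.read_segment(gid_list=(1, 5), t_start=0 * pq.ms,
    #                         t_stop=1000 * pq.ms)
    # loads spike trains for gids 1, 2, 3, 4 and 5.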

    def read_analogsignal(self, gid=None, time_unit=pq.ms, t_start=None,
                          t_stop=None, sampling_period=None, id_column=0,
                          time_column=1, value_column=2, value_type=None,
                          value_unit=None, lazy=False):
        """
        Reads an AnalogSignal with specified neuron ID from the DAT data.

        Parameters
        ----------
        gid : int, default: None
            The neuron ID of the returned AnalogSignal. gid must be specified
            if the DAT file contains neuron IDs; the default None then raises
            an error.
        time_unit : Quantity (time), optional, default: quantities.ms
            The time unit of recorded time stamps.
        t_start : Quantity (time), optional, default: 0 * pq.ms
            Start time of AnalogSignal.
        t_stop : Quantity (time), default: None
            Stop time of AnalogSignal. t_stop must be specified; the default
            None raises an error.
        sampling_period : Quantity (time), optional, default: None
            Sampling period of the recorded data.
        id_column : int, optional, default: 0
            Column index of neuron IDs.
        time_column : int, optional, default: 1
            Column index of time stamps.
        value_column : int, optional, default: 2
            Column index of the analog values recorded.
        value_type : str, optional, default: None
            NEST data type of the analog values recorded,
            e.g. 'V_m', 'I', 'g_e'.
        value_unit : Quantity (amplitude), default: None
            The physical unit of the recorded signal values.
        lazy : bool, optional, default: False
            Lazy loading is not supported.

        Returns
        -------
        analogsignal : AnalogSignal
            The requested AnalogSignal object with an annotation 'id'
            corresponding to the gid parameter.
        """
        assert not lazy, 'This IO does not support lazy loading'
        # __read_analogsignals() needs a list of IDs
        return self.__read_analogsignals([gid], time_unit,
                                         t_start, t_stop,
                                         sampling_period=sampling_period,
                                         id_column=id_column,
                                         time_column=time_column,
                                         value_columns=value_column,
                                         value_types=value_type,
                                         value_units=value_unit)[0]

    def read_spiketrain(
            self, gdf_id=None, time_unit=pq.ms, t_start=None, t_stop=None,
            id_column=0, time_column=1, lazy=False, **args):
        """
        Reads a SpikeTrain with specified neuron ID from the GDF data.

        Parameters
        ----------
        gdf_id : int, default: None
            The GDF ID of the returned SpikeTrain. gdf_id must be specified if
            the GDF file contains neuron IDs.
        time_unit : Quantity (time), optional, default: quantities.ms
            The time unit of recorded time stamps.
        t_start : Quantity (time), default: None
            Start time of SpikeTrain. t_start must be specified.
        t_stop : Quantity (time), default: None
            Stop time of SpikeTrain. t_stop must be specified.
        id_column : int, optional, default: 0
            Column index of neuron IDs.
        time_column : int, optional, default: 1
            Column index of time stamps.
        lazy : bool, optional, default: False
            Lazy loading is not supported.

        Returns
        -------
        spiketrain : SpikeTrain
            The requested SpikeTrain object with an annotation 'id'
            corresponding to the gdf_id parameter.
        """
        assert not lazy, 'This IO does not support lazy loading'
        if (not isinstance(gdf_id, int)) and gdf_id is not None:
            raise ValueError('gdf_id has to be of type int or None.')

        if gdf_id is None and id_column is not None:
            raise ValueError('No neuron ID specified but file contains '
                             'neuron IDs in column ' + str(id_column) + '.')

        return self.__read_spiketrains([gdf_id], time_unit,
                                       t_start, t_stop,
                                       id_column, time_column,
                                       **args)[0]
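
    # Illustrative usage sketch (hypothetical file name):
    #   io = NestIO(filenames='spikes-1258-0.gdf')
    #   st = io.read_spiketrain(gdf_id=3, t_start=0 * pq.ms,
    #                           t_stop=1000 * pq.ms)
    #   st.annotations['id']  # -> 3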


class ColumnIO:
    '''
    Class for reading an ASCII file containing multiple columns of data.
    '''

    def __init__(self, filename):
        """
        filename: string, path to ASCII file to read.
        """
        self.filename = filename

        # read the first line to check the data type (int or float) of the
        # data
        with open(self.filename) as f:
            line = f.readline()

        additional_parameters = {}
        if '.' not in line:
            additional_parameters['dtype'] = np.int32

        self.data = np.loadtxt(self.filename, **additional_parameters)

        if len(self.data.shape) == 1:
            self.data = self.data[:, np.newaxis]

    def get_columns(self, column_ids='all', condition=None,
                    condition_column=None, sorting_columns=None):
        """
        column_ids : 'all' or list of int, the ids of columns to
                     extract.
        condition : None or function, which is applied to each row to evaluate
                    if it should be included in the result.
                    Needs to return a bool value.
        condition_column : int, id of the column on which the condition
                           function is applied to.
        sorting_columns : int or list of int, column ids to sort by.
                          List entries have to be ordered by increasing
                          sorting priority!

        Returns
        -------
        numpy array containing the requested data.
        """
        if column_ids == [] or column_ids == 'all':
            column_ids = range(self.data.shape[-1])

        if isinstance(column_ids, (int, float)):
            column_ids = [column_ids]
        column_ids = np.array(column_ids)

        if max(column_ids) >= self.data.shape[1]:
            raise ValueError('Cannot load column ID %i. File contains '
                             'only %i columns' % (max(column_ids),
                                                  self.data.shape[1]))

        if sorting_columns is not None:
            if isinstance(sorting_columns, int):
                sorting_columns = [sorting_columns]
            if max(sorting_columns) >= self.data.shape[1]:
                raise ValueError('Cannot sort by column ID %i. File contains '
                                 'only %i columns' % (max(sorting_columns),
                                                      self.data.shape[1]))

        # Starting with whole dataset being selected for return
        selected_data = self.data

        # Apply filter condition to rows
        if condition and (condition_column is None):
            raise ValueError('Filter condition provided, but no '
                             'condition_column ID provided')
        elif (condition_column is not None) and (condition is None):
            warnings.warn('Condition column ID provided, but no condition '
                          'given. No filtering will be performed.')
        elif (condition is not None) and (condition_column is not None):
            condition_function = np.vectorize(condition)
            mask = condition_function(
                selected_data[:, condition_column]).astype(bool)
            selected_data = selected_data[mask, :]

        # Apply sorting if requested
        if sorting_columns is not None:
            values_to_sort = selected_data[:, sorting_columns].T
            ordered_ids = np.lexsort(tuple(values_to_sort[i] for i in
                                           range(len(values_to_sort))))
            selected_data = selected_data[ordered_ids, :]

        # Select only requested columns
        selected_data = selected_data[:, column_ids]
        return selected_data
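

# Minimal usage sketch (file names below are hypothetical; point them at your
# own NEST output). Reads one segment and demonstrates the low-level column
# access provided by ColumnIO.
if __name__ == '__main__':
    io = NestIO(filenames=['membrane_voltages-1261-0.dat',
                           'spikes-1258-0.gdf'])
    segment = io.read_segment(gid_list=[], t_start=400 * pq.ms,
                              t_stop=600 * pq.ms, value_types='V_m')
    print(len(segment.spiketrains), len(segment.analogsignals))

    # raw access: spike times (column 1) of gid 1, sorted by time
    raw = ColumnIO('spikes-1258-0.gdf')
    times = raw.get_columns(column_ids=[1], condition=lambda x: x == 1,
                            condition_column=0, sorting_columns=1)
    print(times.shape)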
|