extract stimulation script runs

Paul Pfeiffer 5 years ago
commit
dd23450096
7 changed files with 635 additions and 0 deletions
  1. 1 0
      .gitignore
  2. 25 0
      makefile
  3. 10 0
      metadata/cells.csv
  4. 122 0
      scripts/extract_stimulations.py
  5. 0 0
      scripts/tools/__init__.py
  6. 54 0
      scripts/tools/helper.py
  7. 423 0
      scripts/tools/pyrelacs.py

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+out/

+ 25 - 0
makefile

@@ -0,0 +1,25 @@
+all: extract_stimulations assign_protocols filter analyse_firing_rates analyse_persistent_activity plot_firing_rates plot_traces report
+
+extract_stimulations:
+	python scripts/extract_stimulations.py
+
+assign_protocols:
+	python scripts/extract_protocols.py
+
+filter:
+	python scripts/filter_stimulations.py
+
+analyse_firing_rates:
+	python scripts/analyse_firing_rates.py
+
+analyse_persistent_activity:
+	python scripts/analyse_persistent_activity.py
+
+plot_firing_rates:
+	python scripts/plot_firing_rates.py
+
+plot_traces:
+	python scripts/plot_traces.py
+
+report:
+	python scripts/generate_report.py

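The stages can also be run individually; for example, to regenerate only the stimulation metadata added in this commit (a usage sketch — the later targets refer to scripts that are not part of this commit yet):

    make extract_stimulations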
+ 10 - 0
metadata/cells.csv

@@ -0,0 +1,10 @@
+CellId, Recording
+20170216_S1_C1, 2017-02-16-ac
+20170216_S1_C1, 2017-02-16-ad
+20170217_S1_C1, 2017-02-17-aa
+20170217_S1_C1, 2017-02-17-ab
+20170217_S1_C1, 2017-02-17-ac
+20170217_S1_C1, 2017-02-17-ad
+20170217_S1_C1, 2017-02-17-ae
+20170217_S1_C1, 2017-02-17-af
+20170217_S2_C1, 2017-02-17-ag

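The fields in cells.csv are padded with a space after each comma, which is why extract_stimulations.py strips every column after loading. A minimal alternative sketch that handles the leading padding at parse time (skipinitialspace is a standard pandas option; trailing whitespace would still need the explicit strip):

    import pandas as pd

    cell_recordings = pd.read_csv("metadata/cells.csv", header=0, skipinitialspace=True)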
+ 122 - 0
scripts/extract_stimulations.py

@@ -0,0 +1,122 @@
+import re
+
+import pandas as pd
+
+import tools.pyrelacs as pr
+from tools.helper import stimulation_metadata_raw
+
+
+def extract_value_unit(value_unit_string):
+    value_unit_pattern = re.compile(r'(\d+[e][+]\d*|[-+]?\d*\.\d+|\d+)\s*(\w+)')
+    value, unit = value_unit_pattern.match(value_unit_string).groups()
+    return float(value), unit
+
+
+def convert_to_sec(time_value, time_unit):
+    if time_unit == "ms":
+        time_value *= 0.001
+    return time_value
+
+
+overview_of_recordings = "metadata/cells.csv"
+test_protocol_for_pa = "SingleStimulus"
+
+print "# Extract stimulation protocols"
+print "Search the recordings listed in {}".format(overview_of_recordings)
+
+print "Collect all instances of the stimulation protocol {} used to test for persistent activity".format(
+    test_protocol_for_pa)
+
+cell_recordings = pd.read_csv(overview_of_recordings, header=0)
+
+cell_recordings.columns = cell_recordings.columns.str.strip()
+for column in cell_recordings.columns:
+    cell_recordings[column] = cell_recordings[column].str.strip()
+
+cells = cell_recordings["CellId"].unique()
+stimulation_id = []
+cell_ids = []
+recordings = []
+before_protocols = []
+after_protocols = []
+stimulus_lengths = []
+pulse_periods = []
+pulse_lengths = []
+pulse_amplitudes = []
+pulse_offsets = []
+dts = []
+start_indices = []
+end_indices = []
+
+parameter_dict = {}
+
+before_stimulation_interval = 2
+after_stimulation_interval = 8
+
+for cell in cells:
+    recording_counter = 0
+    for _, recording in cell_recordings[cell_recordings["CellId"] == cell].iterrows():
+        cell_id = cell
+        recording_dir = "recordings/"+recording["Recording"]
+        for info, key, dat in pr.iload('%s/stimuli.dat' % (recording_dir,)):
+            if info[1]['RePro'] != test_protocol_for_pa:
+                continue
+            repro_starting_idx = dat[1, 0]
+            left_appendum, right_appendum = pr.get_trace_indices(info, before_stimulation_interval,
+                                                                 after_stimulation_interval)
+            dt = pr.get_sampling_interval(info)
+
+            stimulation_id.append("{:s}_R{:03d}".format(cell_id, recording_counter))
+            cell_ids.append(cell_id)
+            recordings.append(recording_dir)
+            # model parameters are listed as identifier2, identifier3, ... in the first metadata block
+            available_model_parameters = [info[0][k] for k in info[0].keys() if k.startswith("identifier") and k != "identifier1"]
+            parameter_values = [dat[0][key[1].index(param)+1] for param in available_model_parameters]
+            for param, value in zip(available_model_parameters, parameter_values):
+                parameter_dict.setdefault(param, []).append(value)
+            dts.append(dt)
+
+            stimulus_length, unit = extract_value_unit(info[1]["duration"])
+            stimulus_length = convert_to_sec(stimulus_length, unit)
+            pulse_period, unit = extract_value_unit(info[1]["period"])
+            pulse_period = convert_to_sec(pulse_period, unit)
+            pulse_length, unit = extract_value_unit(info[1]["pulseduration"])
+            pulse_length = convert_to_sec(pulse_length, unit)
+            pulse_amplitude, _ = extract_value_unit(info[1]["amplitude"])
+            pulse_offset, _ = extract_value_unit(info[1]["offset"])
+
+            before_protocols.append(before_stimulation_interval)
+            after_protocols.append(after_stimulation_interval)
+            stimulus_lengths.append(stimulus_length)
+            pulse_periods.append(pulse_period)
+            pulse_lengths.append(pulse_length)
+            pulse_amplitudes.append(pulse_amplitude)
+            pulse_offsets.append(pulse_offset)
+            start_indices.append(repro_starting_idx - left_appendum)
+            end_indices.append(repro_starting_idx + right_appendum)
+            recording_counter += 1
+
+dictionary_of_stimulations = {
+    "stimulation_id": stimulation_id,
+    "cell_id": cell_ids,
+    "recording": recordings,
+    "dt": dts,
+    "before_protocol": before_protocols,
+    "after_protocol": after_protocols,
+    "stimulus_length": stimulus_lengths,
+    "pulse_period": pulse_periods,
+    "pulse_length": pulse_lengths,
+    "pulse_amplitude": pulse_amplitudes,
+    "pulse_offset": pulse_offsets,
+    "start_index": start_indices,
+    "end_index": end_indices
+}
+
+dictionary_of_stimulations.update(parameter_dict)
+
+pd.DataFrame(data=dictionary_of_stimulations).set_index("stimulation_id").to_csv(path_or_buf=stimulation_metadata_raw)
+
+print "Gathered meta data for each stimulation and saved to {}".format(stimulation_metadata_raw)
+print ""

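A quick sanity check of the two parsing helpers above, with example strings rather than values from the recordings:

    >>> extract_value_unit("500ms")
    (500.0, 'ms')
    >>> convert_to_sec(*extract_value_unit("500ms"))
    0.5
    >>> extract_value_unit("1e+4mV")
    (10000.0, 'mV')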
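The start_index/end_index bookkeeping is in samples: get_trace_indices converts the before/after windows and the stimulus duration into sample counts around the repro start index. With a hypothetical sampling interval dt = 0.0001 s, a 1 s stimulus, and the 2 s / 8 s windows configured above:

    left_appendum = int(2 / 0.0001)         # 20000 samples before stimulus onset
    right_appendum = int((1 + 8) / 0.0001)  # 90000 samples from onset onwards
    # stored window: repro_starting_idx - 20000 .. repro_starting_idx + 90000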
+ 0 - 0
scripts/tools/__init__.py


+ 54 - 0
scripts/tools/helper.py

@@ -0,0 +1,54 @@
+import numpy as np
+
+import pyrelacs as pr
+
+
+def get_times(stimulation):
+    times = np.arange(-stimulation["before_protocol"], stimulation["stimulus_length"]+stimulation["after_protocol"],
+                      stimulation["dt"])
+    return times
+
+
+def get_trace(stimulation, trace_id):
+    available_traces = pr.get_available_traces(stimulation["recording"])
+    trace_ids = [available_trace["identifier"] for available_trace in
+                 available_traces]
+    trace_index = trace_ids.index(trace_id)
+
+    trace_file = open("{}/{}".format(stimulation["recording"], available_traces[trace_index]["data_file"]), 'rb')
+    is_current = "Current" in trace_id
+    is_affected_recording = stimulation.name.startswith("20170217")
+    scale_correction = 1.0 if not (is_current and is_affected_recording) else 0.1
+    return scale_correction * pr.read_from_trace_file(trace_file, stimulation["start_index"],
+                                   stimulation["end_index"] - stimulation["start_index"])
+
+
+def get_traces(stimulation):
+    available_trace_ids = [available_trace["identifier"] for available_trace in
+                           pr.get_available_traces(stimulation["recording"])]
+    traces = [get_trace(stimulation, trace_id) for trace_id in available_trace_ids]
+    return traces
+
+
+stimulation_metadata_raw = "stimulations_raw.csv"
+stimulation_metadata_extended = "stimulations_analysed.csv"
+stimulation_metadata_filtered = "stimulations_analysed_and_filtered.csv"
+
+
+def get_filters(stimulations):
+    filter_columns = filter(lambda key: key.startswith("filter"), stimulations.keys())
+    combined_filters = reduce(lambda status, column: status & (stimulations[column] == True), filter_columns, True)
+    return combined_filters
+
+
+def get_pulse_times(stimulation):
+    pulse_length = stimulation["pulse_length"]
+    pulse_period = stimulation["pulse_period"]
+
+    return [{"start": (pulse_idx - 1) * pulse_period,
+             "pulse_end": (pulse_idx - 1) * pulse_period + pulse_length,
+             "end": pulse_idx * pulse_period} for pulse_idx in range(1, stimulation["pulse_number"] + 1)]
+
+
+pulse_mean_string = "p_{:d}_ISI_mean"
+after_pulse_mean_string = "ap_{:d}_ISI_mean"

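For reference, get_pulse_times lays the pulses out back to back on the pulse_period grid. With hypothetical values pulse_period = 1.0 s, pulse_length = 0.2 s and pulse_number = 2 it returns:

    [{"start": 0.0, "pulse_end": 0.2, "end": 1.0},
     {"start": 1.0, "pulse_end": 1.2, "end": 2.0}]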
+ 423 - 0
scripts/tools/pyrelacs.py

@@ -0,0 +1,423 @@
+from os import path
+
+try:
+    from itertools import izip
+except ImportError:  # Python 3: zip is already lazy
+    izip = zip
+from numpy import array, arange, NaN, fromfile, float32, asarray, squeeze, isnan, fromstring
+from numpy.core.records import fromarrays
+# import nixio as nix
+import re
+import warnings
+
+identifiers = {
+    'stimspikes1.dat': lambda info: ('RePro' in info[-1] and info[-1]['RePro'] == 'FileStimulus'),
+    'samallspikes1.dat': lambda info: ('RePro' in info[-1] and info[-1]['RePro'] == 'SAM'),
+}
+
+
+def isfloat(value):
+    try:
+        float(value)
+        return True
+    except ValueError:
+        return False
+
+
+def info_filter(iterator, filterfunc):
+    for info, key, dat in iterator:
+        if filterfunc(info):
+            yield info, key, dat
+
+
+def iload_io_pairs(basedir, spikefile, traces, filterfunc=None):
+    """
+    Iterator that returns blocks of spike traces and spike times from the base directory basedir (e.g. 2014-06-06-aa)
+    and the spike time file (e.g. stimspikes1.dat). A filter function can filter out unwanted blocks. It gets the info
+    (see iload and iload_trace_trials) from all traces and spike times and has to return True if the block is wanted
+    and False otherwise.
+
+     :param basedir: basis directory of the recordings (e.g. 2014-06-06-aa)
+     :param spikefile: spikefile (e.g. stimspikes1.dat)
+     :param traces: trace numbers as a list (e.g. [1,2])
+     :param filterfunc: function that gets the infos from all traces and spike times and indicates whether the block is wanted or not
+    """
+
+    if filterfunc is None: filterfunc = lambda inp: True
+
+    if not isinstance(traces, list):
+        traces = [traces]
+
+    assert spikefile in identifiers, """iload_io_pairs does not know how to identify trials in stimuli.dat which
+                                        correspond to trials in {0}. Please update pyRELACS.DataLoader.identifiers
+                                        accordingly""".format(spikefile)
+    iterators = [info_filter(iload_trace_trials(basedir, tn), identifiers[spikefile]) for tn in traces] \
+                + [iload_spike_blocks(basedir + '/' + spikefile)]
+
+    for stuff in izip(*iterators):
+        info, key, dat = zip(*stuff)
+        if filterfunc(*info):
+            yield info, key, dat
+
+
+def iload_spike_blocks(filename):
+    """
+    Loads spike times from filename and merges trials with incremental trial numbers into one block.
+    Spike times are assumed to be in milliseconds and are converted into seconds.
+    """
+    current_trial = -1
+    ret_dat = []
+    old_info = old_key = None
+    for info, key, dat in iload(filename):
+        if 'trial' in info[-1]:
+            if int(info[-1]['trial']) != current_trial + 1:
+                yield old_info[:-1], key, ret_dat
+                ret_dat = []
+
+            current_trial = int(info[-1]['trial'])
+            if not any(isnan(dat)):
+                ret_dat.append(squeeze(dat) / 1000.)
+            else:
+                ret_dat.append(array([]))
+            old_info, old_key = info, key
+
+        else:
+            if len(ret_dat) > 0:
+                yield old_info[:-1], old_key, ret_dat
+                ret_dat = []
+            yield info, key, dat
+    else:
+        if len(ret_dat) > 0:
+            yield old_info[:-1], old_key, ret_dat
+
+
+def iload_trace_trials(basedir, trace_no=1, before=0.0, after=0.0):
+    """
+    returns:
+    info : metadata from stimuli.dat
+    key : key from stimuli.dat
+    data : the data of the specified trace of all trials
+    """
+    x = fromfile('%s/trace-%i.raw' % (basedir, trace_no), float32)
+    p = re.compile(r'([-+]?\d*\.\d+|\d+)\s*(\w+)')
+
+    for info, key, dat in iload('%s/stimuli.dat' % (basedir,)):
+        X = []
+        val, unit = p.match(info[-1]['duration']).groups()
+        val = float(val)
+        if unit == 'ms':
+            val *= 0.001
+        duration_index = key[2].index('duration')
+
+        # if 'RePro' in info[1] and info[1]['RePro'] == 'FileStimulus':
+        #     embed()
+        #     exit()
+        sval, sunit = p.match(info[0]['sample interval%i' % trace_no]).groups()
+        sval = float(sval)
+        if sunit == 'ms':
+            sval *= 0.001
+
+        l = int(before / sval)
+        r = int((val + after) / sval)
+
+        if dat.shape == (1, 1) and dat[0, 0] == 0:
+            warnings.warn("iload_trace_trials: Encountered incomplete '-0' trial.")
+            yield info, key, array([])
+            continue
+
+        for col, duration in zip(asarray([e[trace_no - 1] for e in dat], dtype=int),
+                                 asarray([e[duration_index] for e in dat],
+                                         dtype=float32)):  # dat[:,trace_no-1].astype(int):
+            tmp = x[col - l:col + r]
+
+            if duration < 0.001:  # if the duration is less than 1ms
+                warnings.warn(
+                    "iload_trace_trials: Skipping one trial because its duration is <1ms and therefore it is probably rubbish")
+                continue
+
+            if len(X) > 0 and len(tmp) != len(X[0]):
+                warnings.warn("iload_trace_trials: Setting one trial to NaN because it appears to be incomplete!")
+                X.append(NaN * X[0])
+            else:
+                X.append(tmp)
+
+        yield info, key, asarray(X)
+
+
+def iload_traces(basedir, repro='', before=0.0, after=0.0):
+    """
+    returns:
+    info : metadata from stimuli.dat
+    key : key from stimuli.dat
+    time : an array for the time axis
+    data : the data of all traces of a single trial
+    """
+
+    # open traces files:
+
+    sf = get_raw_traces(basedir)
+    for info, key, dat in iload('%s/stimuli.dat' % (basedir,)):
+
+        if len(repro) > 0 and repro != info[1]['RePro']:
+            continue
+
+        start_idx, end_idx = get_trace_indices(info, before, after)
+
+        deltat = get_sampling_interval(info)
+
+        time = arange(0.0, start_idx + end_idx) * deltat - before
+
+        duration_index = key[2].index('duration')
+
+        if dat.shape == (1, 1) and dat[0, 0] == 0:
+            warnings.warn("iload_traces: Encountered incomplete '-0' trial.")
+            yield info, key, array([])
+            continue
+
+        for d in dat:
+            duration = d[duration_index]
+            if duration < 0.001:  # if the duration is less than 1ms
+                warnings.warn(
+                    "iload_traces: Skipping one trial because its duration is <1ms and therefore it is probably rubbish")
+                continue
+
+            x = []
+            for trace in xrange(len(sf)):
+                col = int(d[trace])
+                from_idx = col - start_idx
+                indexes_to_read = (start_idx + end_idx)
+                trace_file = sf[trace]
+                tmp = read_from_trace_file(trace_file, from_idx, indexes_to_read)
+                if len(x) > 0 and len(tmp) != len(x[0]):
+                    warnings.warn("iload_traces: Setting one trial to NaN because it appears to be incomplete!")
+                    x.append(NaN * x[0])
+                else:
+                    x.append(tmp)
+
+            yield info, key, time, asarray(x)
+
+
+def get_sampling_interval(info):
+    p = re.compile(r'(\d+[e][+]\d*|[-+]?\d*\.\d+|\d+)\s*(\w+)')
+    deltat, unit = p.match(info[0]['sample interval1']).groups()
+    deltat = float(deltat)
+    if unit == 'ms':
+        deltat *= 0.001
+    return deltat
+
+
+def read_from_trace_file(trace_file, from_idx, n_samples):
+    # each sample is a 4-byte float32: seek to the first requested sample and read n_samples of them
+    trace_file.seek(int(from_idx) * 4)
+    buffer = trace_file.read(int(n_samples) * 4)
+    return fromstring(buffer, float32)
+
+
+def get_trace_indices(repro_info, before, after):
+    # number of samples before stimulus onset and from onset to stimulus end plus `after`
+    p = re.compile(r'(\d+[e][+]\d*|[-+]?\d*\.\d+|\d+)\s*(\w+)')
+    val, unit = p.match(repro_info[-1]['duration']).groups()
+    val = float(val)
+    if unit == 'ms':
+        val *= 0.001
+    sval, sunit = p.match(repro_info[0]['sample interval1']).groups()
+    sval = float(sval)
+    if sunit == 'ms':
+        sval *= 0.001
+    left = int(before / sval)
+    right = int((val + after) / sval)
+    return left, right
+
+
+def get_raw_traces(basedir):
+    sf = []
+    for trace in xrange(1, 1000000):
+        if path.isfile('%s/trace-%i.raw' % (basedir, trace)):
+            sf.append(get_raw_trace(basedir, trace))
+        else:
+            break
+    return sf
+
+
+def get_raw_trace(basedir, trace):
+    return open('%s/trace-%i.raw' % (basedir, trace), 'rb')
+
+
+def get_available_traces(basedir):
+    stimulus_file = "{}/stimuli.dat".format(basedir)
+    traces_info = []
+    info_pattern = re.compile(r'#\s+([\w\s]*):\s+([\w+-.]*)')
+    key_pattern = re.compile(r'(\w*)(\d+)')
+    with open(stimulus_file, 'r') as fid:
+        read = False
+        for line in fid:
+            if "analog input traces" in line:
+                read = True
+                continue
+            if "event lists" in line:
+                break
+            if read:
+                key, value = map(lambda s: s.strip(), info_pattern.match(line).groups())
+                key = key.replace(" ", "_")
+                key_id, key_idx = key_pattern.match(key).groups()
+                key_idx = int(key_idx) - 1
+                if key_id == "identifier":
+                    traces_info.append({key_id: value})
+                else:
+                    traces_info[key_idx].update({key_id: value})
+
+    return traces_info
+
+
+def iload(filename):
+    meta_data = []
+    new_meta_data = []
+    key = []
+
+    within_key = within_meta_block = within_data_block = False
+    currkey = None
+    data = []
+
+    with open(filename, 'r') as fid:
+        for line in fid:
+
+            line = line.rstrip().lstrip()
+
+            if within_data_block and (line.startswith('#') or not line):
+                within_data_block = False
+
+                yield list(meta_data), tuple(key), array(data)
+                data = []
+
+            # Key parsing
+            if line.startswith('#Key'):
+                key = []
+                within_key = True
+                continue
+
+            if within_key:
+                if not line.startswith('#'):
+                    within_key = False
+                    if not line and key[-1][0] == '1':
+                        within_data_block = True
+                        n = len(new_meta_data)
+                        meta_data[-n:] = new_meta_data
+                        new_meta_data = []
+                        currkey = None
+                        continue
+
+                else:
+                    key.append(tuple([e.strip() for e in line[1:].split("  ") if len(e.strip()) > 0]))
+                    continue
+
+            # fast forward to first data point or meta data
+            if not line:
+                within_key = within_meta_block = False
+                currkey = None
+                continue
+            # meta data blocks
+            elif line.startswith('#'):  # cannot be a key anymore
+                if not within_meta_block:
+                    within_meta_block = True
+                    new_meta_data.append({})
+
+                if ':' in line:
+                    tmp = [e.rstrip().lstrip() for e in line[1:].split(':')]
+                elif '=' in line:
+                    tmp = [e.rstrip().lstrip() for e in line[1:].split('=')]
+                else:
+                    currkey = line[1:].rstrip().lstrip()
+                    new_meta_data[-1][currkey] = {}
+                    continue
+
+                if currkey is None:
+                    new_meta_data[-1][tmp[0]] = tmp[1]
+                else:
+                    new_meta_data[-1][currkey][tmp[0]] = tmp[1]
+            else:
+                if not within_data_block:
+                    within_data_block = True
+                    n = len(new_meta_data)
+                    meta_data[-n:] = new_meta_data
+                    new_meta_data = []
+                    currkey = None
+                    within_key = within_meta_block = False
+                data.append([float(e) if (e != '-0' and isfloat(e)) else NaN for e in line.split()])
+        else:  # if for loop is finished, return the data we have so far
+            if within_data_block and len(data) > 0:
+                yield list(meta_data), tuple(key), array(data)
+
+
+def recload(filename):
+    for meta, key, dat in iload(filename):
+        yield meta, fromarrays(dat.T, names=key[0])
+
+
+def load(filename):
+    """
+
+    Loads a data file saved by relacs. Returns a tuple of dictionaries
+    containing the data and the header information
+
+    :param filename: Filename of the data file.
+    :type filename: string
+    :returns:  a tuple of dictionaries containing the head information and the data.
+    :rtype: tuple
+
+    """
+    with open(filename, 'r') as fid:
+        L = [l.lstrip().rstrip() for l in fid.readlines()]
+
+    ret = []
+    dat = {}
+    X = []
+    keyon = False
+    currkey = None
+    for l in L:
+        # if empty line and we have data recorded
+        if (not l or l.startswith('#')) and len(X) > 0:
+            keyon = False
+            currkey = None
+            dat['data'] = array(X)
+            ret.append(dat)
+            X = []
+            dat = {}
+
+        if '---' in l:
+            continue
+        if l.startswith('#'):
+            if ":" in l:
+                tmp = [e.rstrip().lstrip() for e in l[1:].split(':')]
+                if currkey is None:
+                    dat[tmp[0]] = tmp[1]
+                else:
+                    dat[currkey][tmp[0]] = tmp[1]
+            elif "=" in l:
+                tmp = [e.rstrip().lstrip() for e in l[1:].split('=')]
+                if currkey is None:
+                    dat[tmp[0]] = tmp[1]
+                else:
+                    dat[currkey][tmp[0]] = tmp[1]
+            elif l[1:].lower().startswith('key'):
+                dat['key'] = []
+
+                keyon = True
+            elif keyon:
+
+                dat['key'].append(tuple([e.lstrip().rstrip() for e in l[1:].split()]))
+            else:
+                currkey = l[1:].rstrip().lstrip()
+                dat[currkey] = {}
+
+        elif l:  # if l != ''
+            keyon = False
+            currkey = None
+            X.append([float(e) for e in l.split()])
+
+    if len(X) > 0:
+        dat['data'] = array(X)
+    else:
+        dat['data'] = []
+    ret.append(dat)
+
+    return tuple(ret)
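A minimal consumer of the block iterator, in the same Python 2 style as extract_stimulations.py above (the recording path is one of those listed in metadata/cells.csv; the RePro filter mirrors the extraction script):

    import tools.pyrelacs as pr

    for info, key, data in pr.iload('recordings/2017-02-16-ac/stimuli.dat'):
        if info[1]['RePro'] != 'SingleStimulus':
            continue
        print info[1]['duration'], data.shape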
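The raw trace files are flat float32 arrays, so read_from_trace_file addresses them in 4-byte steps. A sketch with hypothetical sample indices:

    f = pr.get_raw_trace('recordings/2017-02-16-ac', 1)  # opens trace-1.raw
    chunk = pr.read_from_trace_file(f, 100000, 20000)    # 20000 float32 samples starting at sample 100000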