{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os, json, h5py, time\n", "import numpy as np\n", "from scipy import signal" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Pack a particular session\n", "#h5name = pack('sessions\\\\47_aSIT_2021-07-31_19-05-54')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def pack(session_path):\n", " \"\"\"\n", " Pack independent tracking datasets into a single HDF5 file.\n", " \n", " File has the following structure:\n", " \n", " /raw\n", " /positions - raw positions from .csv\n", " /events - raw events from .csv\n", " /sounds - raw sounds from .csv\n", " /processed\n", " /timeline - matrix of [time, x, y, speed] sampled at 50Hz, data is smoothed with gaussian kernel\n", " /trial_idxs - matrix of trial indices to timeline\n", " /sound_idxs - matrix of sound indices to timeline\n", " \n", " each dataset has an attribute 'headers' with the description of columns.\n", " \"\"\"\n", " params_file = [x for x in os.listdir(session_path) if x.endswith('.json')][0]\n", "\n", " with open(os.path.join(session_path, params_file)) as json_file:\n", " parameters = json.load(json_file)\n", " \n", " h5name = os.path.join(session_path, '%s.h5' % params_file.split('.')[0])\n", " with h5py.File(h5name, 'w') as f: # overwrite mode\n", "\n", "\n", " # -------- save raw data ------------\n", " raw = f.create_group('raw')\n", " raw.attrs['parameters'] = json.dumps(parameters)\n", "\n", " for ds_name in ['positions', 'events', 'sounds']:\n", " filename = os.path.join(session_path, '%s.csv' % ds_name)\n", " with open(filename) as ff:\n", " headers = ff.readline()\n", " data = np.loadtxt(filename, delimiter=',', skiprows=1)\n", "\n", " ds = raw.create_dataset(ds_name, data=data)\n", " ds.attrs['headers'] = headers\n", "\n", "\n", " # -------- save processed ------------\n", " proc = f.create_group('processed')\n", "\n", " positions = np.array(f['raw']['positions'])\n", "\n", " # TODO remove outliers - position jumps over 20cm?\n", " #diffs_x = np.diff(positions[:, 1])\n", " #diffs_y = np.diff(positions[:, 2])\n", " #dists = np.sqrt(diffs_x**2 + diffs_y**2)\n", " #np.where(dists > 0.2 / pixel_size)[0]\n", "\n", " # convert timeline to 100 Hz\n", " time_freq = 100 # at 100Hz\n", " s_start, s_end = positions[:, 0][0], positions[:, 0][-1]\n", " times = np.linspace(s_start, s_end, int((s_end - s_start) * time_freq))\n", " pos_at_freq = np.zeros((len(times), 3))\n", "\n", " curr_idx = 0\n", " for i, t in enumerate(times):\n", " if curr_idx < len(positions) - 1 and \\\n", " np.abs(t - positions[:, 0][curr_idx]) > np.abs(t - positions[:, 0][curr_idx + 1]):\n", " curr_idx += 1\n", " pos_at_freq[i] = (t, positions[curr_idx][1], positions[curr_idx][2])\n", "\n", " # make time from session start\n", " pos_at_freq[:, 0] = pos_at_freq[:, 0] - pos_at_freq[0][0]\n", "\n", " # convert positions from pixels to meters and center\n", " #arena_d = parameters['position']['arena_diameter']\n", " arena_d = 0.92\n", " pixel_size = arena_d / (2 * float(parameters['position']['arena_radius']))\n", " pos_at_freq[:, 1] = (parameters['position']['arena_x'] - pos_at_freq[:, 1]) * pixel_size\n", " pos_at_freq[:, 2] = (parameters['position']['arena_y'] - pos_at_freq[:, 2]) * pixel_size\n", "\n", " width = 100 # 100 points ~= 1 sec with at 100Hz\n", " kernel = signal.gaussian(width, std=(width) / 7.2)\n", "\n", " x_smooth = np.convolve(pos_at_freq[:, 1], kernel, 'same') / kernel.sum()\n", " y_smooth = np.convolve(pos_at_freq[:, 2], kernel, 'same') / kernel.sum()\n", "\n", " # speed\n", " dx = np.sqrt(np.square(np.diff(x_smooth)) + np.square(np.diff(y_smooth)))\n", " dt = np.diff(pos_at_freq[:, 0])\n", " speed = np.concatenate([dx/dt, [dx[-1]/dt[-1]]])\n", "\n", " timeline = proc.create_dataset('timeline', data=np.column_stack([pos_at_freq[:, 0], x_smooth, y_smooth, speed]))\n", " timeline.attrs['headers'] = 'time, x, y, speed'\n", "\n", " # save trials\n", " events = np.array(f['raw']['events'])\n", " events[:, 0] = events[:, 0] - s_start\n", "\n", " t_count = len(np.unique(events[events[:, 5] != 0][:, 4]))\n", " trials = np.zeros((t_count, 6))\n", " for i in range(t_count):\n", " t_start_idx = (np.abs(pos_at_freq[:, 0] - events[2*i][0])).argmin()\n", " t_end_idx = (np.abs(pos_at_freq[:, 0] - events[2*i + 1][0])).argmin()\n", " x_in_m = (parameters['position']['arena_x'] - events[2*i][1]) * pixel_size\n", " y_in_m = (parameters['position']['arena_y'] - events[2*i][2]) * pixel_size\n", " r_in_m = events[2*i][3] * pixel_size\n", " state = 0 if events[2*i + 1][5] > 1 else 1\n", "\n", " trials[i] = (t_start_idx, t_end_idx, x_in_m, y_in_m, r_in_m, state)\n", "\n", " trial_idxs = proc.create_dataset('trial_idxs', data=trials)\n", " trial_idxs.attrs['headers'] = 't_start_idx, t_end_idx, target_x, target_y, target_r, fail_or_success'\n", "\n", " # save sounds\n", " sounds = np.array(f['raw']['sounds'])\n", " sounds[:, 0] = sounds[:, 0] - s_start\n", "\n", " sound_idxs = np.zeros((len(sounds), 2))\n", " left_idx = 0\n", " delta = 10**5\n", " for i in range(len(sounds)):\n", " while left_idx < len(pos_at_freq) and \\\n", " np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx]) < delta:\n", " delta = np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx])\n", " left_idx += 1\n", "\n", " sound_idxs[i] = (left_idx, sounds[i][1])\n", " delta = 10**5\n", "\n", " sound_idxs = proc.create_dataset('sound_idxs', data=sound_idxs)\n", " sound_idxs.attrs['headers'] = 'timeline_idx, sound_id'\n", " \n", " return h5name" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 4 }