{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os, json, h5py, time\n",
    "import numpy as np\n",
    "from scipy import signal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pack a particular session\n",
    "#h5name = pack('sessions\\\\47_aSIT_2021-07-31_19-05-54')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pack(session_path):\n",
    "    \"\"\"\n",
    "    Pack independent tracking datasets into a single HDF5 file.\n",
    "    \n",
    "    File has the following structure:\n",
    "    \n",
    "    /raw\n",
    "        /positions      - raw positions from .csv\n",
    "        /events         - raw events from .csv\n",
    "        /sounds         - raw sounds from .csv\n",
    "    /processed\n",
    "        /timeline       - matrix of [time, x, y, speed] sampled at 50Hz, data is smoothed with gaussian kernel\n",
    "        /trial_idxs     - matrix of trial indices to timeline\n",
    "        /sound_idxs     - matrix of sound indices to timeline\n",
    "        \n",
    "    each dataset has an attribute 'headers' with the description of columns.\n",
    "    \"\"\"\n",
    "    params_file = [x for x in os.listdir(session_path) if x.endswith('.json')][0]\n",
    "\n",
    "    with open(os.path.join(session_path, params_file)) as json_file:\n",
    "        parameters = json.load(json_file)\n",
    "    \n",
    "    h5name = os.path.join(session_path, '%s.h5' % params_file.split('.')[0])\n",
    "    with h5py.File(h5name, 'w') as f:  # overwrite mode\n",
    "\n",
    "\n",
    "        # -------- save raw data ------------\n",
    "        raw = f.create_group('raw')\n",
    "        raw.attrs['parameters'] = json.dumps(parameters)\n",
    "\n",
    "        for ds_name in ['positions', 'events', 'sounds']:\n",
    "            filename = os.path.join(session_path, '%s.csv' % ds_name)\n",
    "            with open(filename) as ff:\n",
    "                headers = ff.readline()\n",
    "            data = np.loadtxt(filename, delimiter=',', skiprows=1)\n",
    "\n",
    "            ds = raw.create_dataset(ds_name, data=data)\n",
    "            ds.attrs['headers'] = headers\n",
    "\n",
    "\n",
    "        # -------- save processed ------------\n",
    "        proc = f.create_group('processed')\n",
    "\n",
    "        positions = np.array(f['raw']['positions'])\n",
    "\n",
    "        # TODO remove outliers - position jumps over 20cm?\n",
    "        #diffs_x = np.diff(positions[:, 1])\n",
    "        #diffs_y = np.diff(positions[:, 2])\n",
    "        #dists = np.sqrt(diffs_x**2 + diffs_y**2)\n",
    "        #np.where(dists > 0.2 / pixel_size)[0]\n",
    "\n",
    "        # convert timeline to 100 Hz\n",
    "        time_freq = 100  # at 100Hz\n",
    "        s_start, s_end = positions[:, 0][0], positions[:, 0][-1]\n",
    "        times = np.linspace(s_start, s_end, int((s_end - s_start) * time_freq))\n",
    "        pos_at_freq = np.zeros((len(times), 3))\n",
    "\n",
    "        curr_idx = 0\n",
    "        for i, t in enumerate(times):\n",
    "            if curr_idx < len(positions) - 1 and \\\n",
    "                np.abs(t - positions[:, 0][curr_idx]) > np.abs(t - positions[:, 0][curr_idx + 1]):\n",
    "                curr_idx += 1\n",
    "            pos_at_freq[i] = (t, positions[curr_idx][1], positions[curr_idx][2])\n",
    "\n",
    "        # make time from session start\n",
    "        pos_at_freq[:, 0] = pos_at_freq[:, 0] - pos_at_freq[0][0]\n",
    "\n",
    "        # convert positions from pixels to meters and center\n",
    "        #arena_d = parameters['position']['arena_diameter']\n",
    "        arena_d = 0.92\n",
    "        pixel_size = arena_d / (2 * float(parameters['position']['arena_radius']))\n",
    "        pos_at_freq[:, 1] = (parameters['position']['arena_x'] - pos_at_freq[:, 1]) * pixel_size\n",
    "        pos_at_freq[:, 2] = (parameters['position']['arena_y'] - pos_at_freq[:, 2]) * pixel_size\n",
    "\n",
    "        width = 100  # 100 points ~= 1 sec with at 100Hz\n",
    "        kernel = signal.gaussian(width, std=(width) / 7.2)\n",
    "\n",
    "        x_smooth = np.convolve(pos_at_freq[:, 1], kernel, 'same') / kernel.sum()\n",
    "        y_smooth = np.convolve(pos_at_freq[:, 2], kernel, 'same') / kernel.sum()\n",
    "\n",
    "        # speed\n",
    "        dx = np.sqrt(np.square(np.diff(x_smooth)) + np.square(np.diff(y_smooth)))\n",
    "        dt = np.diff(pos_at_freq[:, 0])\n",
    "        speed = np.concatenate([dx/dt, [dx[-1]/dt[-1]]])\n",
    "\n",
    "        timeline = proc.create_dataset('timeline', data=np.column_stack([pos_at_freq[:, 0], x_smooth, y_smooth, speed]))\n",
    "        timeline.attrs['headers'] = 'time, x, y, speed'\n",
    "\n",
    "        # save trials\n",
    "        events = np.array(f['raw']['events'])\n",
    "        events[:, 0] = events[:, 0] - s_start\n",
    "\n",
    "        t_count = len(np.unique(events[events[:, 5] != 0][:, 4]))\n",
    "        trials = np.zeros((t_count, 6))\n",
    "        for i in range(t_count):\n",
    "            t_start_idx = (np.abs(pos_at_freq[:, 0] - events[2*i][0])).argmin()\n",
    "            t_end_idx = (np.abs(pos_at_freq[:, 0] - events[2*i + 1][0])).argmin()\n",
    "            x_in_m = (parameters['position']['arena_x'] - events[2*i][1]) * pixel_size\n",
    "            y_in_m = (parameters['position']['arena_y'] - events[2*i][2]) * pixel_size\n",
    "            r_in_m = events[2*i][3] * pixel_size\n",
    "            state = 0 if events[2*i + 1][5] > 1 else 1\n",
    "\n",
    "            trials[i] = (t_start_idx, t_end_idx, x_in_m, y_in_m, r_in_m, state)\n",
    "\n",
    "        trial_idxs = proc.create_dataset('trial_idxs', data=trials)\n",
    "        trial_idxs.attrs['headers'] = 't_start_idx, t_end_idx, target_x, target_y, target_r, fail_or_success'\n",
    "\n",
    "        # save sounds\n",
    "        sounds = np.array(f['raw']['sounds'])\n",
    "        sounds[:, 0] = sounds[:, 0] - s_start\n",
    "\n",
    "        sound_idxs = np.zeros((len(sounds), 2))\n",
    "        left_idx = 0\n",
    "        delta = 10**5\n",
    "        for i in range(len(sounds)):\n",
    "            while left_idx < len(pos_at_freq) and \\\n",
    "                    np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx]) < delta:\n",
    "                delta = np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx])\n",
    "                left_idx += 1\n",
    "\n",
    "            sound_idxs[i] = (left_idx, sounds[i][1])\n",
    "            delta = 10**5\n",
    "\n",
    "        sound_idxs = proc.create_dataset('sound_idxs', data=sound_idxs)\n",
    "        sound_idxs.attrs['headers'] = 'timeline_idx, sound_id'\n",
    "        \n",
    "    return h5name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}