Просмотр исходного кода

refactored postprocessing in a separate notebook

asobolev 2 года назад
Родитель
Коммит
6ae89c1673
3 изменённых файла: 188 добавлено и 130 удалено
  1. 24 129
      aSIT.ipynb
  2. 162 0
      postprocessing.ipynb
  3. 2 1
      requirements.txt

+ 24 - 129
aSIT.ipynb

@@ -22,7 +22,9 @@
       "pyFirmata==1.1.0\n",
       "numpy==1.18.4\n",
       "opencv-python==4.2.0.34\n",
-      "sounddevice\n"
+      "sounddevice\n",
+      "multiprocess\n",
+      "\n"
      ]
     }
    ],
@@ -36,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,7 +65,8 @@
     "from controllers.video import VideoWriter\n",
     "from controllers.position import PositionTracker\n",
     "from controllers.sound import SoundController\n",
-    "from controllers.serial import MCSArduino, FakeArduino, Feeder"
+    "from controllers.serial import MCSArduino, FakeArduino, Feeder\n",
+    "from postprocessing import pack"
    ]
   },
   {
@@ -464,133 +467,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 164,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import os, json, h5py, time\n",
-    "import numpy as np\n",
-    "from scipy import signal\n",
-    "\n",
-    "#session_path = to_save\n",
-    "session_path = os.path.join('sessions', '2021-07-30_09-24-14')  # some particular session\n",
-    "params_file = [x for x in os.listdir(session_path) if x.endswith('.json')][0]\n",
-    "\n",
-    "with open(os.path.join(session_path, params_file)) as json_file:\n",
-    "    parameters = json.load(json_file)"
+    "#session_path = save_to\n",
+    "session_path = os.path.join('sessions', '2021-07-30_09-24-14')  # some particular session"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 258,
-   "metadata": {
-    "scrolled": true
-   },
+   "execution_count": 3,
+   "metadata": {},
    "outputs": [],
    "source": [
-    "h5name = os.path.join(session_path, '%s.h5' % params_file.split('.')[0])\n",
-    "with h5py.File(h5name, 'w') as f:  # overwrite mode\n",
-    "    \n",
-    "    \n",
-    "    # -------- save raw data ------------\n",
-    "    raw = f.create_group('raw')\n",
-    "    raw.attrs['parameters'] = json.dumps(parameters)\n",
-    "\n",
-    "    for ds_name in ['positions', 'events', 'sounds']:\n",
-    "        filename = os.path.join(session_path, '%s.csv' % ds_name)\n",
-    "        with open(filename) as ff:\n",
-    "            headers = ff.readline()\n",
-    "        data = np.loadtxt(filename, delimiter=',', skiprows=1)\n",
-    "\n",
-    "        ds = raw.create_dataset(ds_name, data=data)\n",
-    "        ds.attrs['headers'] = headers\n",
-    "\n",
-    "        \n",
-    "    # -------- save processed ------------\n",
-    "    proc = f.create_group('processed')\n",
-    "    \n",
-    "    positions = np.array(f['raw']['positions'])\n",
-    "    \n",
-    "    # TODO remove outliers - position jumps over 20cm?\n",
-    "    #diffs_x = np.diff(positions[:, 1])\n",
-    "    #diffs_y = np.diff(positions[:, 2])\n",
-    "    #dists = np.sqrt(diffs_x**2 + diffs_y**2)\n",
-    "    #np.where(dists > 0.2 / pixel_size)[0]\n",
-    "\n",
-    "    # convert timeline to 100 Hz\n",
-    "    time_freq = 100  # at 100Hz\n",
-    "    s_start, s_end = positions[:, 0][0], positions[:, 0][-1]\n",
-    "    times = np.linspace(s_start, s_end, int((s_end - s_start) * time_freq))\n",
-    "    pos_at_freq = np.zeros((len(times), 3))\n",
-    "    \n",
-    "    curr_idx = 0\n",
-    "    for i, t in enumerate(times):\n",
-    "        if curr_idx < len(positions) - 1 and \\\n",
-    "            np.abs(t - positions[:, 0][curr_idx]) > np.abs(t - positions[:, 0][curr_idx + 1]):\n",
-    "            curr_idx += 1\n",
-    "        pos_at_freq[i] = (t, positions[curr_idx][1], positions[curr_idx][2])\n",
-    "    \n",
-    "    # make time from session start\n",
-    "    pos_at_freq[:, 0] = pos_at_freq[:, 0] - pos_at_freq[0][0]\n",
-    "    \n",
-    "    # convert positions from pixels to meters and center\n",
-    "    #arena_d = parameters['position']['arena_diameter']\n",
-    "    arena_d = 0.92\n",
-    "    pixel_size = arena_d / (2 * float(parameters['position']['arena_radius']))\n",
-    "    pos_at_freq[:, 1] = (parameters['position']['arena_x'] - pos_at_freq[:, 1]) * pixel_size\n",
-    "    pos_at_freq[:, 2] = (parameters['position']['arena_y'] - pos_at_freq[:, 2]) * pixel_size\n",
-    "\n",
-    "    width = 100  # 100 points ~= 1 sec with at 100Hz\n",
-    "    kernel = signal.gaussian(width, std=(width) / 7.2)\n",
-    "\n",
-    "    x_smooth = np.convolve(pos_at_freq[:, 1], kernel, 'same') / kernel.sum()\n",
-    "    y_smooth = np.convolve(pos_at_freq[:, 2], kernel, 'same') / kernel.sum()\n",
-    "\n",
-    "    # speed\n",
-    "    dx = np.sqrt(np.square(np.diff(x_smooth)) + np.square(np.diff(y_smooth)))\n",
-    "    dt = np.diff(pos_at_freq[:, 0])\n",
-    "    speed = np.concatenate([dx/dt, [dx[-1]/dt[-1]]])\n",
-    "\n",
-    "    proc.create_dataset('timeline', data=np.column_stack([pos_at_freq[:, 0], x_smooth, y_smooth, speed]))\n",
-    "    proc.attrs['headers'] = 'time, x, y, speed'\n",
-    "    \n",
-    "    # save trials\n",
-    "    events = np.array(f['raw']['events'])\n",
-    "    events[:, 0] = events[:, 0] - s_start\n",
-    "    \n",
-    "    t_count = len(np.unique(events[events[:, 7] != 0][:, 6]))\n",
-    "    trials = np.zeros((t_count, 6))\n",
-    "    for i in range(t_count):\n",
-    "        t_start_idx = (np.abs(pos_at_freq[:, 0] - events[2*i][0])).argmin()\n",
-    "        t_end_idx = (np.abs(pos_at_freq[:, 0] - events[2*i + 1][0])).argmin()\n",
-    "        x_in_m = (parameters['position']['arena_x'] - events[2*i][3]) * pixel_size\n",
-    "        y_in_m = (parameters['position']['arena_y'] - events[2*i][4]) * pixel_size\n",
-    "        r_in_m = events[2*i][5] * pixel_size\n",
-    "        state = 0 if events[2*i + 1][7] > 1 else 1\n",
-    "        \n",
-    "        trials[i] = (t_start_idx, t_end_idx, x_in_m, y_in_m, r_in_m, state)\n",
-    "    \n",
-    "    proc.create_dataset('trial_idxs', data=trials)\n",
-    "    proc.attrs['headers'] = 't_start_idx, t_end_idx, target_x, target_y, target_r, fail_or_success'\n",
-    "    \n",
-    "    # save sounds\n",
-    "    sounds = np.array(f['raw']['sounds'])\n",
-    "    sounds[:, 0] = sounds[:, 0] - s_start\n",
-    "    \n",
-    "    sound_idxs = np.zeros((len(sounds), 2))\n",
-    "    left_idx = 0\n",
-    "    delta = 10**5\n",
-    "    for i in range(len(sounds)):\n",
-    "        while left_idx < len(pos_at_freq) and \\\n",
-    "                np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx]) < delta:\n",
-    "            delta = np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx])\n",
-    "            left_idx += 1\n",
-    "            \n",
-    "        sound_idxs[i] = (left_idx, sounds[i][1])\n",
-    "        delta = 10**5\n",
-    "    \n",
-    "    proc.create_dataset('sound_idxs', data=sound_idxs)\n",
-    "    proc.attrs['headers'] = 'timeline_idx, sound_id'"
+    "# pack the session data into HDF5\n",
+    "h5name = pack(session_path)"
    ]
   },
   {
@@ -602,16 +494,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 215,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import matplotlib.pyplot as plt"
+    "import matplotlib.pyplot as plt\n",
+    "import h5py\n",
+    "import numpy as np\n",
+    "from scipy import signal"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 264,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -624,7 +519,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 301,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -633,7 +528,7 @@
        "Text(0.5, 1.0, 'Speed')"
       ]
      },
-     "execution_count": 301,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -681,11 +576,11 @@
     "ax.grid()\n",
     "\n",
     "# trials\n",
-    "durations = tl[trials[:, 1].astype(int)][:, 0] - tl[trials[:, 0].astype(int)][:, 0]\n",
-    "colors = ['red' if x == 1 else 'grey' for x in trials[:, 5]]\n",
+    "durations = tl[trial_idxs[:, 1].astype(int)][:, 0] - tl[trial_idxs[:, 0].astype(int)][:, 0]\n",
+    "colors = ['red' if x == 1 else 'grey' for x in trial_idxs[:, 5]]\n",
     "\n",
     "ax = fig.add_subplot(223)\n",
-    "ax.barh(np.arange(len(trials)), durations, color=colors, align='center')\n",
+    "ax.barh(np.arange(len(trial_idxs)), durations, color=colors, align='center')\n",
     "ax.set_xlabel('Time, s', fontsize=14)\n",
     "ax.set_ylabel('Trial, #', fontsize=14)\n",
     "ax.set_title('Trials', fontsize=14)\n",

+ 162 - 0
postprocessing.ipynb

@@ -0,0 +1,162 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, json, h5py, time\n",
+    "import numpy as np\n",
+    "from scipy import signal"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def pack(session_path):\n",
+    "    params_file = [x for x in os.listdir(session_path) if x.endswith('.json')][0]\n",
+    "\n",
+    "    with open(os.path.join(session_path, params_file)) as json_file:\n",
+    "        parameters = json.load(json_file)\n",
+    "    \n",
+    "    h5name = os.path.join(session_path, '%s.h5' % params_file.split('.')[0])\n",
+    "    with h5py.File(h5name, 'w') as f:  # overwrite mode\n",
+    "\n",
+    "\n",
+    "        # -------- save raw data ------------\n",
+    "        raw = f.create_group('raw')\n",
+    "        raw.attrs['parameters'] = json.dumps(parameters)\n",
+    "\n",
+    "        for ds_name in ['positions', 'events', 'sounds']:\n",
+    "            filename = os.path.join(session_path, '%s.csv' % ds_name)\n",
+    "            with open(filename) as ff:\n",
+    "                headers = ff.readline()\n",
+    "            data = np.loadtxt(filename, delimiter=',', skiprows=1)\n",
+    "\n",
+    "            ds = raw.create_dataset(ds_name, data=data)\n",
+    "            ds.attrs['headers'] = headers\n",
+    "\n",
+    "\n",
+    "        # -------- save processed ------------\n",
+    "        proc = f.create_group('processed')\n",
+    "\n",
+    "        positions = np.array(f['raw']['positions'])\n",
+    "\n",
+    "        # TODO remove outliers - position jumps over 20cm?\n",
+    "        #diffs_x = np.diff(positions[:, 1])\n",
+    "        #diffs_y = np.diff(positions[:, 2])\n",
+    "        #dists = np.sqrt(diffs_x**2 + diffs_y**2)\n",
+    "        #np.where(dists > 0.2 / pixel_size)[0]\n",
+    "\n",
+    "        # convert timeline to 100 Hz\n",
+    "        time_freq = 100  # at 100Hz\n",
+    "        s_start, s_end = positions[:, 0][0], positions[:, 0][-1]\n",
+    "        times = np.linspace(s_start, s_end, int((s_end - s_start) * time_freq))\n",
+    "        pos_at_freq = np.zeros((len(times), 3))\n",
+    "\n",
+    "        curr_idx = 0\n",
+    "        for i, t in enumerate(times):\n",
+    "            if curr_idx < len(positions) - 1 and \\\n",
+    "                np.abs(t - positions[:, 0][curr_idx]) > np.abs(t - positions[:, 0][curr_idx + 1]):\n",
+    "                curr_idx += 1\n",
+    "            pos_at_freq[i] = (t, positions[curr_idx][1], positions[curr_idx][2])\n",
+    "\n",
+    "        # make time from session start\n",
+    "        pos_at_freq[:, 0] = pos_at_freq[:, 0] - pos_at_freq[0][0]\n",
+    "\n",
+    "        # convert positions from pixels to meters and center\n",
+    "        #arena_d = parameters['position']['arena_diameter']\n",
+    "        arena_d = 0.92\n",
+    "        pixel_size = arena_d / (2 * float(parameters['position']['arena_radius']))\n",
+    "        pos_at_freq[:, 1] = (parameters['position']['arena_x'] - pos_at_freq[:, 1]) * pixel_size\n",
+    "        pos_at_freq[:, 2] = (parameters['position']['arena_y'] - pos_at_freq[:, 2]) * pixel_size\n",
+    "\n",
+    "        width = 100  # 100 points ~= 1 sec at 100Hz\n",
+    "        kernel = signal.gaussian(width, std=(width) / 7.2)\n",
+    "\n",
+    "        x_smooth = np.convolve(pos_at_freq[:, 1], kernel, 'same') / kernel.sum()\n",
+    "        y_smooth = np.convolve(pos_at_freq[:, 2], kernel, 'same') / kernel.sum()\n",
+    "\n",
+    "        # speed\n",
+    "        dx = np.sqrt(np.square(np.diff(x_smooth)) + np.square(np.diff(y_smooth)))\n",
+    "        dt = np.diff(pos_at_freq[:, 0])\n",
+    "        speed = np.concatenate([dx/dt, [dx[-1]/dt[-1]]])\n",
+    "\n",
+    "        proc.create_dataset('timeline', data=np.column_stack([pos_at_freq[:, 0], x_smooth, y_smooth, speed]))\n",
+    "        proc.attrs['headers'] = 'time, x, y, speed'\n",
+    "\n",
+    "        # save trials\n",
+    "        events = np.array(f['raw']['events'])\n",
+    "        events[:, 0] = events[:, 0] - s_start\n",
+    "\n",
+    "        t_count = len(np.unique(events[events[:, 7] != 0][:, 6]))\n",
+    "        trials = np.zeros((t_count, 6))\n",
+    "        for i in range(t_count):\n",
+    "            t_start_idx = (np.abs(pos_at_freq[:, 0] - events[2*i][0])).argmin()\n",
+    "            t_end_idx = (np.abs(pos_at_freq[:, 0] - events[2*i + 1][0])).argmin()\n",
+    "            x_in_m = (parameters['position']['arena_x'] - events[2*i][3]) * pixel_size\n",
+    "            y_in_m = (parameters['position']['arena_y'] - events[2*i][4]) * pixel_size\n",
+    "            r_in_m = events[2*i][5] * pixel_size\n",
+    "            state = 0 if events[2*i + 1][7] > 1 else 1\n",
+    "\n",
+    "            trials[i] = (t_start_idx, t_end_idx, x_in_m, y_in_m, r_in_m, state)\n",
+    "\n",
+    "        proc.create_dataset('trial_idxs', data=trials)\n",
+    "        proc.attrs['headers'] = 't_start_idx, t_end_idx, target_x, target_y, target_r, fail_or_success'\n",
+    "\n",
+    "        # save sounds\n",
+    "        sounds = np.array(f['raw']['sounds'])\n",
+    "        sounds[:, 0] = sounds[:, 0] - s_start\n",
+    "\n",
+    "        sound_idxs = np.zeros((len(sounds), 2))\n",
+    "        left_idx = 0\n",
+    "        delta = 10**5\n",
+    "        for i in range(len(sounds)):\n",
+    "            while left_idx < len(pos_at_freq) and \\\n",
+    "                    np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx]) < delta:\n",
+    "                delta = np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx])\n",
+    "                left_idx += 1\n",
+    "\n",
+    "            sound_idxs[i] = (left_idx, sounds[i][1])\n",
+    "            delta = 10**5\n",
+    "\n",
+    "        proc.create_dataset('sound_idxs', data=sound_idxs)\n",
+    "        proc.attrs['headers'] = 'timeline_idx, sound_id'\n",
+    "        \n",
+    "    return h5name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 2 - 1
requirements.txt

@@ -2,4 +2,5 @@ jupyter==1.0.0
 pyFirmata==1.1.0
 numpy==1.18.4
 opencv-python==4.2.0.34
-sounddevice
+sounddevice
+multiprocess