Browse Source

Update analysis scripts and re-extracted adult call bandwidth data

Michael Schutte 3 years ago
parent
commit
28564d5838

File diff suppressed because it is too large
+ 22319 - 22319
vocalisations/adult_calls.csv


+ 69 - 0
vocalisations/scripts/analysis.R

@@ -0,0 +1,69 @@
+library(tidyverse)
+library(lubridate)
+library(nlme)
+
+birthdates <- tribble(~calling_bat, ~calling_bat_birthdate,
+                      "b2", "2017-01-26",
+                      "b4", "2017-01-30",
+                      "b7", "2017-02-08",
+                      "b3", "2017-01-29",
+                      "b5", "2017-02-02",
+                      "b8", "2017-02-08")
+birthdates <- birthdates %>%
+    mutate(calling_bat_birthdate=ymd(calling_bat_birthdate))
+
+load_pup_data <- function(filename) {
+    data <- read_csv(filename, na="--",
+                     col_types=cols(.default="?", session_id="f",
+                                    calling_bat="f", calling_bat_deaf="l", before_deafening="l"))
+    data <- inner_join(data, birthdates)
+
+    # annotate with ages in weeks
+    data <- data %>%
+        mutate(age_in_weeks=
+               floor(as.numeric(as.duration(data$calling_bat_birthdate %--% data$start_time),
+                                "weeks")),
+               bandwidth=f_max - f_min) %>%
+        filter(!before_deafening)
+
+    data
+}
+
+load_pup_activity_data <- function(filename) {
+    data <- read_csv(filename,
+                     col_types=cols(.default="?", session_id="f",
+                                    calling_bat="f", calling_bat_deaf="l", before_deafening="l")) %>%
+        rename(age_in_weeks=calling_bat_age_in_weeks)
+
+    data
+}
+
+pup_data <- load_pup_data("../output/pup_calls_for_model.csv")
+pup_activity_data <- load_pup_activity_data("../output/pup_activity_for_model.csv")
+
+fit_pup_model <- function(data, response) {
+    frm <- as.formula(paste(response, "~ calling_bat_deaf*age_in_weeks"))
+    lme(frm, random=~1 + age_in_weeks | calling_bat,
+        data=data, control=lmeControl(opt="optim"))
+}
+
+anova_p <- function(model) {
+    result <- as.data.frame(anova(model))
+    as.data.frame.list(c(p_group=result[["calling_bat_deaf", "p-value"]],
+			 p_age=result[["age_in_weeks", "p-value"]],
+			 p_interaction=result[["calling_bat_deaf:age_in_weeks", "p-value"]]))
+}
+
+parameters <- c("call_rms", "call_duration", "f_min", "f_max", "bandwidth",
+                "f0_mean", "f0_min", "f0_max", "f0_start", "f0_end", "f0_slope",
+                "spectral_centroid", "mean_aperiodicity")
+
+pup_p <- bind_rows(lapply(parameters,
+                            function(parameter)
+                                anova_p(fit_pup_model(pup_data, parameter))),
+                   anova_p(fit_pup_model(pup_activity_data, "calls_per_10s"))) %>%
+    mutate(p_group_adjusted=p.adjust(p_group, "BH"),
+	   p_age_adjusted=p.adjust(p_age, "BH"),
+	   p_interaction_adjusted=p.adjust(p_age, "BH"),
+           parameter=c(parameters, "calls_per_10s"))
+write_csv(pup_p, "../output/pup_model_results.csv")

+ 0 - 714
vocalisations/scripts/figures.ipynb

@@ -1,714 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%matplotlib inline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import IPython.display as ipd\n",
-    "import sys\n",
-    "import io\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import matplotlib as mpl\n",
-    "import matplotlib.pyplot as plt\n",
-    "import scipy.stats as sps"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Global information\n",
-    "==="
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mpl.rcParams[\"font.family\"] = \"TeX Gyre Pagella\"\n",
-    "mpl.rcParams[\"font.size\"] = 8\n",
-    "rst_column_width = 3.3\n",
-    "rst_total_width = 7"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "deaf_pups = {\"b3\", \"b5\", \"b8\"}\n",
-    "hearing_pups = {\"b2\", \"b4\", \"b7\"}\n",
-    "all_pups = deaf_pups.union(hearing_pups)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "group_colors = dict(hearing=\"#7bb992\", deaf=\"#5f0f00\")\n",
-    "bat_colors = {**{bat: group_colors[\"deaf\"] for bat in deaf_pups},\n",
-    "              **{bat: group_colors[\"hearing\"] for bat in hearing_pups}}\n",
-    "bat_markers = dict(zip(sorted(all_pups), [\"P\", \"o\", \"*\", \"s\", \"X\", \"D\"]))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Load data for juveniles\n",
-    "==="
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pup_sessions = pd.read_csv(\"../pup_sessions.csv\", index_col=0, parse_dates=[\"start_time\"],\n",
-    "                           dtype=dict(before_deafening=np.bool))\n",
-    "pup_calls = pd.read_csv(\"../pup_calls.csv\", index_col=[0, 1], parse_dates=[\"start_time\"], na_values=[\"--\"],\n",
-    "                        dtype=dict(calling_bat_deaf=np.bool, calling_bat_mother=np.bool, before_deafening=np.bool))\n",
-    "\n",
-    "# filter out mother vocalisations\n",
-    "pup_calls = pup_calls[~pup_calls[\"calling_bat_mother\"]].reset_index()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "bat_birthdates = pd.DataFrame([(\"b2\", \"2017/01/26\"),\n",
-    "                               (\"b4\", \"2017/01/30\"),\n",
-    "                               (\"b7\", \"2017/02/08\"),\n",
-    "                               (\"b3\", \"2017/01/29\"),\n",
-    "                               (\"b5\", \"2017/02/02\"),\n",
-    "                               (\"b8\", \"2017/02/08\")], columns=[\"bat\", \"birthdate\"])\n",
-    "bat_birthdates[\"birthdate\"] = pd.to_datetime(bat_birthdates[\"birthdate\"])\n",
-    "bat_birthdates.set_index(\"bat\", inplace=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# determine ages (in full days) of the calling bats\n",
-    "\n",
-    "birth_dates = bat_birthdates.loc[pup_calls[\"calling_bat\"]][\"birthdate\"].reset_index(drop=True)\n",
-    "ages = pup_calls[\"start_time\"] - birth_dates\n",
-    "pup_calls[\"calling_bat_age_in_days\"] = ages.dt.days\n",
-    "pup_calls[\"calling_bat_age_in_weeks\"] = ages.dt.days // 7\n",
-    "pup_calls[\"call_rms\"] = pup_calls[\"call_rms\"] + 55"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Load data for adults\n",
-    "==="
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def single_value(values):\n",
-    "    value_set = set(values)\n",
-    "    if len(value_set) != 1:\n",
-    "        raise ValueError(\"non-unity set\")\n",
-    "    return next(iter(value_set))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "adult_calls = pd.read_csv(\"../adult_calls.csv\",\n",
-    "                          parse_dates=[\"start_time\"],\n",
-    "                          na_values=[\"--\"],\n",
-    "                          dtype=dict(calling_bat_deaf=np.bool, session_id=np.int),\n",
-    "                          index_col=[\"session_id\", \"call_id\"])\n",
-    "adult_calls[\"recording_day\"] = adult_calls[\"start_time\"].dt.date\n",
-    "adult_calls[\"call_rms\"] -= adult_calls[\"call_rms\"].min()\n",
-    "adult_calls[\"group\"] = np.where(adult_calls[\"calling_bat_deaf\"], \"deaf\", \"hearing\")\n",
-    "\n",
-    "adult_sessions = pd.read_csv(\"../adult_sessions.csv\",\n",
-    "                             parse_dates=[\"start_time\"],\n",
-    "                             dtype=dict(group=\"category\"),\n",
-    "                             index_col=0)\n",
-    "\n",
-    "adult_recording_days = adult_sessions[[\"group\"]].groupby(adult_sessions[\"start_time\"].dt.date).agg(single_value)\n",
-    "adult_recording_days.index.name = \"recording_day\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Data to be dropped\n",
-    "==="
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Possible echolocation calls (3 ms or shorter)\n",
-    "---"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pup_calls = pup_calls[pup_calls[\"call_duration\"] > 3e-3]\n",
-    "adult_calls = adult_calls[adult_calls[\"call_duration\"] > 3e-3]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Sessions shorter than 20 minutes\n",
-    "---"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "session_gaps = pup_sessions.sort_values(\"start_time\")[\"start_time\"].diff()\n",
-    "new_index = session_gaps.index.insert(0, -1)\n",
-    "new_index = new_index[:-1]\n",
-    "session_gaps.index = new_index\n",
-    "session_gaps.drop(-1, inplace=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ids_to_drop = session_gaps[session_gaps < pd.Timedelta(20, \"m\")].index"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pup_calls = pup_calls[~pup_calls[\"session_id\"].isin(ids_to_drop)]\n",
-    "pup_sessions = pup_sessions.drop(ids_to_drop)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Last two recordings with mother for pups that have more than others\n",
-    "---"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sorted_pup_mother_sessions = pup_sessions[pup_sessions[\"animal2\"].str.startswith(\"m\")].sort_values(\"start_time\")\n",
-    "session_counts_with_mothers = sorted_pup_mother_sessions[\"animal1\"].value_counts()\n",
-    "num_sessions_to_drop = session_counts_with_mothers - session_counts_with_mothers.min()\n",
-    "\n",
-    "ids_to_drop = set()\n",
-    "for bat, drop_count in num_sessions_to_drop.iteritems():\n",
-    "    if drop_count == 0:\n",
-    "        continue\n",
-    "    this_bat_sessions = sorted_pup_mother_sessions[sorted_pup_mother_sessions[\"animal1\"] == bat]\n",
-    "    ids_to_drop = ids_to_drop.union(this_bat_sessions.tail(drop_count).index)\n",
-    "\n",
-    "pup_calls = pup_calls[~pup_calls[\"session_id\"].isin(ids_to_drop)]\n",
-    "pup_sessions = pup_sessions.drop(index=ids_to_drop)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Compute activity statistics (separately for pups before and after deafening)\n",
-    "==="
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "calls_per_pup_and_session = pup_calls.reset_index().groupby([\"calling_bat\", \"session_id\"])[[\"calling_bat\"]].count() / (20*6) # per 10 seconds\n",
-    "calls_per_pup_and_session.columns = [\"calls_per_10s\"]\n",
-    "calls_per_pup_and_session.reset_index(inplace=True)\n",
-    "\n",
-    "session_dates = pup_sessions.loc[calls_per_pup_and_session[\"session_id\"]][\"start_time\"]\n",
-    "birth_dates = bat_birthdates.loc[calls_per_pup_and_session[\"calling_bat\"]][\"birthdate\"]\n",
-    "calls_per_pup_and_session[\"calling_bat_age_in_days\"] = \\\n",
-    "    (session_dates.reset_index(drop=True) - birth_dates.reset_index(drop=True)).dt.days\n",
-    "calls_per_pup_and_session[\"calling_bat_age_in_weeks\"] = \\\n",
-    "    calls_per_pup_and_session[\"calling_bat_age_in_days\"] // 7\n",
-    "calls_per_pup_and_session = pd.merge(calls_per_pup_and_session, pup_sessions[[\"before_deafening\"]],\n",
-    "                                     left_on=\"session_id\", right_index=True,)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dfs = []\n",
-    "\n",
-    "for before_deafening in [False, True]:\n",
-    "    mask = pup_calls[\"before_deafening\"]\n",
-    "    if not before_deafening:\n",
-    "        mask = ~mask\n",
-    "    gr = pup_calls[mask].groupby([\"calling_bat\", \"calling_bat_age_in_weeks\"])\n",
-    "    gr = gr[[\"call_duration\", \"mean_aperiodicity\", \"f0_mean\", \"call_rms\"]]\n",
-    "    params_per_pup_and_week = gr.median()\n",
-    "    params_per_pup_and_week[\"calls_this_week\"] = gr.size()\n",
-    "    \n",
-    "    mask = calls_per_pup_and_session[\"before_deafening\"]\n",
-    "    if not before_deafening:\n",
-    "        mask = ~mask\n",
-    "    gr = calls_per_pup_and_session[mask].groupby([\"calling_bat\", \"calling_bat_age_in_weeks\"])\n",
-    "    params_per_pup_and_week[\"calls_per_10s\"] = gr[\"calls_per_10s\"].mean()\n",
-    "    params_per_pup_and_week.reset_index(inplace=True)\n",
-    "    params_per_pup_and_week[\"before_deafening\"] = before_deafening\n",
-    "    \n",
-    "    dfs.append(params_per_pup_and_week)\n",
-    "    \n",
-    "params_per_pup_and_week = pd.concat(dfs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "gr = adult_calls.groupby([\"group\", \"session_id\"], observed=True)\n",
-    "calls_per_adult_group_and_session = gr.size().to_frame(name=\"calls_per_10s\").reset_index()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Quantities of extracted data\n",
-    "==="
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"Total calls by pups: {}\\nCalls by deaf pups: {} ({:.1f} %)\\nCalls by hearing pups: {} ({:.1f} %)\".format(\n",
-    "        len(pup_calls),\n",
-    "        np.sum(pup_calls[\"calling_bat_deaf\"]),\n",
-    "        np.sum(pup_calls[\"calling_bat_deaf\"]) / len(pup_calls) * 100,\n",
-    "        np.sum(~pup_calls[\"calling_bat_deaf\"]),\n",
-    "        np.sum(~pup_calls[\"calling_bat_deaf\"] / len(pup_calls) * 100)))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"Total calls by adults: {}\\nCalls by deaf adults: {} ({:.1f} %)\\nCalls by hearing adults: {} ({:.1f} %)\".format(\n",
-    "        len(adult_calls),\n",
-    "        np.sum(adult_calls[\"calling_bat_deaf\"]),\n",
-    "        np.sum(adult_calls[\"calling_bat_deaf\"]) / len(adult_calls) * 100,\n",
-    "        np.sum(~adult_calls[\"calling_bat_deaf\"]),\n",
-    "        np.sum(~adult_calls[\"calling_bat_deaf\"] / len(adult_calls) * 100)))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Main figure\n",
-    "==="
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def plot_trajectories(ax, values, parameter, scale):\n",
-    "    line_artists = {}\n",
-    "    for bat, df in values.groupby(\"calling_bat\"):\n",
-    "        bat_color, bat_marker = bat_colors[bat], bat_markers[bat]\n",
-    "        line_artist, = ax.plot(df[\"calling_bat_age_in_weeks\"] + 1, df[parameter] * scale,\n",
-    "                               c=bat_color, linewidth=0.8)\n",
-    "        ax.plot(df[\"calling_bat_age_in_weeks\"] + 1, df[parameter] * scale,\n",
-    "                c=bat_color, marker=bat_marker, markersize=5, clip_on=False,\n",
-    "                markerfacecolor=\"none\", linestyle=\"\")\n",
-    "        line_artists[bat] = line_artist\n",
-    "    return line_artists"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def plot_histograms(ax, values, parameter, scale, y_range):\n",
-    "    data = {}\n",
-    "\n",
-    "    if \"group\" in values:\n",
-    "        for group, hearing in [(\"deaf\", False), (\"hearing\", True)]:\n",
-    "            data[hearing] = (values.loc[values[\"group\"] == group, parameter] * scale).values\n",
-    "    else:\n",
-    "        for hearing, df in values.groupby(values[\"calling_bat\"].isin(hearing_pups)):\n",
-    "            data[hearing] = (df[parameter] * scale).values\n",
-    "\n",
-    "    ax.hist([data[True], data[False]], density=True,\n",
-    "            range=y_range, bins=15,\n",
-    "            color=[group_colors[\"hearing\"], group_colors[\"deaf\"]],\n",
-    "            orientation=\"horizontal\", rwidth=0.85)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def plot_boxplots(ax, values, parameter, scale, test_significance=False):\n",
-    "    data = []\n",
-    "    for group in [\"deaf\", \"hearing\"]:\n",
-    "        data.append(scale * values.loc[values[\"group\"] == group, parameter].dropna().values)\n",
-    "\n",
-    "    artists = boxplot_ax.boxplot(data, vert=True, manage_ticks=False, showfliers=False, widths=0.75,\n",
-    "                                 patch_artist=True,\n",
-    "                                 boxprops=dict(linewidth=0.5),\n",
-    "                                 whiskerprops=dict(linewidth=0.5),\n",
-    "                                 medianprops=dict(linewidth=1, color=\"bisque\"),\n",
-    "                                 capprops=dict(linewidth=0.5))\n",
-    "\n",
-    "    artists[\"boxes\"][0].set_facecolor(group_colors[\"deaf\"])\n",
-    "    artists[\"boxes\"][1].set_facecolor(group_colors[\"hearing\"])\n",
-    "    \n",
-    "    if test_significance:\n",
-    "        p = sps.mannwhitneyu(data[0], data[1], alternative=\"two-sided\").pvalue\n",
-    "        if p < 0.05:\n",
-    "            ax.annotate(\"*\", (0.8, 0.8), xycoords=\"axes fraction\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "call_parameters = [(\"calls_per_10s\", 1, \"Vocal activity\\n[calls/10 s]\", (0, 100), 20),\n",
-    "                   (\"call_rms\", 1, \"Amplitude\\n[dB]\", (0, 54), 12),\n",
-    "                   (\"call_duration\", 1e3, \"Duration\\n[ms]\", (0, 80), 20),\n",
-    "                   (\"f0_mean\", 1e-3, \"Fundamental\\nfrequency [kHz]\", (0, 40), 10),\n",
-    "                   (\"mean_aperiodicity\", 1, \"Aperiodicity\\n[1]\", (0, 1), 0.25)]\n",
-    "\n",
-    "fig_width = rst_total_width\n",
-    "fig_height = 0.7 * fig_width\n",
-    "left_margin, right_margin = 0.75, 0.1\n",
-    "bottom_margin, top_margin = 0.45, 0.45\n",
-    "spacing = 0.2\n",
-    "boxplot_extra_spacing = 0.1\n",
-    "\n",
-    "row_height = (fig_height - bottom_margin - top_margin - (len(call_parameters) - 1) * spacing) / len(call_parameters)\n",
-    "trajectory_width = 3.2\n",
-    "boxplot_width = 0.2\n",
-    "histogram_width = (fig_width - left_margin - right_margin - trajectory_width - boxplot_width - 4 * spacing - boxplot_extra_spacing) / 3\n",
-    "\n",
-    "fig = plt.figure(figsize=(fig_width, fig_height))\n",
-    "\n",
-    "roman_numerals = [\"I\", \"II\", \"III\", \"IV\", \"V\"]\n",
-    "\n",
-    "for i, (parameter, scale, y_label, y_range, y_tick_interval) in enumerate(reversed(call_parameters)):\n",
-    "    early_ax = \\\n",
-    "        fig.add_axes([left_margin / fig_width,\n",
-    "                      (bottom_margin + i * (row_height + spacing)) / fig_height,\n",
-    "                      histogram_width / fig_width,\n",
-    "                      row_height / fig_height])\n",
-    "    late_ax = \\\n",
-    "        fig.add_axes([(left_margin + histogram_width + spacing) / fig_width,\n",
-    "                      (bottom_margin + i * (row_height + spacing)) / fig_height,\n",
-    "                      histogram_width / fig_width,\n",
-    "                      row_height / fig_height])\n",
-    "    adult_ax = \\\n",
-    "        fig.add_axes([(left_margin + 2 * (histogram_width + spacing)) / fig_width,\n",
-    "                      (bottom_margin + i * (row_height + spacing)) / fig_height,\n",
-    "                      histogram_width / fig_width,\n",
-    "                      row_height / fig_height])\n",
-    "    trajectory_ax = \\\n",
-    "        fig.add_axes([(left_margin + 3 * (histogram_width + spacing)) / fig_width,\n",
-    "                      (bottom_margin + i * (row_height + spacing)) / fig_height,\n",
-    "                      trajectory_width / fig_width,\n",
-    "                      row_height / fig_height])\n",
-    "    boxplot_ax = \\\n",
-    "        fig.add_axes([(left_margin + 3 * histogram_width + trajectory_width + 4 * spacing + boxplot_extra_spacing) / fig_width,\n",
-    "                      (bottom_margin + i * (row_height + spacing)) / fig_height,\n",
-    "                      boxplot_width / fig_width,\n",
-    "                      row_height / fig_height])\n",
-    "    overall_ax = \\\n",
-    "        fig.add_axes([left_margin / fig_width,\n",
-    "                      (bottom_margin + i * (row_height + spacing)) / fig_height,\n",
-    "                      (fig_width - left_margin - right_margin) / fig_width,\n",
-    "                      row_height / fig_height], frameon=False)\n",
-    "    overall_ax.set_xticks([])\n",
-    "    overall_ax.set_yticks([])\n",
-    "    \n",
-    "    if parameter == \"calls_per_10s\":\n",
-    "        df = calls_per_pup_and_session\n",
-    "    else:\n",
-    "        df = pup_calls\n",
-    "    plot_histograms(early_ax,\n",
-    "                    df.query(\"before_deafening\"),\n",
-    "                    parameter, scale, y_range)\n",
-    "    plot_histograms(late_ax,\n",
-    "                    df.query(\"not before_deafening\"),\n",
-    "                    parameter, scale, y_range)\n",
-    "    \n",
-    "    if parameter == \"calls_per_10s\":\n",
-    "        df = calls_per_adult_group_and_session\n",
-    "    else:\n",
-    "        df = adult_calls\n",
-    "    plot_histograms(adult_ax, df,\n",
-    "                    parameter, scale, y_range if parameter != \"calls_per_10s\" else (0, 400))\n",
-    "    \n",
-    "    plot_trajectories(trajectory_ax,\n",
-    "                      params_per_pup_and_week.query(\"not before_deafening and calling_bat_age_in_weeks <= 24\"),\n",
-    "                      parameter, scale)\n",
-    "    \n",
-    "    trajectory_ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(4))\n",
-    "    trajectory_ax.set_xlim(2, 25)\n",
-    "    trajectory_ax.spines[\"left\"].set_position((\"outward\", 3))\n",
-    "\n",
-    "    plot_boxplots(boxplot_ax, df, parameter, scale, test_significance=True)\n",
-    "    boxplot_ax.set_xlim(0.25, 2.75)\n",
-    "    \n",
-    "    if i == 0:\n",
-    "        trajectory_ax.set_xlabel(\"Week of age\")\n",
-    "        trajectory_ax.xaxis.set_major_formatter(mpl.ticker.FuncFormatter(lambda x, pos: \"{}th\".format(int(x))))\n",
-    "        #trajectory_ax.xaxis.set_label_coords(0.5, -0.4)\n",
-    "    else:\n",
-    "        for ax in [early_ax, late_ax, trajectory_ax]:\n",
-    "            ax.set_xticklabels([])\n",
-    "            \n",
-    "    if i == len(call_parameters) - 1:\n",
-    "        early_ax.set_title(\"a)\\n\\n< 2 weeks\", fontsize=8)\n",
-    "        late_ax.set_title(\"b)\\n\\n2–25 weeks\", fontsize=8)\n",
-    "        adult_ax.set_title(\"c)\\n\\n2 years\", fontsize=8)\n",
-    "        boxplot_ax.set_title(\"e)\\n\\n2 years\", fontsize=8)\n",
-    "        trajectory_ax.set_title(\"d)\\nDevelopment trajectory of median parameter values\\nat 2–25 weeks of age\", fontsize=8)\n",
-    "        early_ax.annotate(\"hearing\",\n",
-    "                         xy=(0, 1), xycoords=\"axes fraction\",\n",
-    "                         xytext=(5, -10), textcoords=\"offset points\",\n",
-    "                         verticalalignment=\"top\", horizontalalignment=\"left\",\n",
-    "                         color=group_colors[\"hearing\"])\n",
-    "        early_ax.annotate(\"deafened\",\n",
-    "                         xy=(0, 1), xycoords=\"axes fraction\",\n",
-    "                         xytext=(5, 0), textcoords=\"offset points\",\n",
-    "                         verticalalignment=\"top\", horizontalalignment=\"left\",\n",
-    "                         color=group_colors[\"deaf\"])\n",
-    "            \n",
-    "    for ax in [early_ax, late_ax, adult_ax, trajectory_ax, boxplot_ax]:\n",
-    "        if ax is adult_ax and parameter == \"calls_per_10s\":\n",
-    "            ax.set_ylim(0, 400)\n",
-    "            ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(100))\n",
-    "        else:\n",
-    "            ax.set_ylim(y_range[0], y_range[1])\n",
-    "            ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(y_tick_interval))\n",
-    "\n",
-    "        for spine in [\"right\", \"top\"]:\n",
-    "            ax.spines[spine].set_visible(False)\n",
-    "        ax.spines[\"bottom\"].set_position((\"outward\", 3))\n",
-    "            \n",
-    "    for ax in [late_ax, adult_ax, trajectory_ax, boxplot_ax]:\n",
-    "        if (ax is not adult_ax or parameter != \"calls_per_10s\") \\\n",
-    "                and (ax is not trajectory_ax or parameter != \"calls_per_10s\"):\n",
-    "            ax.set_yticklabels([])\n",
-    "        \n",
-    "    for ax in [early_ax, late_ax, adult_ax, boxplot_ax]:\n",
-    "        ax.set_xticks([])\n",
-    "        ax.spines[\"bottom\"].set_visible(False)\n",
-    "        \n",
-    "    early_ax.set_ylabel(y_label)\n",
-    "    early_ax.yaxis.set_label_coords(-0.32 / histogram_width, 0.5)\n",
-    "    \n",
-    "    overall_ax.set_ylabel(roman_numerals[-i - 1],\n",
-    "                          rotation=\"0\",\n",
-    "                          verticalalignment=\"center\",\n",
-    "                         labelpad=49)\n",
-    "    \n",
-    "fig.savefig(\"../parameters.pdf\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Parameter table\n",
-    "==="
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "per_session_parameters = [(\"call_rms\", 1, \"Amplitude [dB]\"),\n",
-    "                          (\"call_duration\", 1e3, \"Duration [ms]\"),\n",
-    "                          (\"f0_min\", 1e-3, \"Minimum fundamental frequency [kHz]\"),\n",
-    "                          (\"f0_mean\", 1e-3, \"Mean fundamental frequency [kHz]\"),\n",
-    "                          (\"f0_max\", 1e-3, \"Maximum fundamental frequency [kHz]\"),\n",
-    "                          (\"f0_start\", 1e-3, \"Fundamental frequency at call onset [kHz]\"),\n",
-    "                          (\"f0_end\", 1e-3, \"Fundamental frequency at end of call [kHz]\"),\n",
-    "                          (\"f_min\", 1e-3, \"Minimum frequency [kHz]\"),\n",
-    "                          (\"f_max\", 1e-3, \"Maximum frequency [kHz]\"),\n",
-    "                          (\"bandwidth\", 1e-3, \"Bandwidth [kHz]\"),\n",
-    "                          (\"spectral_centroid\", 1e-3, \"Spectral centroid frequency [kHz]\"),\n",
-    "                          (\"mean_aperiodicity\", 1, \"Aperiodicity [1]\")]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "adult_calls.loc[:, \"bandwidth\"] = adult_calls[\"f_max\"] - adult_calls[\"f_min\"]\n",
-    "def q25(series):\n",
-    "    return series.quantile(0.25)\n",
-    "def q75(series):\n",
-    "    return series.quantile(0.75)\n",
-    "\n",
-    "adult_groups = adult_calls[[p[0] for p in per_session_parameters] + [\"group\"]].groupby(\"group\")\n",
-    "adult_summary = adult_groups.agg([q25, \"median\", q75])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "per_session_parameters.insert(0, (\"calls_per_10s\", 1, \"Vocal activity [calls/10 s]\"))\n",
-    "adult_activity_summary = calls_per_adult_group_and_session.groupby(\"group\")[[\"calls_per_10s\"]].agg([q25, \"median\", q75])\n",
-    "adult_summary = pd.concat([adult_activity_summary, adult_summary], axis=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fout = io.StringIO()\n",
-    "print(\"<!DOCTYPE html><body><table>\", file=fout)\n",
-    "print(\"<tr><th></th><th colspan='3'>Hearing (N={})</th><th colspan='3'>Deaf (N={})</th></tr>\".format(*adult_calls[\"group\"].value_counts().loc[[\"hearing\", \"deaf\"]]), file=fout)\n",
-    "print(\"<tr><th></th>{}<th>p &lt; 0.05 (Mann–Whitney)</th></tr>\".format(\"<th>Q25</th><th>Q50</th><th>Q75</th>\" * 2), file=fout)\n",
-    "\n",
-    "for parameter_name, scale, parameter_description in per_session_parameters:\n",
-    "    print(\"<tr><th>{}</th>\".format(parameter_description), file=fout, end=\"\")\n",
-    "    test_data = []\n",
-    "    for group in [\"hearing\", \"deaf\"]:\n",
-    "        for statistic in [\"q25\", \"median\", \"q75\"]:\n",
-    "            print(\"<td>{:.2f}</td>\".format(adult_summary.loc[group, (parameter_name, statistic)] * scale), file=fout, end=\"\")\n",
-    "        if parameter_name != \"calls_per_10s\":\n",
-    "            test_data.append(adult_calls.loc[adult_calls[\"group\"] == group, parameter_name].values)\n",
-    "    if not test_data:\n",
-    "        print(\"<td>(see plot)</td>\", file=fout)\n",
-    "    else:\n",
-    "        p = sps.mannwhitneyu(test_data[0], test_data[1], alternative=\"two-sided\").pvalue\n",
-    "        if p < 0.05:\n",
-    "            print(\"<td>*</td>\".format(p), file=fout)\n",
-    "        else:\n",
-    "            print(\"<td>{:.2f}</td>\".format(p), file=fout)\n",
-    "    print(\"</tr>\", file=fout)\n",
-    "print(\"</table></body>\", file=fout)\n",
-    "\n",
-    "parameters_table = fout.getvalue()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ipd.display(ipd.HTML(parameters_table))\n",
-    "with open(\"../parameters_table.html\", \"wt\") as fout:\n",
-    "    print(parameters_table, file=fout, end=\"\")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

+ 584 - 0
vocalisations/scripts/make_figures.py

@@ -0,0 +1,584 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+import sys
+import os
+import io
+import subprocess
+
+import numpy as np
+import pandas as pd
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import scipy.stats as sps
+import scipy.io.wavfile as siowav
+import scipy.signal as sps
+
+os.makedirs("../output", exist_ok=True)
+
+###
+# Global plot settings
+
+mpl.rcParams["font.family"] = "TeX Gyre Pagella"
+mpl.rcParams["font.size"] = 8
+rst_column_width = 3.3
+rst_total_width = 7
+
+###
+# Animals
+
+ordered_deaf_pups = ["b3", "b5", "b8"]
+deaf_pups = set(ordered_deaf_pups)
+ordered_hearing_pups = ["b2", "b4", "b7"]
+hearing_pups = set(ordered_hearing_pups)
+all_pups = deaf_pups.union(hearing_pups)
+
+bat_birthdates = pd.DataFrame([("b2", "2017/01/26"),
+                               ("b4", "2017/01/30"),
+                               ("b7", "2017/02/08"),
+                               ("b3", "2017/01/29"),
+                               ("b5", "2017/02/02"),
+                               ("b8", "2017/02/08")], columns=["bat", "birthdate"])
+bat_birthdates["birthdate"] = pd.to_datetime(bat_birthdates["birthdate"])
+bat_birthdates.set_index("bat", inplace=True)
+
+###
+# Group colors and identity markers
+
+group_colors = dict(hearing="#e1be6a", deaf="#40b0a6")
+bat_colors = {**{bat: group_colors["deaf"] for bat in deaf_pups},
+              **{bat: group_colors["hearing"] for bat in hearing_pups}}
+bat_colors["b4"] = "#9b803e"
+bat_markers = dict(zip(sorted(all_pups), ["P", "o", "*", "s", "X", "D"]))
+
+
+###
+# Load data for juveniles
+
+pup_sessions = pd.read_csv("../pup_sessions.csv",
+        index_col=0, parse_dates=["start_time"])
+pup_calls = pd.read_csv("../pup_calls.csv",
+        index_col=[0, 1], parse_dates=["start_time"], na_values=["--"])
+# filter out mothers
+pup_calls = pup_calls[pup_calls["calling_bat"].isin(all_pups)].reset_index()
+
+# determine ages (in full days) of the calling bats
+birth_dates = bat_birthdates.loc[pup_calls["calling_bat"]]["birthdate"].reset_index(drop=True)
+ages = pup_calls["start_time"] - birth_dates
+pup_calls["calling_bat_age_in_days"] = ages.dt.days
+pup_calls["calling_bat_age_in_weeks"] = ages.dt.days // 7
+pup_calls["call_rms"] = pup_calls["call_rms"] + 55
+pup_calls["calling_bat_deaf"] = pup_calls["calling_bat_deaf"] != 0
+
+# exclude sessions without mothers
+pup_calls = pup_calls[~pup_calls["other_bat"].isin(all_pups)]
+pup_sessions = pup_sessions[~pup_sessions["animal2"].isin(all_pups)]
+
+###
+# Load data for adults
+
+def single_value(values):
+    value_set = set(values)
+    if len(value_set) != 1:
+        raise ValueError("non-unity set")
+    return next(iter(value_set))
+
+adult_calls = pd.read_csv("../adult_calls.csv",
+        parse_dates=["start_time"],
+        na_values=["--"],
+        dtype=dict(group="category", session_id="int"),
+        index_col=["session_id", "call_id"])
+adult_calls["recording_day"] = adult_calls["start_time"].dt.date
+adult_calls["call_rms"] -= adult_calls["call_rms"].min()
+
+adult_sessions = pd.read_csv("../adult_sessions.csv",
+        parse_dates=["start_time"],
+        dtype=dict(group="category"),
+        index_col=0)
+
+adult_recording_days = adult_sessions[["group"]] \
+    .groupby(adult_sessions["start_time"].dt.date) \
+    .agg(single_value)
+adult_recording_days.index.name = "recording_day"
+
+
+###
+# Data that is excluded from the analysis
+
+# 1. Possible echolocation calls (3 ms or shorter)
+
+pup_calls = pup_calls[pup_calls["call_duration"] > 3e-3]
+adult_calls = adult_calls[adult_calls["call_duration"] > 3e-3]
+
+# 2. Sessions that are shorter than 20 minutes
+
+session_gaps = pup_sessions.sort_values("start_time")["start_time"].diff()
+new_index = session_gaps.index.insert(0, -1)
+new_index = new_index[:-1]
+session_gaps.index = new_index
+session_gaps.drop(-1, inplace=True)
+
+ids_to_drop = session_gaps[session_gaps < pd.Timedelta(20, "m")].index
+
+pup_calls = pup_calls[~pup_calls["session_id"].isin(ids_to_drop)]
+pup_sessions = pup_sessions.drop(ids_to_drop)
+
+###
+# Last two recordings with mother for pups that have more than others
+# (to ensure the same total recording time for each individual)
+
+sorted_pup_mother_sessions = pup_sessions[pup_sessions["animal2"].str.startswith("m")] \
+        .sort_values("start_time")
+session_counts_with_mothers = sorted_pup_mother_sessions["animal1"].value_counts()
+num_sessions_to_drop = session_counts_with_mothers - session_counts_with_mothers.min()
+
+ids_to_drop = set()
+for bat, drop_count in num_sessions_to_drop.iteritems():
+    if drop_count == 0:
+        continue
+    this_bat_sessions = sorted_pup_mother_sessions[sorted_pup_mother_sessions["animal1"] == bat]
+    ids_to_drop = ids_to_drop.union(this_bat_sessions.tail(drop_count).index)
+
+pup_calls = pup_calls[~pup_calls["session_id"].isin(ids_to_drop)]
+pup_sessions = pup_sessions.drop(index=ids_to_drop)
+
+###
+# Mark pre-deafening calls and sessions
+
+sorted_pup_mother_sessions = pup_sessions[pup_sessions["animal2"].str.startswith("m")] \
+        .sort_values("start_time")
+pup_calls["before_deafening"] = False
+pup_sessions["before_deafening"] = False
+for pup, sessions in sorted_pup_mother_sessions.groupby("animal1"):
+    first_id = sessions.index[0]
+    pup_sessions.loc[first_id, "before_deafening"] = True
+    pup_calls.loc[pup_calls["session_id"] == first_id, "before_deafening"] = True
+
+###
+# Compute activity statistics (separately for pups before and after deafening)
+
+calls_per_pup_and_session = pup_calls.reset_index() \
+        .groupby(["calling_bat", "session_id"])[["calling_bat"]].count() \
+        / (20*6)    # per 10 seconds
+calls_per_pup_and_session.columns = ["calls_per_10s"]
+calls_per_pup_and_session.reset_index(inplace=True)
+
+session_dates = pup_sessions.loc[calls_per_pup_and_session["session_id"]]["start_time"]
+birth_dates = bat_birthdates.loc[calls_per_pup_and_session["calling_bat"]]["birthdate"]
+calls_per_pup_and_session["calling_bat_age_in_days"] = \
+        (session_dates.reset_index(drop=True) - birth_dates.reset_index(drop=True)).dt.days
+calls_per_pup_and_session["calling_bat_age_in_weeks"] = \
+        calls_per_pup_and_session["calling_bat_age_in_days"] // 7
+calls_per_pup_and_session = \
+        pd.merge(calls_per_pup_and_session, pup_sessions[["before_deafening"]],
+                 left_on="session_id", right_index=True,)
+calls_per_pup_and_session.loc[:, "calling_bat_deaf"] = \
+        calls_per_pup_and_session["calling_bat"].isin(deaf_pups)
+
+dfs = []
+
+for before_deafening in [False, True]:
+    mask = pup_calls["before_deafening"]
+    if not before_deafening:
+        mask = ~mask
+    gr = pup_calls[mask].groupby(["calling_bat", "calling_bat_age_in_weeks"])
+    gr = gr[["call_duration", "mean_aperiodicity", "f0_mean", "call_rms"]]
+    params_per_pup_and_week = gr.median()
+    params_per_pup_and_week["calls_this_week"] = gr.size()
+
+    mask = calls_per_pup_and_session["before_deafening"]
+    if not before_deafening:
+        mask = ~mask
+    gr = calls_per_pup_and_session[mask].groupby(["calling_bat", "calling_bat_age_in_weeks"])
+    params_per_pup_and_week["calls_per_10s"] = gr["calls_per_10s"].mean()
+    params_per_pup_and_week.reset_index(inplace=True)
+    params_per_pup_and_week["before_deafening"] = before_deafening
+
+    dfs.append(params_per_pup_and_week)
+
+params_per_pup_and_week = pd.concat(dfs)
+
+gr = adult_calls.groupby(["group", "session_id"], observed=True)
+calls_per_adult_group_and_session = gr.size().to_frame(name="calls_per_10s").reset_index()
+
+###
+# Quantities of extracted data
+
+print("Total calls by pups: {}\nCalls by deaf pups: {} ({:.1f} %)\nCalls by hearing pups: {} ({:.1f} %)".format(
+        len(pup_calls),
+        np.sum(pup_calls["calling_bat_deaf"]),
+        np.sum(pup_calls["calling_bat_deaf"]) / len(pup_calls) * 100,
+        np.sum(~pup_calls["calling_bat_deaf"]),
+        np.sum(~pup_calls["calling_bat_deaf"] / len(pup_calls) * 100)))
+
+print("Total calls by adults: {}\nCalls by deaf adults: {} ({:.1f} %)\nCalls by hearing adults: {} ({:.1f} %)".format(
+        len(adult_calls),
+        np.sum(adult_calls["group"] == "deaf"),
+        np.sum(adult_calls["group"] == "deaf") / len(adult_calls) * 100,
+        np.sum(adult_calls["group"] == "hearing"),
+        np.sum(adult_calls["group"] == "hearing") / len(adult_calls) * 100))
+
+###
+# Run the statistical evaluation
+
+pup_calls.to_csv("../output/pup_calls_for_model.csv", index=False)
+calls_per_pup_and_session.to_csv("../output/pup_activity_for_model.csv", index=False)
+
+subprocess.run(["Rscript", "analysis.R"])
+
+###
+# Load the p-value table generated by R
+
+pup_ps = pd.read_csv("../output/pup_model_results.csv", index_col="parameter")
+
+###
+# Development trajectory of call parameters
+
+def plot_trajectories(ax, values, parameter, scale):
+    line_artists = {}
+    for bat, df in values.groupby("calling_bat"):
+        bat_color, bat_marker = bat_colors[bat], bat_markers[bat]
+        line_artist, = ax.plot(df["calling_bat_age_in_weeks"] + 1, df[parameter] * scale,
+                               c=bat_color, linewidth=0.8)
+        ax.plot(df["calling_bat_age_in_weeks"] + 1, df[parameter] * scale,
+                c=bat_color, marker=bat_marker, markersize=5, clip_on=False,
+                markerfacecolor="none", linestyle="")
+        line_artists[bat] = line_artist
+    return line_artists
+
+def plot_histograms(ax, values, parameter, scale, y_range,
+                    p_values=None):
+    data = {}
+
+    if "group" in values:
+        for group, hearing in [("deaf", False), ("hearing", True)]:
+            data[hearing] = (values.loc[values["group"] == group, parameter] * scale).values
+    else:
+        for hearing, df in values.groupby(values["calling_bat"].isin(hearing_pups)):
+            data[hearing] = (df[parameter] * scale).values
+
+    ax.hist([data[True], data[False]], density=True,
+            range=y_range, bins=15,
+            color=[group_colors["hearing"], group_colors["deaf"]],
+            orientation="horizontal", rwidth=0.85)
+
+    if p_values is not None:
+        p = p_values.loc[parameter, "p_group_adjusted"]
+        if p < 0.05:
+            ax.annotate("*", (0.5, 0.6),
+                        xycoords="axes fraction",
+                        ha="center")
+
+def plot_individual_boxplots(ax, values, parameter, scale,
+                             annotate=False, p_values=None):
+    data = []
+    bat_groups = []
+    bats = []
+    for group, individuals in [("deaf", ordered_deaf_pups),
+                               ("hearing", ordered_hearing_pups)]:
+        for bat in individuals:
+            data.append(scale * values.loc[values["calling_bat"] == bat, parameter].dropna().values)
+            bats.append(bat)
+            bat_groups.append(group)
+
+    artists = ax.boxplot(data, vert=True, manage_ticks=False, showfliers=False,
+                         widths=0.2, positions=np.arange(len(data)) * 0.3,
+                         patch_artist=True,
+                         boxprops=dict(linewidth=0.5),
+                         whiskerprops=dict(linewidth=0.5),
+                         medianprops=dict(linewidth=1, color="#8c1010"),
+                         capprops=dict(linewidth=0.5))
+
+    for artist, bat in zip(artists["boxes"], bats):
+        artist.set_facecolor(bat_colors[bat])
+
+    if annotate:
+        for i, (bat_group, bat) in enumerate(zip(bat_groups, bats)):
+            xy_top = artists["whiskers"][2 * i + 1].get_xydata()[1]
+            ax.plot(xy_top[0], xy_top[1] + 10, marker=bat_markers[bat],
+                       c=bat_colors[bat], markerfacecolor="none",
+                       markersize=3, markeredgewidth=0.5)
+
+    if p_values is not None:
+        p = p_values.loc[parameter, "p_group_adjusted"]
+        if p < 0.05:
+            ax.annotate("*", (0.5, 0.9),
+                        xycoords="axes fraction",
+                        ha="center")
+
+
+# In[35]:
+
+
+def plot_pooled_boxplots(ax, values, parameter, scale):
+    data = []
+    for group in ["deaf", "hearing"]:
+        data.append(scale * values.loc[values["group"] == group, parameter].dropna().values)
+
+    artists = ax.boxplot(data, vert=True, manage_ticks=False, showfliers=False,
+                         widths=0.75, positions=np.arange(len(data)),
+                         patch_artist=True,
+                         boxprops=dict(linewidth=0.5),
+                         whiskerprops=dict(linewidth=0.5),
+                         medianprops=dict(linewidth=1, color="#8c1010"),
+                         capprops=dict(linewidth=0.5))
+
+    for artist, group in zip(artists["boxes"], ["deaf", "hearing"]):
+        artist.set_facecolor(group_colors[group])
+
+call_parameters = [("calls_per_10s", 1, "Vocal activity\n[calls/10 s]", (0, 100), 20),
+                   ("call_rms", 1, "Amplitude\n[dB]", (0, 54), 12),
+                   ("call_duration", 1e3, "Duration\n[ms]", (0, 80), 20),
+                   ("f0_mean", 1e-3, "Fundamental\nfrequency [kHz]", (0, 40), 10),
+                   ("mean_aperiodicity", 1, "Aperiodicity\n[1]", (0, 1), 0.25)]
+
+fig_width = rst_total_width
+fig_height = 0.7 * fig_width
+left_margin, right_margin = 0.75, 0.1
+bottom_margin, top_margin = 0.45, 0.45
+spacing = 0.2
+
+row_height = (fig_height - bottom_margin - top_margin - (len(call_parameters) - 1) * spacing) / len(call_parameters)
+pup_boxplot_width = 0.5
+extra_adult_spacing = 0.1
+adult_boxplot_width = 0.2
+trajectory_width = (fig_width - left_margin - right_margin
+                    - 2 * pup_boxplot_width - adult_boxplot_width
+                    - 3 * spacing - 2 * extra_adult_spacing)
+
+fig = plt.figure(figsize=(fig_width, fig_height))
+
+roman_numerals = ["I", "II", "III", "IV", "V"]
+
+for i, (parameter, scale, y_label, y_range, y_tick_interval) in enumerate(reversed(call_parameters)):
+    early_ax = fig.add_axes([left_margin / fig_width,
+        (bottom_margin + i * (row_height + spacing)) / fig_height,
+        pup_boxplot_width / fig_width,
+        row_height / fig_height])
+    late_ax = fig.add_axes([(left_margin + pup_boxplot_width + spacing) / fig_width,
+        (bottom_margin + i * (row_height + spacing)) / fig_height,
+        pup_boxplot_width / fig_width,
+        row_height / fig_height])
+    adult_ax = fig.add_axes([(left_margin + 2 * pup_boxplot_width
+                              + 2 * spacing + extra_adult_spacing) / fig_width,
+        (bottom_margin + i * (row_height + spacing)) / fig_height,
+        adult_boxplot_width / fig_width,
+        row_height / fig_height])
+    trajectory_ax = fig.add_axes([(left_margin + 2 * pup_boxplot_width
+                                   + adult_boxplot_width + 3 * spacing
+                                   + 2 * extra_adult_spacing) / fig_width,
+        (bottom_margin + i * (row_height + spacing)) / fig_height,
+        trajectory_width / fig_width,
+        row_height / fig_height])
+    overall_ax = fig.add_axes([left_margin / fig_width,
+        (bottom_margin + i * (row_height + spacing)) / fig_height,
+        (fig_width - left_margin - right_margin) / fig_width,
+        row_height / fig_height], frameon=False)
+    overall_ax.set_xticks([])
+    overall_ax.set_yticks([])
+
+    if parameter == "calls_per_10s":
+        df = calls_per_pup_and_session
+    else:
+        df = pup_calls
+
+    plot_individual_boxplots(early_ax,
+                             df.query("before_deafening"),
+                             parameter, scale,
+                             annotate=(i == len(call_parameters) - 1),
+                             p_values=None)
+    plot_individual_boxplots(late_ax,
+                             df.query("not before_deafening"),
+                             parameter, scale,
+                             annotate=(i == len(call_parameters) - 1),
+                             p_values=pup_ps)
+
+    if parameter == "calls_per_10s":
+        df = calls_per_adult_group_and_session
+    else:
+        df = adult_calls
+
+    plot_pooled_boxplots(adult_ax, df, parameter, scale)
+    adult_ax.set_xlim(-0.75, 1.75)
+
+    plot_trajectories(trajectory_ax,
+                      params_per_pup_and_week.query("not before_deafening "
+                          "and calling_bat_age_in_weeks <= 24"),
+                      parameter, scale)
+
+    trajectory_ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(4))
+    trajectory_ax.set_xlim(2, 25)
+    trajectory_ax.spines["left"].set_position(("outward", 3))
+
+    if i == 0:
+        trajectory_ax.set_xlabel("Week of age")
+        trajectory_ax.xaxis.set_major_formatter(
+                mpl.ticker.FuncFormatter(lambda x, pos: "{}th".format(int(x))))
+    else:
+        for ax in [early_ax, late_ax, trajectory_ax]:
+            ax.set_xticklabels([])
+
+    if i == len(call_parameters) - 1:
+        early_ax.set_title("a)\n< 2 weeks\n", fontsize=8)
+        late_ax.set_title("b)\n2–25 weeks\n", fontsize=8)
+        adult_ax.set_title("c)\n2 years\n", fontsize=8)
+        trajectory_ax.set_title("d)\nDevelopment trajectory of median parameter values"
+            "\nat 2–25 weeks of age", fontsize=8)
+        early_ax.annotate("hearing",
+                xy=(0, 1), xycoords="axes fraction",
+                xytext=(5, -10), textcoords="offset points",
+                verticalalignment="top", horizontalalignment="left",
+                color=group_colors["hearing"])
+        early_ax.annotate("deafened",
+                xy=(0, 1), xycoords="axes fraction",
+                xytext=(5, 0), textcoords="offset points",
+                verticalalignment="top", horizontalalignment="left",
+                color=group_colors["deaf"])
+
+    for ax in [early_ax, late_ax, adult_ax, trajectory_ax]:
+        if ax is adult_ax and parameter == "calls_per_10s":
+            ax.set_ylim(0, 400)
+            ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(100))
+        else:
+            ax.set_ylim(y_range[0], y_range[1])
+            ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(y_tick_interval))
+
+        for spine in ["right", "top"]:
+            ax.spines[spine].set_visible(False)
+        ax.spines["bottom"].set_position(("outward", 3))
+
+    for ax in [late_ax, adult_ax, trajectory_ax]:
+        if (ax is not adult_ax and ax is not trajectory_ax) or parameter != "calls_per_10s":
+            ax.set_yticklabels([])
+
+    for ax in [early_ax, late_ax, adult_ax]:
+        ax.set_xticks([])
+        ax.spines["bottom"].set_visible(False)
+
+    early_ax.set_ylabel(y_label)
+    early_ax.yaxis.set_label_coords(-0.32 / pup_boxplot_width, 0.5)
+
+    overall_ax.set_ylabel(roman_numerals[-i - 1],
+                          rotation="0",
+                          verticalalignment="center",
+                          labelpad=49)
+
+fig.savefig("../output/main_figure.pdf")
+
+###
+# Parameter tables
+
+per_session_parameters = [("call_rms", 1, "Amplitude [dB]"),
+                          ("call_duration", 1e3, "Duration [ms]"),
+                          ("f0_min", 1e-3, "Minimum fundamental frequency [kHz]"),
+                          ("f0_mean", 1e-3, "Mean fundamental frequency [kHz]"),
+                          ("f0_max", 1e-3, "Maximum fundamental frequency [kHz]"),
+                          ("f0_start", 1e-3, "Fundamental frequency at call onset [kHz]"),
+                          ("f0_end", 1e-3, "Fundamental frequency at end of call [kHz]"),
+                          ("f_min", 1e-3, "Minimum frequency [kHz]"),
+                          ("f_max", 1e-3, "Maximum frequency [kHz]"),
+                          ("bandwidth", 1e-3, "Bandwidth [kHz]"),
+                          ("spectral_centroid", 1e-3, "Spectral centroid frequency [kHz]"),
+                          ("mean_aperiodicity", 1, "Aperiodicity [1]")]
+
+adult_calls.loc[:, "bandwidth"] = adult_calls["f_max"] - adult_calls["f_min"]
+pup_calls.loc[:, "bandwidth"] = pup_calls["f_max"] - pup_calls["f_min"]
+def q25(series):
+    return series.quantile(0.25)
+def q75(series):
+    return series.quantile(0.75)
+
+adult_groups = adult_calls[[p[0] for p in per_session_parameters] + ["group"]].groupby("group")
+adult_summary = adult_groups.agg([q25, "median", q75, "mean", "std"])
+
+pup_groups = pup_calls.loc[~pup_calls["before_deafening"],
+        [p[0] for p in per_session_parameters] + ["calling_bat_deaf"]].groupby("calling_bat_deaf")
+pup_summary = pup_groups.agg([q25, "median", q75, "mean", "std"])
+
+pre_deafening_pup_groups = pup_calls.loc[pup_calls["before_deafening"],
+        [p[0] for p in per_session_parameters] + ["calling_bat_deaf"]].groupby("calling_bat_deaf")
+pre_deafening_pup_summary = pre_deafening_pup_groups.agg([q25, "median", q75, "mean", "std"])
+
+per_session_parameters.insert(0, ("calls_per_10s", 1, "Vocal activity [calls/10 s]"))
+adult_activity_summary = calls_per_adult_group_and_session.groupby("group")[["calls_per_10s"]] \
+        .agg([q25, "median", q75, "mean", "std"])
+adult_summary = pd.concat([adult_activity_summary, adult_summary], axis=1)
+
+pup_activity_summary = calls_per_pup_and_session \
+        .query("not before_deafening") \
+        .groupby("calling_bat_deaf")[["calls_per_10s"]] \
+        .agg([q25, "median", q75, "mean", "std"])
+pup_summary = pd.concat([pup_activity_summary, pup_summary], axis=1)
+
+pre_deafening_pup_activity_summary = calls_per_pup_and_session \
+        .query("before_deafening") \
+        .groupby("calling_bat_deaf")[["calls_per_10s"]] \
+        .agg([q25, "median", q75, "mean", "std"])
+pre_deafening_pup_summary = \
+        pd.concat([pre_deafening_pup_activity_summary, pre_deafening_pup_summary], axis=1)
+
+pup_summary.rename({False: "hearing", True: "deaf"}, inplace=True)
+pre_deafening_pup_summary.rename({False: "hearing", True: "deaf"}, inplace=True)
+
+for group in ["hearing", "deaf"]:
+    adult_summary.loc[group, ("count", "total")] = \
+            (adult_calls["group"] == group).sum()
+    pup_summary.loc[group, ("count", "total")] = \
+            ((pup_calls["calling_bat_deaf"] == (group == "deaf"))
+                    & (~pup_calls["before_deafening"])).sum()
+    pre_deafening_pup_summary.loc[group, ("count", "total")] = \
+            ((pup_calls["calling_bat_deaf"] == (group == "deaf"))
+                    & (pup_calls["before_deafening"])).sum()
+
+def write_parameters_table(summary, p_values):
+    fout = io.StringIO()
+    print("<!DOCTYPE html><body><table>", file=fout)
+    print("<tr><th></th><th colspan='2'>Hearing ({} vocalisations)</th><th colspan='2'>Deaf ({} vocalisations)</th>".format(
+        int(summary.loc["hearing", ("count", "total")]),
+        int(summary.loc["deaf", ("count", "total")])), file=fout, sep="")
+    if p_values is not None:
+        print("<th colspan='3'>p < 0.05 (ANOVA)</th>", file=fout, sep="")
+    print("</tr>", file=fout)
+    print("<tr><th></th>{}".format("<th>Median (1st &amp; 3rd quartile)</th><th>Mean ± std. dev.</th>" * 2), file=fout, end="")
+    if p_values is not None:
+        print("<th>Deafened/hearing</th><th>Age</th><th>Interaction</th>", file=fout, end="")
+    print("</tr>", file=fout)
+
+    for parameter_name, scale, parameter_description in per_session_parameters:
+        print("<tr><th>{}</th>".format(parameter_description), file=fout, end="")
+        test_data = []
+        for group in ["hearing", "deaf"]:
+            print("<td>{:.2f} ({:.2f}–{:.2f})</td>".format(
+                    summary.loc[group, (parameter_name, "median")] * scale,
+                    summary.loc[group, (parameter_name, "q25")] * scale,
+                    summary.loc[group, (parameter_name, "q75")] * scale),
+                 file=fout, end="")
+            print("<td>{:.2f} ± {:.2f}</td>".format(
+                    summary.loc[group, (parameter_name, "mean")] * scale,
+                    summary.loc[group, (parameter_name, "std")] * scale),
+                 file=fout, end="")
+        if p_values is not None:
+            for p_name in ["p_group_adjusted", "p_age_adjusted", "p_interaction_adjusted"]:
+                p = p_values.loc[parameter_name, p_name]
+                if p < 0.05:
+                    print("<td>*</td>", file=fout, end="")
+                else:
+                    print("<td>{:.2f}</td>".format(p), file=fout, end="")
+        print("</tr>", file=fout)
+    print("</table></body>", file=fout)
+
+    return fout.getvalue()
+
+parameters_table = write_parameters_table(adult_summary, None)
+with open("../output/adult_parameters_table.html", "wt") as fout:
+    print(parameters_table, file=fout, end="")
+
+parameters_table = write_parameters_table(pup_summary, pup_ps)
+with open("../output/pup_parameters_table.html", "wt") as fout:
+    print(parameters_table, file=fout, end="")
+
+parameters_table = write_parameters_table(pre_deafening_pup_summary, None)
+with open("../output/pre_deafening_pup_parameters_table.html", "wt") as fout:
+    print(parameters_table, file=fout, end="")
+
+print("Done, output files written to ../output/.")