jgrewe
/
hladnik_grewe_heterogeneity


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
							########################################################################################
##                   effect of delay on the coding performance                        ##
import os
import numpy as np
import pandas as pd
import matplotlib as mplt
import matplotlib.pyplot as plt
# Apply the one-column style sheet as soon as this module is imported.
# NOTE(review): the path is relative, so it only resolves when the process is
# started from the directory that contains "code/" -- confirm with the callers.
plt.style.use("./code/plots/pnas_onecolumn.mplstyle")

from .figure_style import subfig_labelsize, subfig_labelweight, despine

# Short description of this figure. It is not referenced in the visible part of
# this file; presumably it is read by the command line front end -- TODO confirm.
fig4_help = "plots the effect of conduction delays on stimulus encoding"


def get_mutual_info(df, delay, column="mi", error="std", kernel=0.001):
    """Read the mutual information for all population sizes at one delay and kernel.

    Function returns the average and yerr values as np.arrays.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing the analysis results. The columns "pop_size",
        "delay", "kernel_sigma" and the one named by ``column`` are read.
    delay : float
        One of the delays used during analysis.
    column : str, optional
        The column name to read, by default "mi", i.e. the full mutual information
    error : str, optional
        The error measure that should be returned, one of {"std", "se", "quartile"}, by default "std"
    kernel : float, optional
        The kernel width used in the analysis, by default 0.001

    Returns
    -------
    np.array
        the average mutual information for each population size, ordered like
        ``df.pop_size.unique()``
    np.array
        the respective error measures. For standard deviation and standard
        error this is a vector. For "quartile" it is a
        2 * number_of_population_sizes array holding the absolute distance of
        the average to the 25 percent (row 0) and 75 percent (row 1) quartile,
        i.e. the layout of an asymmetric ``yerr``. Entries without any matching
        row in ``df`` are NaN.

    Raises
    ------
    ValueError
        if ``error`` is not one of the supported measures or ``delay`` does
        not occur in ``df``.
    """
    if error not in ("std", "se", "quartile"):
        # previously an unknown measure only failed at the return statement
        # because yerr was never assigned
        raise ValueError("error must be one of 'std', 'se', 'quartile', got %r" % (error,))
    if delay not in df.delay.unique():
        raise ValueError("delay %r is not contained in the dataframe" % (delay,))

    pop_sizes = df.pop_size.unique()
    # delay and kernel do not change inside the loop, build this mask only once
    delay_and_kernel = (df.delay == delay) & (df.kernel_sigma == kernel)

    avg = np.zeros(len(pop_sizes))
    if error == "quartile":
        yerr = np.zeros((2, len(pop_sizes)))
    else:
        yerr = np.zeros(len(pop_sizes))

    for i, p in enumerate(pop_sizes):
        selection = df[column][delay_and_kernel & (df.pop_size == p)].values
        if len(selection) == 0:
            # nothing to average: report NaN explicitly instead of relying on
            # numpy's warnings (or errors) for empty input
            avg[i] = np.nan
            yerr[..., i] = np.nan
            continue
        avg[i] = np.mean(selection)
        if error == "std":
            yerr[i] = np.std(selection)
        elif error == "se":
            yerr[i] = np.std(selection) / np.sqrt(len(selection))
        else:  # "quartile"
            yerr[0, i] = np.abs(avg[i] - np.percentile(selection, 25))
            yerr[1, i] = np.abs(avg[i] - np.percentile(selection, 75))
    return avg, yerr


def get_mutual_info_delay(df, population_size, column="mi", error="std", kernel=0.001):
    """Read the mutual information for all delays at one population size and kernel.

    Args:
        df (pandas.DataFrame): the data frame. The columns "pop_size", "delay",
            "kernel_sigma" and the one named by ``column`` are read.
        population_size (int): population size to be read out
        column (str, optional): column name. Defaults to "mi".
        error (str, optional): the error measure to return, one of
            {"std", "se", "quartile"}. Defaults to "std".
        kernel (float, optional): kernel size. Defaults to 0.001.

    Returns:
        tuple(np.array, np.array): the average of ``column`` for each delay
        (ordered like ``df.delay.unique()``) and the respective error measure.
        For "std" and "se" the error is a vector. For "quartile" it is a
        2 * number_of_delays array with the absolute distance of the average
        to the 25 percent (row 0) and 75 percent (row 1) quartile. Entries
        without any matching row in ``df`` are NaN.

    Raises:
        ValueError: if ``error`` is not one of the supported measures or
            ``population_size`` does not occur in ``df``.
    """
    if error not in ("std", "se", "quartile"):
        # previously an unknown measure only failed at the return statement
        # because yerr was never assigned
        raise ValueError("error must be one of 'std', 'se', 'quartile', got %r" % (error,))
    if population_size not in df.pop_size.unique():
        raise ValueError("population size %r is not contained in the dataframe" % (population_size,))

    delays = df.delay.unique()
    # population size and kernel do not change inside the loop, build this mask only once
    size_and_kernel = (df.pop_size == population_size) & (df.kernel_sigma == kernel)

    avg = np.zeros(len(delays))
    if error == "quartile":
        yerr = np.zeros((2, len(delays)))
    else:
        yerr = np.zeros(len(delays))

    for i, d in enumerate(delays):
        selection = df[column][size_and_kernel & (df.delay == d)].values
        if len(selection) == 0:
            # nothing to average: report NaN explicitly instead of relying on
            # numpy's warnings (or errors) for empty input
            avg[i] = np.nan
            yerr[..., i] = np.nan
            continue
        avg[i] = np.mean(selection)
        if error == "std":
            yerr[i] = np.std(selection)
        elif error == "se":
            yerr[i] = np.std(selection) / np.sqrt(len(selection))
        else:  # "quartile"
            yerr[0, i] = np.abs(avg[i] - np.percentile(selection, 25))
            yerr[1, i] = np.abs(avg[i] - np.percentile(selection, 75))
    return avg, yerr


def compare_homogeneous_with_similar_heterogeneous(homogeneous, heterogeneous, axes=None):
    """
    for each heterogeneous population size compare the mutual info with all
    homogeneous populations with similar rates/rate modulations

    For each of the features "population_rate" and "rate_modulation" and each
    heterogeneous population (delay 0, kernel_sigma 0.001, population size > 1)
    the homogeneous trials of the same population size whose feature value lies
    in [0.8, 1.0) times the heterogeneous value are selected, and their
    "mi_100" values are averaged per dataset. The results are drawn as scatter
    points and the figure is shown with ``plt.show()``.

    Args:
        homogeneous (pandas.DataFrame): results of the homogeneous populations;
            the columns "pop_size", "dataset", "mi_100" and both feature columns are read.
        heterogeneous (pandas.DataFrame): results of the heterogeneous populations;
            the columns "pop_size", "delay", "kernel_sigma", "mi_100" and both
            feature columns are read.
        axes (list(pyplot.axis)): optional, default None, the axes into which the
            results should be plotted, one per feature. If fewer than two axes
            are passed, a new figure with two subplots is created.
    """
    features = ["population_rate", "rate_modulation"]
    # None replaces the former mutable default []; an empty list is still accepted
    if axes is None or len(axes) < len(features):
        fig = plt.figure()
        axes = [fig.add_subplot(211), fig.add_subplot(212)]

    population_sizes = heterogeneous.pop_size[heterogeneous.pop_size > 1].unique()
    for ax, f in zip(axes, features):
        print(f)
        ax.set_title(f)
        for ps in population_sizes:
            hom_populations = homogeneous[(homogeneous.pop_size == ps)]
            het_populations = heterogeneous[(heterogeneous.pop_size == ps) & (heterogeneous.delay == 0) & (heterogeneous.kernel_sigma == 0.001)]
            het_mis = np.zeros(len(het_populations))

            for count, (_, row) in enumerate(het_populations.iterrows()):
                feat_value = row[f]
                het_mis[count] = row["mi_100"]
                # homogeneous trials with a feature value between 80 % (inclusive)
                # and 100 % (exclusive) of the heterogeneous one
                trials = hom_populations[(hom_populations[f] >= 0.8 * feat_value) & (hom_populations[f] < 1.0 * feat_value)]
                # one average per homogeneous dataset
                hom_mis = np.array([np.mean(trials[trials.dataset == d].mi_100)
                                    for d in trials.dataset.unique()])

                # NOTE(review): this is drawn once per heterogeneous population, while
                # het_mis is only filled up to ``count``; the remaining zeros enter
                # the mean. Left unchanged because the intended placement of the
                # scatter calls is not clear from the code -- please confirm.
                ax.scatter(ps-0.25, np.mean(het_mis))
                ax.scatter(ps+0.25, np.mean(hom_mis))
    plt.show()


def layout_figure():
    """Create the figure and its five axes on a 20 x 7 grid.

    Returns:
        tuple: the figure and the list of axes in the order
        [total, 0-100, 100-200, 200-300, delay]. The first, second and last
        axis carry the subfigure labels "A", "B" and "C".
    """
    fig = plt.figure(figsize=(3.42, 4.2))
    grid = (20, 7)

    # (first row, first column, number of rows) of each panel; every panel is
    # three grid columns wide. The order defines the order of the returned list.
    placements = [
        (0, 0, 8),    # full frequency band, left column top
        (0, 4, 6),    # right column, top
        (7, 4, 6),    # right column, middle
        (14, 4, 6),   # right column, bottom
        (12, 0, 8),   # delay dependence, left column bottom
    ]
    axes = [plt.subplot2grid(grid, (row, col), rowspan=n_rows, colspan=3)
            for row, col, n_rows in placements]

    # subfigure labels, positioned in the coordinates of the respective axis
    for panel, x_pos, letter in ((axes[0], -.425, "A"), (axes[1], -.35, "B"), (axes[4], -.425, "C")):
        panel.text(x_pos, 1.125, letter, fontsize=subfig_labelsize,
                   fontweight=subfig_labelweight, transform=panel.transAxes)

    fig.subplots_adjust(left=0.15, bottom=0.125, top=0.925, right=0.99, hspace=0.25)
    return fig, axes


def plot_delay_effect(args):
    """Illustrate the effect of a delay, e.g. induced by neuronal conduction delays.

    The first four axes show the mutual information as a function of the
    population size, one curve per delay, for the columns "mi", "mi_100",
    "mi_200" and "mi_300". The last axis shows, for population size 16, the
    mutual information of the three frequency bands relative to the value at
    the first delay, as a function of the delay on a logarithmic axis.

    Args:
        args: the command line arguments; the attributes ``heterogeneous_data``,
            ``kernel``, ``nosave`` and ``outfile`` are read.
    """
    fig, axes = layout_figure()

    df = pd.read_csv(args.heterogeneous_data, sep=";", index_col=0)

    pop_sizes = df.pop_size.unique()
    delays = df.delay.unique()
    mi_axes = axes[:4]
    columns = ["mi", "mi_100", "mi_200", "mi_300"]
    titles = [r"$0-300$Hz", r"$0-100$Hz",r"$100-200$Hz",r"$200-300$Hz"]
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9, pyplot.get_cmap is
    # available in old and new versions
    cmaps = [plt.get_cmap(name) for name in ("Greys", "Reds", "Greens", "PuBu")]
    limits = [[0, 1000], [0, 400], [0, 400], [0, 400]]
    markers = ["o", "X", "d", "s"]

    # as a function of population size
    marked_delays = [0, 2, 4, 9, len(delays)-1]  # curves that get their delay written next to them
    for panel, (ax, col, title, ylim, cmap, marker) in enumerate(zip(mi_axes, columns, titles, limits, cmaps, markers)):
        for i, d in enumerate(delays):
            avg, yerr = get_mutual_info(df, d, col, error="std", kernel=args.kernel)
            ax.errorbar(pop_sizes, avg, yerr, lw=0.5,markeredgewidth=0.2, markeredgecolor="white",
                        markersize=4, linestyle="--", marker=marker,
                        color=cmap(i/len(delays)*0.8 + 0.2), label=r"%.2fms" % (d * 1000))
            if "$0-300$" in title:
                if i in marked_delays:
                    ax.text(31, avg[-1], "%.1f" % (d *1000), fontsize=5, ha="left", va="center")
                if i == 0:
                    ax.text(31, avg[-1] + 150, r"$\sigma_{delay}$", fontsize=5, ha="left", va="center")
                    ax.text(31, avg[-1] + 75, "[ms]", fontsize=5, ha="left", va="center")
        # The title height depends on the panel, not on the delay index that is
        # left over from the inner loop (formerly ``i``); the text is also added
        # to the panel itself instead of the current pyplot axes.
        ax.text(0.05, 0.90 if panel == 0 else 0.85, title, transform=ax.transAxes, fontsize=7, ha="left")
        ax.set_ylim(ylim)
        ax.set_yticks(np.arange(0, ylim[1]+1, 250))
        ax.set_yticks(np.arange(0, ylim[1]+1, 50), minor=True)
        ax.set_xlim(0, np.max(pop_sizes)+5)
        ax.set_xticks(range(0, np.max(pop_sizes) + 5, 10))
        ax.set_xticks(range(0, np.max(pop_sizes) + 5, 2), minor=True)
        despine(ax, ["top", "right"], False)
    axes[0].set_ylabel("mutual information [bit/s]")
    axes[0].set_xlabel("population size")
    axes[0].yaxis.set_label_coords(-0.275, 0.5)

    axes[1].set_xticklabels([])
    axes[2].set_xticklabels([])
    axes[2].set_ylabel("mutual information [bit/s]")
    axes[2].yaxis.set_label_coords(-0.225, 0.5)

    axes[3].set_xlabel("population size", ha="center")
    axes[3].xaxis.set_label_coords(0.5, -0.35)

    # as a function of the delay
    delay_axis = axes[-1]
    # Shift the first delay on a copy, just for the log scale; the original
    # values stay untouched and are used for the tick labels below.
    log_delays = delays.astype(float)
    log_delays[0] += 0.00025
    for col, title, cmap, marker in zip(columns[1:], titles[1:], cmaps[1:], markers[1:]):
        avg, yerr = get_mutual_info_delay(df, 16, column=col, error="std", kernel=args.kernel)
        # normalized to the value at the first delay
        delay_axis.errorbar(log_delays*1000, avg/avg[0], yerr= yerr/avg[0], c=cmap(0.75), lw=0.5,
                            markeredgewidth=0.2, markersize=4, marker=marker, label=title,
                            markeredgecolor="white", zorder=2)
        delay_axis.plot(log_delays*1000, avg/avg[0], c=cmap(0.75), lw=1.0, ls="--", zorder=1)

    delay_axis.legend(ncol=1, handletextpad=0.1, loc="lower left", columnspacing=.5, frameon=True,
                      fancybox=False, shadow=False, markerscale=0.9, borderaxespad=0.05)
    delay_axis.set_xlabel(r"$\sigma_{delay}$ [ms]")
    delay_axis.set_ylabel(r"mutual information [rel.]")
    delay_axis.set_xlim(0.2, 30)
    delay_axis.set_ylim(0.0, 1.1)
    delay_axis.set_yticks(np.arange(0.0, 1.01, 0.25))
    delay_axis.set_xscale("log")
    delay_axis.set_xticks(log_delays*1000)
    xticklabels = list(map(str, np.round(delays*1000,1)))
    # label only every second tick
    for i in range(1, len(xticklabels), 2):
        xticklabels[i] = ""
    # the "7.0" label is blanked as well; do not fail when the data set has no
    # such delay or the label was already removed above
    if "7.0" in xticklabels:
        xticklabels[xticklabels.index("7.0")] = ""
    delay_axis.set_xticklabels(xticklabels, rotation=45)
    delay_axis.yaxis.set_label_coords(-0.275, 0.5)
    despine(delay_axis, ["top", "right"], False)

    if args.nosave:
        plt.show()
    else:
        fig.savefig(args.outfile)
        plt.close(fig)


def command_line_parser(subparsers):
    """Register the "figure4" sub-command with its options.

    Args:
        subparsers: the object on which ``add_parser`` is called to create the
            sub-command parser. ``plot_delay_effect`` is stored as the ``func``
            default of the new parser.
    """
    default_data = os.path.join("derived_data", "heterogeneous_populationcoding.csv")
    default_figure = os.path.join("figures", "delay_effect.pdf")

    fig4 = subparsers.add_parser(
        "figure4",
        help="Plots the effect of conduction delays on stimulus encoding in heterogeneous populations.",
    )
    fig4.add_argument("-hetdf", "--heterogeneous_data", default=default_data)
    fig4.add_argument("-k", "--kernel", type=float, default=0.001,
                      help="The kernel width used for the analysis")
    fig4.add_argument("-o", "--outfile", type=str, default=default_figure,
                      help="The filename of the figure")
    fig4.add_argument("-n", "--nosave", action='store_true',
                      help="no saving of the figure, just showing")
    fig4.set_defaults(func=plot_delay_effect)