doi
/
hladnik_grewe_heterogeneity
forked from jgrewe/hladnik_grewe_heterogeneity


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
							###############################################################################
##  correlations among the baseline parameters and the mutual information    ##
import os
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import matplotlib as mplt

from matplotlib import cm
from sklearn import linear_model
from sklearn.metrics import r2_score

from .property_correlations import get_feature_values


def plot_correlation_heatmap(rs, ps, labels, axis=None, textsize=6, labelsize=6):
    """Plot a heatmap of correlation coefficients annotated with significance.

    Every off-diagonal cell is annotated with its correlation coefficient and
    a significance label. The label is derived from the p-value multiplied by
    the number of unique label pairs (Bonferroni correction).

    Args:
        rs (np.ndarray): matrix of correlation coefficients
        ps (np.ndarray): matrix of corresponding p-values
        labels (list): list of labels, one per row/column of the matrices
        axis (matplotlib axis): axis to draw into, default=None. If None, a
            new figure with a single axis is created.
        textsize (int): font size of the cell annotations, default=6
        labelsize (int): font size of the tick labels and the colorbar tick
            labels, default=6

    Returns:
        matplotlib colorbar: the colorbar that was added for the heatmap
    """
    if axis is None:
        fig = plt.figure()
        axis = fig.add_subplot(111)

    # plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was
    # deprecated in matplotlib 3.7 and removed in 3.9.
    cmap = plt.get_cmap("RdYlBu", 512)
    newcolors = cmap(np.linspace(0, 1, 512))
    # The last colormap entry (upper end of the value range) is drawn black.
    newcolors[-1, :] = np.array([0, 0, 0, 1])
    newcmp = mplt.colors.ListedColormap(newcolors)

    im = axis.imshow(rs, cmap=newcmp, vmin=-1.0, vmax=1.0)
    # Attach the colorbar to the figure that owns ``axis``; the current
    # pyplot figure may be a different one when an axis is passed in.
    cb = axis.figure.colorbar(im, ax=axis, orientation='vertical',
                              label='correlation coefficient')
    cb.ax.tick_params(labelsize=labelsize)

    n_labels = len(labels)
    axis.set_xticks(np.arange(n_labels))
    axis.set_yticks(np.arange(n_labels))
    # Minor ticks sit on the cell borders and carry the white grid lines.
    axis.set_xticks(np.arange(0.5, n_labels), minor=True)
    axis.set_yticks(np.arange(0.5, n_labels), minor=True)
    axis.tick_params(axis='both', which='minor', colors='white')
    axis.grid(color="white", axis="both", which="minor", lw=0.6)
    axis.set_xticklabels(labels, fontsize=labelsize)
    axis.set_yticklabels(labels, fontsize=labelsize)
    plt.setp(axis.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
    plt.setp(axis.get_yticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Number of unique off-diagonal pairs, used as Bonferroni factor.
    bonferroni = (n_labels**2 - n_labels) / 2
    for i in range(n_labels):
        for j in range(n_labels):
            if i == j:
                continue
            r = rs[i, j]
            # Black text inside the (-0.5, 0.65) range, white text outside.
            color = "k" if -.5 < r < 0.65 else "w"
            corrected_p = ps[i, j] * bonferroni
            if corrected_p >= 0.05:
                significance = "n.s."
            elif corrected_p > .01:
                significance = "p<5%"
            else:
                significance = "p<1%"
            axis.text(j, i, "r: %.2f\n%s" % (r, significance), ha="center",
                      va="center", color=color, fontsize=textsize)
    return cb


def correlate(data_frame, columns):
    """Calculate pairwise Pearson correlations for all combinations of columns.

    Only the upper triangle (including the diagonal) is computed with
    ``scipy.stats.pearsonr``; the results are mirrored into the lower
    triangle because the correlation of (a, b) equals that of (b, a).

    Args:
        data_frame (pandas DataFrame): the data frame containing the data
        columns (list): list of column names

    Returns:
        numpy.ndarray: matrix containing the correlation coefficients of shape(len(columns), len(columns))
        numpy.ndarray: matrix containing the p-values of shape(len(columns), len(columns))

    """
    # Imported locally: a bare ``import scipy`` does not guarantee that the
    # ``stats`` submodule is loaded on every SciPy version.
    from scipy.stats import pearsonr

    # Extract each column once instead of on every pairwise comparison.
    values = [np.asarray(data_frame[c].values) for c in columns]
    n_columns = len(columns)

    rs = np.ones((n_columns, n_columns))
    ps = np.zeros((n_columns, n_columns))
    for i in range(n_columns):
        for j in range(i, n_columns):
            r, p = pearsonr(values[i], values[j])
            rs[i, j] = rs[j, i] = r
            ps[i, j] = ps[j, i] = p

    return rs, ps


def layout_figure():
    """Create the figure and its single axis with fixed size and margins.

    Returns:
        matplotlib figure: the newly created figure (3.5 wide, 0.8 * 3.5 high)
        matplotlib axis: the only axis of that figure
    """
    width = 3.5
    figure, ax = plt.subplots(1, 1, figsize=(width, 0.8 * width))
    figure.subplots_adjust(left=0.1, top=0.95, right=0.925, bottom=0.175)
    return figure, ax


def baseline_correlations(args):
    """Plot pairwise correlations of baseline features and the mutual information.

    Reads the semicolon-separated table ``args.inputfile``, extracts the
    selected features via ``get_feature_values``, computes their pairwise
    Pearson correlations and draws them as an annotated heatmap. The figure
    is either shown on screen or saved to ``args.outfile``.

    Args:
        args: object providing the attributes ``inputfile``, ``outfile`` and
            ``nosave``, e.g. the parsed arguments of the sub-command that is
            registered in ``command_line_parser``.
    """
    feature_names = ["population_rate", "cv", "rate_modulation", "lower_cutoff", "upper_cutoff", "mi"]
    tick_labels = ["firing rate", r"$CV_{ISI}$", "rate mod.", r"$\omega_{lower}$",
                   r"$\omega_{upper}$", "mutual info."]

    table = pd.read_csv(args.inputfile, sep=";", index_col=0)
    # NOTE(review): get_feature_values is assumed to return a dict-like
    # mapping of feature name to values as first element -- confirm.
    feature_values, _ = get_feature_values(table, feature_names)
    feature_frame = pd.DataFrame(feature_values)

    fig, axis = layout_figure()

    coefficients, p_values = correlate(feature_frame, list(feature_values.keys()))
    colorbar = plot_correlation_heatmap(coefficients, p_values, tick_labels, axis=axis)

    # Nudge the colorbar slightly to the right of its default position.
    bounds = list(colorbar.ax.get_position().bounds)
    bounds[0] += 0.005
    colorbar.ax.set_position(bounds)

    if not args.nosave:
        fig.savefig(args.outfile)
        plt.close()
        return
    plt.show()


def command_line_parser(subparsers):
    """Register the ``supfig4`` sub-command on the given subparsers object.

    The sub-command accepts an input file, an output file and a flag that
    suppresses saving; ``baseline_correlations`` is set as its ``func``
    default.

    Args:
        subparsers: object offering ``add_parser``, e.g. the result of
            ``argparse.ArgumentParser.add_subparsers``.
    """
    default_input = os.path.join("derived_data", "homogeneous_populationcoding.csv")
    default_output = os.path.join("figures", "coding_correlations.pdf")

    sub = subparsers.add_parser(
        "supfig4",
        help="Supplementary figure 4: Plots correlations of various baseline features.",
    )
    sub.add_argument("-i", "--inputfile", default=default_input)
    sub.add_argument("-o", "--outfile", default=default_output)
    sub.add_argument("-n", "--nosave", action='store_true',
                     help="no saving of the figure, just showing")
    sub.set_defaults(func=baseline_correlations)