123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146 |
- ###############################################################################
## correlations among the baseline parameters and the mutual information ##
- import os
- import pandas as pd
- import numpy as np
- import scipy as sp
- import matplotlib.pyplot as plt
- import matplotlib as mplt
- from matplotlib import cm
- from sklearn import linear_model
- from sklearn.metrics import r2_score
- from .property_correlations import get_feature_values
def plot_correlation_heatmap(rs, ps, labels, axis=None, textsize=6, labelsize=6):
    """Plot a heatmap of correlation coefficients and the respective p-values.

    Every off-diagonal cell is annotated with the coefficient and a
    significance label ("n.s.", "p<5%" or "p<1%") derived from the p-value
    after Bonferroni correction for the number of distinct pairs.

    Args:
        rs (np.ndarray): square matrix of correlation coefficients
        ps (np.ndarray): matrix of corresponding p-values, same shape as rs
        labels (list): list of labels, one per row/column of rs
        axis (matplotlib axis): matplotlib axis object, default=None. A new
            figure with a single axis is created when None.
        textsize (int): font size of the cell annotations, default=6
        labelsize (int): font size of the tick labels (axes and colorbar),
            default=6

    Returns:
        matplotlib colorbar: the colorbar that was added for the heatmap
    """
    if axis is None:
        fig = plt.figure()
        axis = fig.add_subplot(111)
    num_labels = len(labels)

    # plt.get_cmap(name, lut) is available on old and new matplotlib alike;
    # cm.get_cmap was deprecated in 3.7 and removed in 3.9.
    cmap = plt.get_cmap("RdYlBu", 512)
    newcolors = cmap(np.linspace(0, 1, 512))
    # The topmost colour is replaced by black, so cells at the upper limit
    # (r = 1) are drawn black.
    newcolors[-1, :] = np.array([0, 0, 0, 1])
    newcmp = mplt.colors.ListedColormap(newcolors)

    im = axis.imshow(rs, cmap=newcmp, vmin=-1.0, vmax=1.0)
    # Attach the colorbar to the figure that owns the axis; the "current"
    # pyplot figure may be a different one when an axis is passed in.
    cb = axis.figure.colorbar(im, ax=axis, orientation='vertical', label='correlation coefficient')
    cb.ax.tick_params(labelsize=labelsize)

    axis.set_xticks(np.arange(num_labels))
    axis.set_yticks(np.arange(num_labels))
    # Minor ticks sit on the cell borders and carry the white grid lines.
    axis.set_xticks(np.arange(0.5, num_labels), minor=True)
    axis.set_yticks(np.arange(0.5, num_labels), minor=True)
    axis.tick_params(axis='both', which='minor', colors='white')
    axis.grid(color="white", axis="both", which="minor", lw=0.6)
    axis.set_xticklabels(labels, fontsize=labelsize)
    axis.set_yticklabels(labels, fontsize=labelsize)
    plt.setp(axis.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
    plt.setp(axis.get_yticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    # Number of distinct pairs, used as the Bonferroni correction factor.
    bonferroni = (num_labels**2 - num_labels) / 2
    for i in range(num_labels):
        for j in range(num_labels):
            if i == j:
                continue
            # Black text on the light mid-range colours, white text otherwise.
            c = "k" if -.5 < rs[i, j] < 0.65 else "w"
            alpha = ps[i, j] * bonferroni
            p = "n.s." if alpha >= 0.05 else (r"p<%.1i%%" % (5 if alpha > .01 else 1))
            axis.text(j, i, "r: %.2f\n%s" % (rs[i, j], p), ha="center",
                      va="center", color=c, fontsize=textsize)
    return cb
def correlate(data_frame, columns):
    """Calculate pairwise Pearson correlations for all combinations of columns.

    The correlation of column i with column j is computed once for j >= i and
    mirrored into the lower triangle, since the measure is symmetric.

    Note: an earlier z-transform / multiple-regression pass was dropped from
    this function; its results were overwritten before being returned and it
    failed on constant columns.

    Args:
        data_frame (pandas DataFrame): the data frame containing the data
        columns (list): list of column names

    Returns:
        numpy.ndarray: matrix containing the correlation coefficients of shape (len(columns), len(columns))
        numpy.ndarray: matrix containing the p-values of shape (len(columns), len(columns))
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.stats` is loaded on every SciPy version.
    from scipy import stats

    # Pull the column data out of the frame once instead of on every pair.
    values = [data_frame[c].values for c in columns]
    num_columns = len(columns)
    rs = np.ones((num_columns, num_columns))
    ps = np.zeros((num_columns, num_columns))

    for i, x in enumerate(values):
        for j in range(i, num_columns):
            r, p = stats.pearsonr(x, values[j])
            rs[i, j] = rs[j, i] = r
            ps[i, j] = ps[j, i] = p
    return rs, ps
def layout_figure():
    """Create the single-panel figure that holds the correlation heatmap.

    Returns:
        tuple: the matplotlib figure (3.5 inches wide, height 0.8 times the
        width) and its only axis.
    """
    width = 3.5
    figure, ax = plt.subplots(nrows=1, ncols=1, figsize=(width, 0.8 * width))
    # Fixed margins in figure coordinates.
    figure.subplots_adjust(bottom=0.175, left=0.1, right=0.925, top=0.95)
    return figure, ax
def baseline_correlations(args):
    """Plot pairwise correlation coefficients among baseline features and the mutual information.

    Reads the semicolon-separated table given by ``args.inputfile``, extracts
    the selected features, computes their pairwise Pearson correlations and
    draws them as an annotated heatmap. The figure is either shown or saved.

    Args:
        args: parsed command line arguments (presumably an argparse Namespace
            as configured by ``command_line_parser``) providing
            ``inputfile`` (path of the csv file to read),
            ``outfile`` (path the figure is saved to) and
            ``nosave`` (if True the figure is shown instead of saved).
    """
    df = pd.read_csv(args.inputfile, sep=";", index_col=0)
    feats = ["population_rate", "cv", "rate_modulation", "lower_cutoff", "upper_cutoff", "mi"]
    labels = ["firing rate", r"$CV_{ISI}$", "rate mod.", r"$\omega_{lower}$",
              r"$\omega_{upper}$", "mutual info."]
    # NOTE(review): labels are assumed to line up, in order, with the keys
    # returned by get_feature_values -- confirm against that helper.
    features, _ = get_feature_values(df, feats)
    selection = pd.DataFrame(features)

    fig, axis = layout_figure()
    rs, ps = correlate(selection, list(features.keys()))
    cb = plot_correlation_heatmap(rs, ps, labels, axis=axis)

    # Shift the colorbar slightly to the right of its default position.
    cb_pos = list(cb.ax.get_position().bounds)
    cb_pos[0] = cb_pos[0] + 0.005
    cb.ax.set_position(cb_pos)

    if args.nosave:
        plt.show()
    else:
        fig.savefig(args.outfile)
        # Close exactly this figure rather than whichever one is current.
        plt.close(fig)
def command_line_parser(subparsers):
    """Register the ``supfig4`` sub-command on the given subparsers object.

    The sub-command accepts an input file, an output file and a flag to show
    the figure instead of saving it; it dispatches to ``baseline_correlations``
    via the ``func`` default.

    Args:
        subparsers: object offering ``add_parser`` (presumably the result of
            ``ArgumentParser.add_subparsers()``).
    """
    default_input = os.path.join("derived_data", "homogeneous_populationcoding.csv")
    default_output = os.path.join("figures", "coding_correlations.pdf")

    supfig_parser = subparsers.add_parser(
        "supfig4",
        help="Supplementary figure 4: Plots correlations of various baseline features.",
    )
    supfig_parser.add_argument("-i", "--inputfile", default=default_input)
    supfig_parser.add_argument("-o", "--outfile", default=default_output)
    supfig_parser.add_argument(
        "-n", "--nosave", action='store_true',
        help="no saving of the figure, just showing",
    )
    supfig_parser.set_defaults(func=baseline_correlations)
|