jgrewe
/
hladnik_grewe_heterogeneity


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
							import os
import numpy as np
import pandas as pd

from sklearn.cluster import KMeans


def phase_clustering(df):
    """Split the cells into two K-means clusters and unwrap the phase of one.

    The clustering runs on two features: the absolute receptor position
    divided by the range of positions, and the phase (offset by pi) expressed
    as a fraction of a full cycle and weighted by 0.75. The cluster with the
    larger mean receptor position gets 2 pi added to its phase.

    Parameters
    ----------
    df : pandas.DataFrame
        The joined DataFrame of baseline properties and receptive field
        locations. The columns ``receptor_pos_absolute``, ``phase`` and
        ``eod_period`` are read.

    Returns
    -------
    pandas.DataFrame
        The same DataFrame object that was passed in (it is modified in
        place), extended by three columns: ``kmeans_label`` (cluster id),
        ``phase_shifted`` (phase after the 2 pi shift) and ``phase_time``
        (``phase_shifted`` as a fraction of a cycle times ``eod_period``).
    """
    two_pi = 2 * np.pi
    positions = df.receptor_pos_absolute.values
    offset_phases = df.phase + np.pi

    # Feature matrix with one row per cell: [scaled position, weighted phase].
    position_span = np.max(positions) - np.min(positions)
    features = np.asarray([
        positions / position_span,
        0.75 * offset_phases / two_pi,
    ]).T

    labels = KMeans(n_clusters=2, n_init=200).fit(features).labels_

    # K-means label numbering is arbitrary, so pick the cluster to shift by
    # comparing the mean receptor position of both clusters.
    mean_pos_zero = positions[labels == 0].mean()
    mean_pos_one = positions[labels == 1].mean()
    shifted_label = 0 if mean_pos_zero > mean_pos_one else 1

    unwrapped = 1 * offset_phases  # new Series, the offset phases stay intact
    unwrapped[labels == shifted_label] += two_pi

    df["kmeans_label"] = labels
    # Remove the pi offset that was added before clustering.
    df["phase_shifted"] = unwrapped - np.pi
    df["phase_time"] = df["phase_shifted"].values / two_pi * df.eod_period

    # Debugging aid: scatter receptor position against phase per cluster (and
    # against phase_shifted / phase_time) to visually inspect the split.
    return df


def phase_analysis(data_folder):
    """Load the baseline and receptive field tables and cluster the phases.

    Parameters
    ----------
    data_folder : str
        Folder that contains ``baseline_properties.csv`` and
        ``receptivefield_positions.csv`` (semicolon separated, first column
        used as the index).

    Returns
    -------
    pandas.DataFrame
        The merged table as returned by ``phase_clustering``.
    """
    csv_options = {"sep": ";", "index_col": 0}
    baseline_path = os.path.join(data_folder, "baseline_properties.csv")
    positions_path = os.path.join(data_folder, "receptivefield_positions.csv")

    baseline = pd.read_csv(baseline_path, **csv_options)
    positions = pd.read_csv(positions_path, **csv_options)

    # merge() without arguments joins on the columns both tables share.
    return phase_clustering(baseline.merge(positions))