"""Derive per-row diversity measures and plot how they correlate.

Usage::

    python <this script> --input DIR

Files read from ``DIR``:

* ``topics.csv`` -- needs a ``label`` column; rows whose label contains
  ``"Junk"`` are excluded from the topic axis.
* ``aggregate.csv`` -- needs ``bai`` plus ``start_k`` and ``expertise_k``
  columns for ``k = 1 .. number of rows in topics.csv``.
* ``pooled_resources.parquet`` -- needs ``bai`` and ``pooled_resources``
  (one equal-length array per row, since the column is stacked into a matrix).
* ``nu_expertise_symmetric.npy`` -- square matrix used in the Stirling terms;
  presumably indexed by the non-junk topics -- TODO confirm.
* ``brokerage.csv`` -- needs ``bai``; expected to provide ``brokerage``.

Files written to ``DIR``:

* ``capital_measures.eps`` -- heatmap of pairwise correlations.
* ``capital_measures.csv`` -- mean and standard deviation of numeric columns.
"""
import argparse
import pickle  # unused in this script; kept so the module's names are unchanged
from os.path import join as opj

import matplotlib
import numpy as np
import pandas as pd
import seaborn as sns
from cmdstanpy import CmdStanModel  # unused in this script; kept deliberately
from matplotlib import pyplot as plt
from scipy.stats import entropy
from sklearn.linear_model import LinearRegression

matplotlib.use("pgf")
matplotlib.rcParams.update(
    {
        "pgf.texsystem": "xelatex",
        "font.family": "serif",
        "font.serif": "Times New Roman",
        "text.usetex": True,
        "pgf.rcfonts": False,
    }
)
# NOTE: the original followed this with
#     plt.rcParams["text.latex.preamble"].join([r"\usepackage{amsmath}",
#                                               r"\setmainfont{amssymb}"])
# ``str.join`` returns a new string and the result was discarded, so the
# preamble was never modified. The statement is dropped rather than "fixed"
# to keep rendering unchanged (``amssymb`` is a package, not a font, so
# actually applying that preamble would most likely break the LaTeX run).

MEASURES = [
    "intellectual_diversity",
    "intellectual_stirling",
    "excess_social_diversity",
    "excess_social_stirling",
    "social_magnitude",
    "brokerage",
]
# Tick labels, in the same order as MEASURES.
LABELS = [
    "\\textbf{Intellectual diversity}",
    "Intellectual diversity (Stirling)",
    "\\textbf{Excess social diversity}",
    "Excess social diversity (Stirling)",
    "\\textbf{Power}",
    "Brokerage",
]


def _row_shares(values):
    """Divide every row of *values* by its sum (all-zero rows become NaN)."""
    return values / values.sum(axis=1)[:, np.newaxis]


def _exp_entropy(weights):
    """Return exp(Shannon entropy) of each row; scipy normalizes the rows."""
    return np.exp(entropy(weights, axis=1))


def _stirling(shares, similarity):
    """Return ``1 - sum_ij similarity[i, j] * shares[k, i] * shares[k, j]``.

    A single contraction avoids building the (rows, topics, topics) tensor of
    outer products that the two-step formulation needs.
    """
    return 1 - np.einsum("ki,ij,kj->k", shares, similarity, shares)


def _residuals(frame, target, predictor):
    """Return *target* minus its least-squares linear fit on *predictor*."""
    features = frame[[predictor]]
    fitted = LinearRegression().fit(features, frame[target]).predict(features)
    return frame[target] - fitted


def main():
    """Run the whole pipeline: load inputs, compute measures, plot, export."""
    parser = argparse.ArgumentParser(
        description="Compute diversity measures and plot their correlations."
    )
    # Required: every path below is built from it, and omitting it used to
    # fail later with an opaque TypeError inside os.path.join.
    parser.add_argument(
        "--input",
        required=True,
        help="directory holding the input files; outputs are written there too",
    )
    args = parser.parse_args()

    # Read the topic table once; it provides both the column count and the
    # mask of topics to keep.
    topics = pd.read_csv(opj(args.input, "topics.csv"))
    n_topics = len(topics)
    # na=False: a missing label counts as "not junk" instead of breaking `~`.
    keep = ~topics["label"].str.contains("Junk", na=False).to_numpy(dtype=bool)

    df = pd.read_csv(opj(args.input, "aggregate.csv"))
    resources = pd.read_parquet(opj(args.input, "pooled_resources.parquet"))
    df = df.merge(resources, on="bai")

    topic_ids = range(1, n_topics + 1)
    start_counts = df[[f"start_{k}" for k in topic_ids]].to_numpy().astype(int)
    expertise = df[[f"expertise_{k}" for k in topic_ids]].to_numpy()
    pooled = np.stack(df["pooled_resources"])

    # Drop junk-topic columns, then normalize rows to shares.
    expertise = expertise[:, keep]
    research_shares = _row_shares(start_counts[:, keep])
    social_shares = _row_shares(pooled[:, keep])

    nu = np.load(opj(args.input, "nu_expertise_symmetric.npy"))
    print(nu)

    df["research_diversity"] = _exp_entropy(research_shares)
    # NOTE(review): the next two columns use the *unfiltered* pooled resources
    # (junk topics included), unlike every other measure. Kept as in the
    # original because changing it would alter results -- confirm intent.
    df["social_diversity"] = _exp_entropy(pooled)
    df["intellectual_diversity"] = _exp_entropy(expertise)
    # A log(1 + x) transform of this column was commented out in the original.
    df["social_magnitude"] = pooled.sum(axis=1)

    df["intellectual_stirling"] = _stirling(expertise, nu)
    df["social_stirling"] = _stirling(social_shares, nu)

    # Rows with no pooled resources give NaN above; treat them as zero so the
    # regressions below accept them.
    df.fillna(
        {
            "social_stirling": 0,
            "social_diversity": 0,
            "intellectual_diversity": 0,
        },
        inplace=True,
    )

    # "Excess" = part of the social measure not linearly explained by its
    # intellectual counterpart.
    df["excess_social_diversity"] = _residuals(
        df, "social_diversity", "intellectual_diversity"
    )
    df["excess_social_stirling"] = _residuals(
        df, "social_stirling", "intellectual_stirling"
    )

    brokerage = pd.read_csv(opj(args.input, "brokerage.csv"))
    df = df.merge(brokerage, on="bai")
    print(df)
    # A log(1 + x) transform of "brokerage" was commented out in the original.
    df.fillna(0, inplace=True)

    # Pairwise Pearson correlations; the diagonal stays NaN.
    n_measures = len(MEASURES)
    R = np.full((n_measures, n_measures), np.nan)
    for i, a in enumerate(MEASURES):
        for j, b in enumerate(MEASURES):
            if i != j:
                R[i, j] = np.corrcoef(df[a], df[b])[0, 1]
                print(R[i, j])

    fig, ax = plt.subplots(figsize=(4, 3.2))
    # Blank the lower triangle and diagonal so each pair is drawn once, then
    # trim the first column / last row, which are entirely blank.
    R[np.tril_indices(R.shape[0])] = np.nan
    upper = R[:-1, 1:]
    sns.heatmap(
        upper,
        cmap="Reds",
        vmin=0,
        vmax=1,
        xticklabels=LABELS[1:],
        yticklabels=LABELS[:-1],
        ax=ax,
        annot=upper,
        fmt=".2f",
        annot_kws={"fontsize": 6},
        square=True,
    )
    ax.yaxis.set_tick_params(rotation=0)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")

    fig.savefig(
        opj(args.input, "capital_measures.eps"),
        bbox_inches="tight",
    )

    # Summarize numeric columns only: the frame also holds the `bai` id and
    # the array-valued `pooled_resources`, on which mean/std raise in
    # pandas >= 2.0 (older versions silently dropped such columns).
    summary = df.select_dtypes(include="number").agg(["mean", "std"])
    summary.to_csv(opj(args.input, "capital_measures.csv"))


if __name__ == "__main__":
    main()