#!/usr/bin/env python3
"""Scatter-plot fitted regression slopes against the true coefficient.

Usage: <script> data fit output

* ``data``   -- pickle file; the loaded object is indexed with ``'n_sim'``
                to obtain the number of simulated observations per row.
* ``fit``    -- parquet table; only its last 1000 rows are used.  Each row
                must provide ``chi_adu_coef`` and, for ``k = 1..n_sim``,
                the columns ``sim_vtc.k.1``, ``sim_vtc.k.3`` and
                ``sim_vtc.k.4``.
* ``output`` -- path handed to ``Figure.savefig``.

For every retained row, the ``sim_vtc.k.1`` values are regressed (ordinary
least squares with intercept) on ``sim_vtc.k.3 + sim_vtc.k.4``; the
resulting slope is plotted against that row's ``chi_adu_coef``.
"""

import argparse
import pickle

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

# Render through the pgf backend so the figure uses the LaTeX document fonts.
matplotlib.use("pgf")
matplotlib.rcParams.update({
    "pgf.texsystem": "xelatex",
    'font.family': 'serif',
    "font.serif": "Times New Roman",
    'text.usetex': True,
    'pgf.rcfonts': False,
})


def set_size(width, fraction=1, ratio=None):
    """Return ``(width, height)`` in inches for a figure sized in TeX points.

    Args:
        width: Reference width in TeX points (72.27 pt per inch).
        fraction: Fraction of ``width`` the figure should occupy.
        ratio: Height divided by width.  When ``None``, the golden-ratio
            conjugate ``(sqrt(5) - 1) / 2`` is used.

    Returns:
        Tuple ``(fig_width_in, fig_height_in)``.
    """
    fig_width_pt = width * fraction
    inches_per_pt = 1 / 72.27

    if ratio is None:
        ratio = (5 ** 0.5 - 1) / 2

    fig_width_in = fig_width_pt * inches_per_pt
    fig_height_in = fig_width_in * ratio

    return fig_width_in, fig_height_in


parser = argparse.ArgumentParser(description='plot_pred')
parser.add_argument('data')
parser.add_argument('fit')
parser.add_argument('output')
args = parser.parse_args()

# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# run this script on data files that come from a trusted source.
with open(args.data, 'rb') as fp:
    data = pickle.load(fp)

fit = pd.read_parquet(args.fit)

# NOTE(review): the trailing index in the `sim_vtc.<k>.<j>` column names
# presumably is the 1-based position in this list (1 = CHI, 3 = FEM,
# 4 = MAL) -- confirm against the code that produced the fit.
speakers = ['CHI', 'OCH', 'FEM', 'MAL']

n_sim = data['n_sim']

# Only the last 1000 rows of the fit table are plotted.
fit = fit[-1000:]

# The column names do not depend on the row, so build them once and pull
# the values out as (n_rows, n_sim) arrays instead of doing per-element
# lookups on every row.
chi_cols = [f'sim_vtc.{k+1}.1' for k in range(n_sim)]
fem_cols = [f'sim_vtc.{k+1}.3' for k in range(n_sim)]
mal_cols = [f'sim_vtc.{k+1}.4' for k in range(n_sim)]

chi_vtc = fit[chi_cols].to_numpy()
adu_vtc = fit[fem_cols].to_numpy() + fit[mal_cols].to_numpy()

true = fit['chi_adu_coef'].to_numpy(dtype=float)
sim = np.zeros(len(fit))

# One least-squares fit per row: slope of column ".1" on the sum of
# columns ".3" and ".4".
for i, (adu_row, chi_row) in enumerate(zip(adu_vtc, chi_vtc)):
    regr = LinearRegression()
    regr.fit(adu_row.reshape(-1, 1), chi_row)
    sim[i] = regr.coef_[0]

fig, ax = plt.subplots(1, 1, figsize=set_size(450, 1, 1))
ax.scatter(true, sim, s=1)
# Identity line: points on it have a fitted slope equal to the true value.
ax.plot(np.linspace(0, 5, 4), np.linspace(0, 5, 4), color='black')
ax.set_xlabel('true $K$')
ax.set_ylabel('fit $\\hat{K}$')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)

fig.savefig(args.output)