import pandas as pd import numpy as np from matplotlib import pyplot as plt import seaborn as sns df = pd.read_csv("data/maturity2/maturity2.csv") df = df[df["selection"]=="key_chi"] df["child_age"] /= 365 df = df[~df["majority_label_age"].isin(["Junk",""])] df = df.groupby(["child_id", "child_age", "majority_label_age"]).agg( n=("majority_label_age", "count") ) df = df.reset_index() df = df.pivot(columns="majority_label_age", index=["child_id","child_age"], values="n") df["ratio"] = df["Baby"]/(df["Baby"]+df["Child"]) df = df.reset_index() df = df[df["ratio"]>=0] from cmdstanpy import CmdStanModel data = { "N": len(df), "age": df["child_age"].astype(float).values, "ratio": df["ratio"].astype(float).values } model = CmdStanModel( stan_file=f"model.stan", ) fit = model.sample( data=data, chains=4, threads_per_chain=1, iter_sampling=2000, iter_warmup=500, show_console=True, ) vars = fit.stan_variables() samples = {} for (k, v) in vars.items(): samples[k] = v m = samples["lp"].max() q = np.exp(samples["lp"]-m).mean(axis=0) p = q/q.sum() fig, ax1 = plt.subplots() ax1.scatter(df["child_age"], df["ratio"], label="Data") ax1.set_xlabel("Key child age (in years)") ax1.set_ylabel("Baby/(Baby+Child)") ax2 = ax1.twinx() ax2.plot(np.linspace(0,5,len(p)),q, color="black", label="Threshold probability density") fig.legend() fig.savefig("output/ratio_vs_age.png", bbox_inches="tight")