|
@@ -1 +1,62 @@
|
|
|
-/annex/objects/MD5E-s1459--58acfe400b29daf088dd4488aaa81c39.py
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+from matplotlib import pyplot as plt
|
|
|
+import seaborn as sns
|
|
|
+
|
|
|
+
|
|
|
+df = pd.read_csv("data/maturity2/maturity2.csv")
|
|
|
+df = df[df["selection"]=="key_chi"]
|
|
|
+df["child_age"] /= 365
|
|
|
+
|
|
|
+df = df[~df["majority_label_age"].isin(["Junk",""])]
|
|
|
+
|
|
|
+df = df.groupby(["child_id", "child_age", "majority_label_age"]).agg(
|
|
|
+ n=("majority_label_age", "count")
|
|
|
+)
|
|
|
+
|
|
|
+df = df.reset_index()
|
|
|
+df = df.pivot(columns="majority_label_age", index=["child_id","child_age"], values="n")
|
|
|
+df["ratio"] = df["Baby"]/(df["Baby"]+df["Child"])
|
|
|
+
|
|
|
+df = df.reset_index()
|
|
|
+df = df[df["ratio"]>=0]
|
|
|
+
|
|
|
+from cmdstanpy import CmdStanModel
|
|
|
+
|
|
|
+data = {
|
|
|
+ "N": len(df),
|
|
|
+ "age": df["child_age"].astype(float).values,
|
|
|
+ "ratio": df["ratio"].astype(float).values
|
|
|
+}
|
|
|
+
|
|
|
+model = CmdStanModel(
|
|
|
+ stan_file=f"model.stan",
|
|
|
+)
|
|
|
+fit = model.sample(
|
|
|
+ data=data,
|
|
|
+ chains=4,
|
|
|
+ threads_per_chain=1,
|
|
|
+ iter_sampling=2000,
|
|
|
+ iter_warmup=500,
|
|
|
+ show_console=True,
|
|
|
+)
|
|
|
+
|
|
|
+vars = fit.stan_variables()
|
|
|
+samples = {}
|
|
|
+for (k, v) in vars.items():
|
|
|
+ samples[k] = v
|
|
|
+
|
|
|
+m = samples["lp"].max()
|
|
|
+q = np.exp(samples["lp"]-m).mean(axis=0)
|
|
|
+p = q/q.sum()
|
|
|
+
|
|
|
+fig, ax1 = plt.subplots()
|
|
|
+ax1.scatter(df["child_age"], df["ratio"], label="Data")
|
|
|
+ax1.set_xlabel("Key child age (in years)")
|
|
|
+ax1.set_ylabel("Baby/(Baby+Child)")
|
|
|
+ax2 = ax1.twinx()
|
|
|
+ax2.plot(np.linspace(0,5,len(p)),q, color="black", label="Threshold probability density")
|
|
|
+
|
|
|
+fig.legend()
|
|
|
+
|
|
|
+fig.savefig("output/ratio_vs_age.png", bbox_inches="tight")
|