import pandas as pd import numpy as np from os.path import join as opj corpora = { "output/etm_20_pretrained": "High-energy physics", "output/acl_2002_2022": "Computational Linguistics" } time_periods = { "output/etm_20_pretrained": ("2000--2009", "2015--2019"), "output/acl_2002_2022": ("2002--2011", "2012--2022") } author_disambiguation = { "output/etm_20_pretrained": "Yes", "output/acl_2002_2022": "No" } publication_requirement = { "output/etm_20_pretrained": 5, "output/acl_2002_2022": 3 } df = [] for corpus in corpora: N = len(pd.read_csv(opj(corpus, "aggregate.csv"))) scores = np.load(opj(corpus, "scores.npy")) scores = scores.mean(axis=0) model = scores[1] baseline = scores[2] df.append({ "Cohort": f"{corpora[corpus]}\\newline ($N={N}$)", "Model\\newline $\\mu(d_{{\\mathrm{{TV}}}}(\\bm{{y_{{a}}}}, \\bm{{y_{{a}}}}^{{\\text{{pred}}}}))$": f"\\textbf{{{model:.3f}}}" if model