1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
"""Build the LaTeX table comparing model and baseline scores per cohort.

For every configured cohort the script loads ``scores{suffix}.npy`` from the
cohort's output directory, averages it along axis 0, and reports entry 1
(model) and entry 2 (baseline) of the averaged scores.  The resulting table is
printed and written to ``output/scores.tex``.
"""

from os.path import join as opj
from typing import NamedTuple, Tuple

import numpy as np
import pandas as pd


class Cohort(NamedTuple):
    """Everything needed to locate and label one row of the table."""

    location: str  # directory holding the cohort's output files
    corpus: str  # human-readable corpus name used in the row label
    time_periods: Tuple[str, str]  # (initial period, later period) for the label
    # Suffix of the cohort's ``aggregate{suffix}.csv`` file.  The table does
    # not need that file; the suffix is recorded only to keep the mapping.
    aggregate_suffix: str
    score_suffix: str  # suffix of the cohort's ``scores{suffix}.npy`` file


# One record per table row.  Keeping all fields of a cohort together prevents
# the index-aligned lists from silently drifting out of step.
COHORTS = [
    Cohort("output/etm_20_r", "High-energy physics",
           ("2000--2009", "2015--2019"), "", "_default"),
    Cohort("output/etm_20_r", "High-energy physics",
           ("2000--2004", "2005--2009"), "_0_1", "_0_1"),
    Cohort("output/etm_20_r", "High-energy physics",
           ("2005--2009", "2010--2014"), "_1_2", "_1_2"),
    Cohort("output/etm_20_r", "High-energy physics",
           ("2010--2014", "2015--2019"), "_2_3", "_2_3"),
    Cohort("output/etm_20_r", "High-energy physics",
           ("2000--2009", "2010--2019"), "_0-1_2-3", "_0-1_2-3"),
    Cohort("output/etm_15_pretrained", "High-energy physics (K=15)",
           ("2000--2009", "2015--2019"), "", "_default"),
    Cohort("output/acl_2002_2022", "ACL Anthology",
           ("2002--2011", "2012--2022"), "", ""),
]

# Column headers.  These are plain (non f-) strings, so the doubled braces are
# emitted literally; LaTeX should read them as nested groups.
MODEL_COLUMN = "Model\\newline $\\mu(d_{{\\mathrm{{TV}}}}(\\bm{{y_{{a}}}}, \\bm{{y_{{a}}}}^{{\\text{{pred}}}}))$"
BASELINE_COLUMN = "Baseline\\newline $\\mu(d_{{\\mathrm{{TV}}}}(\\bm{{y_{{a}}}}, \\bm{{x_{{a}}}}))$"

COLUMN_FORMAT = "|>{\\centering\\arraybackslash}m{4cm}|>{\\centering\\arraybackslash}m{5cm}|>{\\centering\\arraybackslash}m{5cm}|"
CAPTION = "Performance of the actual model versus that of the baseline model, for i) the cohort of high-energy physicists and ii) a cohort from the ACL Anthology corpus of Computational Linguistics papers."
LABEL = "table:performance"


def format_score_pair(model, baseline):
    """Format both scores to three decimals and bold the strictly smaller one.

    Returns a ``(model_cell, baseline_cell)`` tuple of LaTeX strings.  When
    the two scores are equal, neither cell is bolded.
    """
    model_cell = f"{model:.3f}"
    baseline_cell = f"{baseline:.3f}"
    if model < baseline:
        model_cell = f"\\textbf{{{model_cell}}}"
    elif baseline < model:
        baseline_cell = f"\\textbf{{{baseline_cell}}}"
    return model_cell, baseline_cell


def cohort_row(cohort, scores):
    """Return the table row (as a dict) for *cohort* given its raw scores.

    *scores* is averaged along axis 0; entry 1 of the result is taken as the
    model score and entry 2 as the baseline score.
    """
    mean_scores = np.asarray(scores).mean(axis=0)
    model_cell, baseline_cell = format_score_pair(mean_scores[1], mean_scores[2])
    start, end = cohort.time_periods
    return {
        "Cohort": f"{cohort.corpus} (from {start} to {end})",
        MODEL_COLUMN: model_cell,
        BASELINE_COLUMN: baseline_cell,
    }


def render_latex(rows):
    """Render the row dicts as a LaTeX table indexed by cohort label."""
    table = pd.DataFrame(rows).set_index("Cohort")
    # Lift pandas' display column-width limit before rendering the long cells.
    pd.set_option('display.max_colwidth', None)
    return table.to_latex(
        escape=False,  # cells and headers already contain LaTeX markup
        multirow=True,
        column_format=COLUMN_FORMAT,
        caption=CAPTION,
        label=LABEL,
    )


def main():
    """Load every cohort's scores, then print and save the LaTeX table."""
    rows = [
        cohort_row(
            cohort,
            np.load(opj(cohort.location, f"scores{cohort.score_suffix}.npy")),
        )
        for cohort in COHORTS
    ]
    latex = render_latex(rows)
    print(latex)
    with open(opj("output", "scores.tex"), "w", encoding="utf-8") as fp:
        fp.write(latex)


if __name__ == "__main__":
    main()
|