"""Plot skip-gram Word2Vec loss against embedding dimension.

Reads a CSV file (``--input``) containing a ``dim`` column and a ``loss``
column, draws every row as a hollow gray marker at its ``dim``, overlays the
per-``dim`` mean loss as a line, and saves the figure to
``output/word2vec_validation.pdf`` using matplotlib's pgf backend with
XeLaTeX.
"""
import argparse
from pathlib import Path

import matplotlib
import numpy as np  # noqa: F401  (not referenced in the visible code; kept)
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.gridspec import GridSpec  # noqa: F401  (kept, see above)

# Destination of the rendered figure; the parent directory is created on demand.
OUTPUT_PATH = Path("output/word2vec_validation.pdf")

matplotlib.use("pgf")
matplotlib.rcParams.update(
    {
        "pgf.texsystem": "xelatex",
        "font.family": "serif",
        "font.serif": "Times New Roman",
        "text.usetex": True,
        "pgf.rcfonts": False,
        # The pgf backend takes its LaTeX preamble from "pgf.preamble".
        # The previous code called str.join() on "text.latex.preamble" and
        # threw the result away, so nothing was ever configured; it also
        # passed the *package* name amssymb to \setmainfont, which expects a
        # font name.
        "pgf.preamble": "\n".join(
            [
                r"\usepackage{amsmath}",
                r"\usepackage{amssymb}",
            ]
        ),
    }
)


def main():
    """Parse the command line, draw the loss-vs-dimension plot, save the PDF."""
    parser = argparse.ArgumentParser()
    # Required: without it pd.read_csv(None) fails with an unhelpful error.
    parser.add_argument(
        "--input",
        required=True,
        help="CSV file with 'dim' and 'loss' columns",
    )
    args = parser.parse_args()

    df = pd.read_csv(args.input)
    loss_by_dim = df.groupby("dim")["loss"]

    fig, ax = plt.subplots(figsize=[6.4, 3.2])

    # One column of hollow gray markers per embedding dimension.
    for dim, losses in loss_by_dim:
        ax.scatter(
            [dim] * len(losses),
            losses,
            alpha=0.5,
            color="gray",
            facecolors="none",
        )

    # Mean loss per dimension, drawn as a line on top of the markers.
    avg = loss_by_dim.mean()
    ax.plot(avg.index, avg)

    ax.set_xlabel("Embeddings dimension ($L$)")
    ax.set_ylabel("Skip-gram Word2Vec Loss")

    # savefig does not create missing directories.
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    fig.savefig(OUTPUT_PATH, bbox_inches="tight")


if __name__ == "__main__":
    main()