1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import pandas as pd
- import numpy as np
- import textwrap
- import matplotlib
- from matplotlib import pyplot as plt
- matplotlib.use("pgf")
- matplotlib.rcParams.update(
- {
- "pgf.texsystem": "xelatex",
- "font.family": "serif",
- "font.serif": "Times New Roman",
- "text.usetex": True,
- "pgf.rcfonts": False,
- }
- )
- usages = pd.read_csv("output/supersymmetry_usages.csv")
- descriptions = pd.read_csv("output/hep-ct-75-0.1-0.001-130000-20/descriptions.csv")
- usages = usages.merge(descriptions)
- rows = 1
- cols = usages["term"].nunique()
- fig, axes = plt.subplots(rows, cols, sharey=True)
- n = 0
- for term, topics in usages.groupby("term"):
- topics = topics.sort_values("p_t_w", ascending=False)
- ax = axes[n]
-
- probs = topics["p_t_w"].values
- labels = topics["description"].tolist()
- w = textwrap.TextWrapper(width=40,break_long_words=False,replace_whitespace=False)
- wlabels = ["\n".join(words) for words in map(w.wrap, labels)]
- if n == 0:
- ax.set_ylabel("Probability $P(z|w)$ that $w$ occurs as part of a topic $z$")
-
- ax.bar(labels, probs)
- ax.set_title(f"``{term}''")
- ax.set_xticklabels(wlabels, rotation=60, ha="right")
- ax.set_label("Topics $z$")
- n += 1
- plt.subplots_adjust(bottom=0.2)
- plt.savefig("plots/susy_usages.pgf", bbox_inches="tight")
- plt.savefig("plots/susy_usages.pdf", bbox_inches="tight")
- plt.savefig("plots/susy_usages.eps", bbox_inches="tight")
|