# plot_susy_usages.py (1.4 KB)
  1. import pandas as pd
  2. import numpy as np
  3. import textwrap
  4. import matplotlib
  5. from matplotlib import pyplot as plt
  6. matplotlib.use("pgf")
  7. matplotlib.rcParams.update(
  8. {
  9. "pgf.texsystem": "xelatex",
  10. "font.family": "serif",
  11. "font.serif": "Times New Roman",
  12. "text.usetex": True,
  13. "pgf.rcfonts": False,
  14. }
  15. )
  16. usages = pd.read_csv("output/supersymmetry_usages.csv")
  17. descriptions = pd.read_csv("output/hep-ct-75-0.1-0.001-130000-20/descriptions.csv")
  18. usages = usages.merge(descriptions)
  19. rows = 1
  20. cols = usages["term"].nunique()
  21. fig, axes = plt.subplots(rows, cols, sharey=True)
  22. n = 0
  23. for term, topics in usages.groupby("term"):
  24. topics = topics.sort_values("p_t_w", ascending=False)
  25. ax = axes[n]
  26. probs = topics["p_t_w"].values
  27. labels = topics["description"].tolist()
  28. w = textwrap.TextWrapper(width=40,break_long_words=False,replace_whitespace=False)
  29. wlabels = ["\n".join(words) for words in map(w.wrap, labels)]
  30. if n == 0:
  31. ax.set_ylabel("Probability $P(z|w)$ that $w$ occurs as part of a topic $z$")
  32. ax.bar(labels, probs)
  33. ax.set_title(f"``{term}''")
  34. ax.set_xticklabels(wlabels, rotation=60, ha="right")
  35. ax.set_label("Topics $z$")
  36. n += 1
  37. plt.subplots_adjust(bottom=0.2)
  38. plt.savefig("plots/susy_usages.pgf", bbox_inches="tight")
  39. plt.savefig("plots/susy_usages.pdf", bbox_inches="tight")
  40. plt.savefig("plots/susy_usages.eps", bbox_inches="tight")