# terms_cats.py (2.0 KB)

from pathlib import Path

import matplotlib
import pandas as pd
from matplotlib import pyplot as plt
  4. matplotlib.use("pgf")
  5. matplotlib.rcParams.update(
  6. {
  7. "pgf.texsystem": "xelatex",
  8. "font.family": "serif",
  9. "font.serif": "Times New Roman",
  10. "text.usetex": True,
  11. "pgf.rcfonts": False,
  12. }
  13. )
  14. hep_cats = {"Theory-HEP", "Phenomenology-HEP", "Experiment-HEP", "Lattice"}
  15. terms = sorted(["supersymmetry", "supersymmetric", "susy"])
  16. articles = pd.read_parquet("inspire-harvest/database/articles.parquet")
  17. articles["title"] = articles.title.str.lower()
  18. articles["abstract"] = articles.abstract.str.lower()
  19. articles = articles[articles["categories"].map(lambda x: hep_cats&set(x)).map(len) > 0]
  20. articles["year"] = articles["date_created"].str[:4].replace('', 0).astype(int)
  21. articles = articles[(articles["year"] >= 1980) & (articles["year"] <= 2020)]
  22. for cat in hep_cats:
  23. articles[cat] = articles["categories"].map(lambda x: cat in x)
  24. cats = []
  25. for term in terms:
  26. fit = articles[articles.title.str.contains(term) | articles.abstract.str.contains(term)]
  27. c = {}
  28. for cat in hep_cats-{"Lattice"}:
  29. c[cat] = fit[cat].mean()
  30. c["term"] = term
  31. cats.append(c)
  32. cats = pd.DataFrame(cats)
  33. rows = 1
  34. cols = len(terms)
  35. fig, axes = plt.subplots(rows, cols, sharey=True)
  36. n = 0
  37. for term in terms:
  38. ax = axes[n]
  39. labels = ["Theory-HEP", "Phenomenology-HEP", "Experiment-HEP"]
  40. human_friendly_labels = ["Theory", "Phenomenology", "Experiment"]
  41. probs = cats[cats["term"] == term][labels].iloc[0]
  42. if n == 0:
  43. ax.set_ylabel("Share of abstracts that belong to each category")
  44. ax.bar(human_friendly_labels, probs, color = ['#377eb8', '#ff7f00', '#4daf4a'])
  45. ax.set_title(f"``{term}''")
  46. ax.set_xticklabels(human_friendly_labels, rotation=90, ha="right")
  47. ax.set_label("Categories")
  48. n += 1
  49. plt.subplots_adjust(bottom=0.2)
  50. plt.savefig("plots/terms_cats.pgf", bbox_inches="tight")
  51. plt.savefig("plots/terms_cats.pdf", bbox_inches="tight")
  52. plt.savefig("plots/terms_cats.eps", bbox_inches="tight")