plot_emblematic_articles.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. import pandas as pd
  2. import numpy as np
  3. import textwrap
  4. import matplotlib
  5. from matplotlib import pyplot as plt
  6. matplotlib.use("pgf")
  7. matplotlib.rcParams.update(
  8. {
  9. "pgf.texsystem": "xelatex",
  10. "font.family": "serif",
  11. "font.serif": "Times New Roman",
  12. "text.usetex": True,
  13. "pgf.rcfonts": False,
  14. }
  15. )
  16. cats = {
  17. "Theory-HEP": "Théorie",
  18. "Phenomenology-HEP": "Phéno.",
  19. "Experiment-HEP": "Exp."
  20. }
  21. articles = pd.read_parquet("../inspire-harvest/database/articles.parquet")[["article_id", "title", "abstract", "categories"]]
  22. topics = pd.read_parquet("output/hep-ct-75-0.1-0.001-130000-20/topics_0.parquet")
  23. topics = topics.merge(articles, how="inner")
  24. del articles
  25. descriptions = pd.read_csv("output/hep-ct-75-0.1-0.001-130000-20/descriptions.csv").set_index("topic")
  26. usages = pd.read_csv("analyses/supersymmetry_usages.csv")
  27. susy_topics = list(usages["topic"].unique())
  28. topics = topics[topics["title"].str.lower().str.contains("super") | topics["title"].str.lower().str.contains("susy")]
  29. topics = topics[topics["abstract"].map(len) >= 500]
  30. articles = []
  31. for susy_topic in susy_topics:
  32. print(susy_topic)
  33. topics[f"susy_{susy_topic}"] = topics["probs"].map(lambda l: l[susy_topic])
  34. topics.sort_values(f"susy_{susy_topic}", ascending=False, inplace=True)
  35. articles.append(
  36. topics.head(3).assign(
  37. topic=susy_topic,
  38. description=descriptions.iloc[susy_topic]["description_fr"],
  39. prob=topics.head(3)[f"susy_{susy_topic}"]
  40. )
  41. )
  42. articles = pd.concat(articles)
  43. articles = articles[["article_id", "title", "description", "prob", "categories"]]
  44. articles["categories"] = articles["categories"].map(lambda l: "/".join([cats[x] for x in list(set(l)&{"Theory-HEP", "Phenomenology-HEP", "Experiment-HEP"})]))
  45. articles["prob"] = articles["prob"].map(lambda f: f"{f:.2f}")
  46. articles.rename(columns = {
  47. "description": "Sujet",
  48. "title": "Article",
  49. "categories": "Catégories",
  50. "prob": "$\\theta_{z}$"
  51. }, inplace=True)
  52. print(articles)
  53. articles["Sujet"] = articles["Sujet"].apply(lambda s: "\\\\ ".join(textwrap.wrap(s, width=15)))
  54. articles["Sujet"] = articles["Sujet"].apply(lambda s: '\\begin{tabular}{l}' + s +'\\end{tabular}')
  55. articles["Article"] = articles["Article"].map(lambda s: f"``{s}''")
  56. latex = articles.reset_index(drop=True).set_index(["Sujet", "Article"]).to_latex(
  57. columns=["Catégories", "$\\theta_{z}$"],
  58. longtable=True,
  59. sparsify=True,
  60. multirow=True,
  61. multicolumn=True,
  62. position='H',
  63. column_format='p{0.25\\textwidth}|p{0.555\\textwidth}|p{0.145\\textwidth}|p{0.05\\textwidth}',
  64. escape=False,
  65. caption="\\textbf{Sélection de trois articles emblématiques pour chacun des sujets associés à la supersymétrie}. Les articles sont sélectionnés parmi ceux qui mentionnent la supersymétrie dans leur résumé.",
  66. label="table:emblematic_articles"
  67. ).replace("Continued on next page", "Suite page suivante")
  68. with open("analyses/emblematic_articles.tex", "w+") as fp:
  69. fp.write(latex)