123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- import pandas as pd
- import numpy as np
- import textwrap
- import matplotlib
- from matplotlib import pyplot as plt
# Select the pgf backend, then apply the LaTeX/font settings in one update.
# NOTE(review): nothing in the visible part of this script draws a figure, so
# this setup may be vestigial here — confirm before removing it.
matplotlib.use("pgf")
_pgf_rc = {
    "pgf.texsystem": "xelatex",
    "font.family": "serif",
    "font.serif": "Times New Roman",
    "text.usetex": True,
    "pgf.rcfonts": False,
}
matplotlib.rcParams.update(_pgf_rc)
# French display labels for the three HEP categories. The declaration order
# is also the order in which labels are joined in the output table.
cats = {
    "Theory-HEP": "Théorie",
    "Phenomenology-HEP": "Phéno.",
    "Experiment-HEP": "Exp.",
}


def format_categories(labels):
    """Return the French labels of the known HEP categories found in *labels*.

    Labels are joined with "/" in the declaration order of ``cats`` so the
    result is deterministic (iterating a set intersection is not, because
    string hashing is randomized between interpreter runs). Unknown
    categories are ignored; no known category yields an empty string.
    """
    present = set(labels)
    return "/".join(label for key, label in cats.items() if key in present)


def wrap_subject(description, width=15):
    """Wrap *description* to *width* characters inside a one-column tabular.

    Wrapped lines are separated by a LaTeX line break so that a long topic
    description stays narrow within its table cell.
    """
    wrapped = "\\\\ ".join(textwrap.wrap(description, width=width))
    return "\\begin{tabular}{l}" + wrapped + "\\end{tabular}"


def load_susy_candidates():
    """Load topic assignments joined with article metadata, then filter them.

    Keeps rows whose lower-cased title contains "super" or "susy" and whose
    abstract is at least 500 characters long. Returns an independent copy so
    callers can modify it without chained-assignment warnings.
    """
    articles = pd.read_parquet("../inspire-harvest/database/articles.parquet")[
        ["article_id", "title", "abstract", "categories"]
    ]
    topics = pd.read_parquet("output/hep-ct-75-0.1-0.001-130000-20/topics_0.parquet")
    topics = topics.merge(articles, how="inner")

    # NOTE(review): the table caption says articles are selected on their
    # abstract, but the filter below looks at the title — confirm which one
    # is intended.
    lowered_title = topics["title"].str.lower()
    mentions_susy = lowered_title.str.contains("super") | lowered_title.str.contains("susy")
    topics = topics[mentions_susy]
    return topics[topics["abstract"].map(len) >= 500].copy()


def select_top_articles(topics, descriptions, susy_topics, per_topic=3):
    """Pick the *per_topic* highest-weighted articles for each topic id.

    Parameters:
        topics: frame with a ``probs`` column holding, per row, a sequence
            that is indexed by topic id.
        descriptions: frame indexed by topic id with a ``description_fr``
            column.
        susy_topics: iterable of topic ids to process.
        per_topic: number of articles kept per topic.

    Returns the concatenation of the per-topic selections, with ``topic``,
    ``description`` and ``prob`` columns added. *topics* itself is left
    unmodified. Raises ``ValueError`` (from ``pd.concat``) when
    *susy_topics* is empty.
    """
    selections = []
    for susy_topic in susy_topics:
        print(susy_topic)
        weights = topics["probs"].map(lambda probs: probs[susy_topic])
        # A stable sort keeps tied articles in their original order.
        best = (
            topics.assign(prob=weights)
            .sort_values("prob", ascending=False, kind="mergesort")
            .head(per_topic)
        )
        selections.append(
            best.assign(
                topic=susy_topic,
                # `descriptions` is indexed by topic id, so look up by label;
                # a positional lookup is only right if the CSV happens to be
                # sorted by topic starting at 0.
                description=descriptions.loc[susy_topic, "description_fr"],
            )
        )
    return pd.concat(selections)


def build_latex_table(articles):
    """Format the selected articles and render them as a LaTeX longtable."""
    table = articles[["article_id", "title", "description", "prob", "categories"]].copy()
    table["categories"] = table["categories"].map(format_categories)
    table["prob"] = table["prob"].map(lambda value: f"{value:.2f}")
    table = table.rename(
        columns={
            "description": "Sujet",
            "title": "Article",
            "categories": "Catégories",
            "prob": "$\\theta_{z}$",
        }
    )
    print(table)

    table["Sujet"] = table["Sujet"].map(wrap_subject)
    table["Article"] = table["Article"].map(lambda title: f"``{title}''")

    latex = table.reset_index(drop=True).set_index(["Sujet", "Article"]).to_latex(
        columns=["Catégories", "$\\theta_{z}$"],
        longtable=True,
        sparsify=True,
        multirow=True,
        multicolumn=True,
        position='H',
        column_format='p{0.25\\textwidth}|p{0.555\\textwidth}|p{0.145\\textwidth}|p{0.05\\textwidth}',
        escape=False,
        caption=(
            "\\textbf{Sélection de trois articles emblématiques pour chacun des sujets associés à la supersymétrie}. "
            "Les articles sont sélectionnés parmi ceux qui mentionnent la supersymétrie dans leur résumé."
        ),
        label="table:emblematic_articles",
    )
    # pandas emits the continuation notice in English; the document is French.
    return latex.replace("Continued on next page", "Suite page suivante")


def main():
    """Build the table of emblematic supersymmetry articles and write it out."""
    topics = load_susy_candidates()
    descriptions = pd.read_csv(
        "output/hep-ct-75-0.1-0.001-130000-20/descriptions.csv"
    ).set_index("topic")
    usages = pd.read_csv("analyses/supersymmetry_usages.csv")
    susy_topics = list(usages["topic"].unique())

    articles = select_top_articles(topics, descriptions, susy_topics)
    latex = build_latex_table(articles)

    # Explicit UTF-8: the table contains accented characters and must not
    # depend on the platform's default encoding.
    with open("analyses/emblematic_articles.tex", "w", encoding="utf-8") as fp:
        fp.write(latex)


if __name__ == "__main__":
    main()
|