# lucasgautheron/trading_zones_material
import pandas as pd
import numpy as np
import textwrap

import matplotlib
from matplotlib import pyplot as plt 
# Select the PGF backend and hand font handling over to XeLaTeX.
_PGF_RC_SETTINGS = {
    "pgf.texsystem": "xelatex",
    "font.family": "serif",
    "font.serif": "Times New Roman",
    "text.usetex": True,
    "pgf.rcfonts": False,
}

matplotlib.use("pgf")
matplotlib.rcParams.update(_PGF_RC_SETTINGS)

# Short French labels for the three category tags kept in the output table.
# The insertion order of this dict is the order used when labels are joined.
cats = {
    "Theory-HEP": "Théorie",
    "Phenomenology-HEP": "Phéno.",
    "Experiment-HEP": "Exp.",
}

# Only the article fields used below are requested, so the remaining columns
# of the parquet file are never materialized in memory.
articles = pd.read_parquet(
    "../inspire-harvest/database/articles.parquet",
    columns=["article_id", "title", "abstract", "categories"],
)
topics = pd.read_parquet("output/hep-ct-75-0.1-0.001-130000-20/topics_0.parquet")
# The merge joins on every column name the two frames share.
# NOTE(review): presumably that is just "article_id" -- confirm.
topics = topics.merge(articles, how="inner")
del articles
descriptions = pd.read_csv(
    "output/hep-ct-75-0.1-0.001-130000-20/descriptions.csv"
).set_index("topic")

usages = pd.read_csv("analyses/supersymmetry_usages.csv")
susy_topics = list(usages["topic"].unique())

# Keep articles whose title mentions supersymmetry ("super..." or "susy",
# case-insensitive) and whose abstract has at least 500 characters.
# Missing titles/abstracts are treated as non-matching instead of raising.
# NOTE(review): the table caption written at the end of this script says the
# selection is based on the abstract, whereas this filter uses the title --
# confirm which one is intended.
lowercase_titles = topics["title"].str.lower()
mentions_susy = lowercase_titles.str.contains("super|susy", na=False)
has_long_abstract = topics["abstract"].str.len() >= 500
# .copy() detaches the result from the merged frame so that later column
# assignments do not operate on a slice.
topics = topics[mentions_susy & has_long_abstract].copy()

# One frame per supersymmetry-related topic, each holding the three articles
# with the highest probability for that topic.
articles = []

for susy_topic in susy_topics:
    print(susy_topic)

    # Rank on a temporary frame: `topics` itself is neither re-sorted nor
    # extended with a scratch column on every iteration.
    ranked = (
        topics.assign(prob=topics["probs"].map(lambda probs: probs[susy_topic]))
        .sort_values("prob", ascending=False)
        .head(3)
    )

    articles.append(
        ranked.assign(
            topic=susy_topic,
            # `descriptions` is indexed by topic id, so the lookup must be
            # label-based (.loc); a positional lookup (.iloc) is only right
            # when the CSV rows happen to be ordered 0..n-1. An unknown topic
            # id raises KeyError instead of silently using another row.
            description=descriptions.loc[susy_topic, "description_fr"],
        )
    )

def _category_label(categories):
    """Return the French labels of the known categories, joined by "/".

    Labels follow the fixed order of ``cats`` rather than set-iteration
    order, so the generated table is identical from one run to the next.
    Categories that are not keys of ``cats`` are ignored.
    """
    present = set(categories)
    return "/".join(label for key, label in cats.items() if key in present)


articles = pd.concat(articles)
# .copy() so the column assignments below act on an independent frame.
articles = articles[["article_id", "title", "description", "prob", "categories"]].copy()
articles["categories"] = articles["categories"].map(_category_label)
# Probabilities are rendered as text with two decimals.
articles["prob"] = articles["prob"].map("{:.2f}".format)
articles = articles.rename(
    columns={
        "description": "Sujet",
        "title": "Article",
        "categories": "Catégories",
        "prob": "$\\theta_{z}$",
    }
)

print(articles)

def _stacked_cell(text):
    """Wrap ``text`` at 15 characters and stack the lines in a one-column tabular."""
    wrapped_lines = textwrap.wrap(text, width=15)
    return "\\begin{tabular}{l}" + "\\\\ ".join(wrapped_lines) + "\\end{tabular}"


articles["Sujet"] = articles["Sujet"].map(_stacked_cell)

# Surround each title with LaTeX opening/closing double quotes.
articles["Article"] = articles["Article"].map("``{}''".format)

# Render the table as a LaTeX longtable indexed by (topic description, title).
# escape=False keeps the LaTeX markup already present in cells and headers.
latex = (
    articles.reset_index(drop=True)
    .set_index(["Sujet", "Article"])
    .to_latex(
        columns=["Catégories", "$\\theta_{z}$"],
        longtable=True,
        sparsify=True,
        multirow=True,
        multicolumn=True,
        position='H',
        column_format='p{0.25\\textwidth}|p{0.555\\textwidth}|p{0.145\\textwidth}|p{0.05\\textwidth}',
        escape=False,
        caption="\\textbf{Sélection de trois articles emblématiques pour chacun des sujets associés à la supersymétrie}. Les articles sont sélectionnés parmi ceux qui mentionnent la supersymétrie dans leur résumé.",
        label="table:emblematic_articles",
    )
    # Translate the continuation notice that pandas emits for longtables.
    .replace("Continued on next page", "Suite page suivante")
)

# The table contains accented characters: write it explicitly as UTF-8 rather
# than with the platform default encoding. Plain "w" is enough because the
# file is never read back.
with open("analyses/emblematic_articles.tex", "w", encoding="utf-8") as fp:
    fp.write(latex)