% Summary statistics of the two corpora, side by side.
% Uses booktabs rules (\toprule/\midrule/\bottomrule), so no vertical rules
% are drawn: they do not join cleanly with booktabs' padded horizontal rules.
\begin{table}
  \centering
  \caption{Comparison of the high-energy physics corpus and the computational linguistics corpus.}
  \label{table:corpora}% must follow \caption so that \ref yields the table number
  % Row labels left-aligned, values centred; @{} strips the outer column padding.
  \begin{tabular}{@{}lcc@{}}
    \toprule
    Cohort                      & High-energy physics & Computational linguistics \\
    \midrule
    Cohort size ($N$)           & $2\,195$            & $578$                     \\
    Number of abstracts ($D$)   & $186\,162$          & $58\,289$                 \\
    Vocabulary size ($V$)       & $4\,751$            & $5\,966$                  \\
    Valid topics ($K$)          & 16                  & 14                        \\
    Initial time period         & 2000--2009          & 2002--2011                \\
    Late time period            & 2015--2019          & 2012--2022                \\
    Author disambiguation       & Yes                 & No                        \\
    Minimum publications        & $5$                 & $3$                       \\
    \bottomrule
  \end{tabular}
\end{table}