|
@@ -2,7 +2,8 @@ import numpy as np
|
|
|
import pandas as pd
|
|
|
|
|
|
import plotly.graph_objects as go
|
|
|
-import plotly.io as pio
|
|
|
+import plotly.io as pio
|
|
|
+
|
|
|
pio.kaleido.scope.mathjax = None
|
|
|
|
|
|
import seaborn as sns
|
|
@@ -14,6 +15,9 @@ parser = argparse.ArgumentParser()
# Command-line options, registered on the `parser` created just above.
#
# --input is mandatory: the samples archive is read from it and every output
# (PDF figure, .tex tables) is written back into it. Without the flag the
# script would only fail later, inside the path join, with a far less helpful
# error (NOTE(review): assumes `opj` is os.path.join -- confirm).
parser.add_argument(
    "--input",
    required=True,
    help="directory containing ei_samples_<suffix>.npz; outputs are written there too",
)
parser.add_argument(
    "--suffix",
    help="suffix used in the samples file name and in the exported figure name",
)
parser.add_argument(
    "--compact",
    action="store_true",
    default=False,
    help="export a narrower figure (650 instead of 800 wide)",
)
parser.add_argument(
    "--transparent",
    action="store_true",
    default=False,
    help="when no topic is highlighted, draw every link in the faint colour",
)
parser.add_argument(
    "--highlight-in",
    default=[],
    nargs="+",
    help="topic names; links whose source is one of them may be emphasised",
)
parser.add_argument(
    "--highlight-out",
    default=[],
    nargs="+",
    help="topic names; links whose target is one of them may be emphasised",
)
args = parser.parse_args()

samples = np.load(opj(args.input, f"ei_samples_{args.suffix}.npz"))
|
|
@@ -34,20 +38,39 @@ print(colors)
# Mean of the sampled count matrices over their first axis. Computed once here
# rather than twice for every (i, j) pair inside the loop below.
mean_counts = samples["counts"].mean(axis=0)

# Row sums / column sums of the mean matrix; shown as percentages in the labels.
total_incoming = mean_counts.sum(axis=1)
total_outcoming = mean_counts.sum(axis=0)

incoming_labels = [
    f"{topics[i]} ({100*total_incoming[i]:.0f}%)" for i in range(n_topics)
]
outcoming_labels = [
    f"{topics[i]} ({100*total_outcoming[i]:.0f}%)" for i in range(n_topics)
]

# Per-row and per-column totals, evaluated with the same slice-then-sum
# expressions the loop used originally so the threshold below is unchanged.
row_sums = [mean_counts[i, :].sum() for i in range(n_topics)]
col_sums = [mean_counts[:, j].sum() for j in range(n_topics)]

# With no highlight list at all, every qualifying link is emphasised unless
# --transparent was requested.
emphasise_by_default = not args.transparent and not (
    args.highlight_in or args.highlight_out
)

for i in range(n_topics):
    x = row_sums[i]
    # Membership in an empty list is already False, so no length guard is needed.
    source_selected = topics[i] in args.highlight_in

    for j in range(n_topics):
        v = samples["counts"][:, i, j].mean()
        y = col_sums[j]

        value.append(v)
        source.append(i)
        # Target nodes are numbered after the n_topics source nodes.
        target.append(j + n_topics)

        selected = (
            source_selected
            or topics[j] in args.highlight_out
            or emphasise_by_default
        )
        # A link is only emphasised when its mean is at least the product of
        # its row total and column total (presumably the value expected if
        # origin and target were independent -- TODO confirm normalisation).
        highlight = selected and (v >= x * y)

        color.append("rgba(100,100,100,0.3)" if highlight else "rgba(200,200,200,0.02)")
|
|
|
|
|
|
fig = go.Figure(
|
|
|
data=[
|
|
@@ -75,53 +98,80 @@ fig.update_layout(
|
|
|
width=650 if args.compact else 800,
|
|
|
height=600,
|
|
|
)
# The interactive preview (fig.show()) stays disabled; the figure is only
# exported as a PDF next to the input data.

# Tag for the output file name, derived from the highlighted topic names:
# "/" and "&" become "_", spaces are dropped, and everything is lower-cased.
# NOTE(review): the two lists are concatenated without a separator and without
# marking which one is "in" or "out", so different selections can map to the
# same file name -- confirm this is acceptable.
highlights = "_".join(args.highlight_in) + "_".join(args.highlight_out)
highlights = highlights.translate(str.maketrans({"/": "_", "&": "_", " ": None}))
highlights = highlights.lower()
if highlights:
    highlights = "_" + highlights

compact_tag = "_compact" if args.compact else ""
transparent_tag = "_transparent" if args.transparent else ""
pdf_name = f"sankey_control_{args.suffix}{compact_tag}{transparent_tag}{highlights}.pdf"

fig.write_image(
    opj(args.input, pdf_name),
    width=650 if args.compact else 800,
    height=600,
)
# One row per (origin, target) pair of topics:
#   magnitude -- mean count for the pair, scaled by 100
#   ratio     -- mean count for the pair divided by the origin's row total
# The mean over the first axis and the row totals do not depend on the pair,
# so they are computed once up front instead of once per table cell.
mean_counts = samples["counts"].mean(axis=0)
row_totals = [mean_counts[i, :].sum() for i in range(n_topics)]

transfers = pd.DataFrame(
    [
        {
            "from": topics[i],
            "to": topics[j],
            "magnitude": 100 * mean_counts[i, j],
            "ratio": mean_counts[i, j] / row_totals[i],
        }
        for i in range(n_topics)
        for j in range(n_topics)
    ]
)
# LaTeX table of the (at most) ten largest transfers between *different*
# research areas, ranked by magnitude.
between_areas = transfers[transfers["from"] != transfers["to"]]
largest = between_areas.sort_values("magnitude", ascending=False).head(10)

latex = largest.to_latex(
    columns=["from", "to", "magnitude"],
    header=["Origin research area", "Target research area", "Magnitude"],
    index=False,
    multirow=True,
    multicolumn=True,
    column_format="b{0.4\\textwidth}|b{0.4\\textwidth}|c",
    escape=False,
    float_format="{:.2f}".format,
    caption="Largest transfers across research areas.",
    label="table:largest_transfers",
)

# Append a horizontal rule after every row terminator.
latex = latex.replace("\\\\\n", "\\\\ \\hline\n")

with open(opj(args.input, "largest_transfers.tex"), "w+") as fp:
    fp.write(latex)
# LaTeX table of the (at most) ten research areas with the highest
# self-transfer ratio, i.e. rows of `transfers` where origin and target match.
self_transfers = transfers[transfers["from"] == transfers["to"]]

latex = (
    self_transfers.sort_values("ratio", ascending=False)
    .head(10)
    .to_latex(
        columns=["from", "ratio"],
        header=["Research area", "Conservatism"],
        index=False,
        multirow=True,
        multicolumn=True,
        # Only two columns are emitted, so the spec declares exactly two.
        # The previous three-column spec was copied from the transfers table:
        # it put the ratio in a paragraph column and left an empty third one.
        column_format="b{0.4\\textwidth}|c",
        escape=False,
        float_format=lambda x: f"{x:.2f}",
        caption="Most conservative research areas.",
        label="table:most_conservative",
    )
)

# Append a horizontal rule after every row terminator.
latex = latex.replace("\\\\\n", "\\\\ \\hline\n")

with open(opj(args.input, "most_conservative.tex"), "w+") as fp:
    fp.write(latex)
|