Bläddra i källkod

effect of academic age

Lucas Gautheron 3 veckor sedan
förälder
incheckning
10da028bf2
40 ändrade filer med 75613 tillägg och 2757 borttagningar
  1. 48 11
      code/comparative_analysis.py
  2. 630 555
      output/etm_20_pretrained/change_score_effects_entropy_brokerage.eps
  3. 612 571
      output/etm_20_pretrained/change_score_effects_entropy_magnitude.eps
  4. 10106 0
      output/etm_20_pretrained/change_score_effects_stirling_magnitude.eps
  5. 9720 0
      output/etm_20_pretrained/disruption_score_effects_entropy_brokerage.eps
  6. 841 664
      output/etm_20_pretrained/disruption_score_effects_entropy_magnitude.eps
  7. 10004 0
      output/etm_20_pretrained/disruption_score_effects_stirling_magnitude.eps
  8. 10525 0
      output/etm_20_pretrained/entered_score_effects_entropy_brokerage.eps
  9. 10574 0
      output/etm_20_pretrained/entered_score_effects_entropy_magnitude.eps
  10. 10832 0
      output/etm_20_pretrained/entered_score_effects_stirling_magnitude.eps
  11. 984 942
      output/etm_20_pretrained/exited_score_effects_entropy_magnitude.eps
  12. 10723 0
      output/etm_20_pretrained/exited_score_effects_stirling_magnitude.eps
  13. 0 1
      output/etm_20_pretrained/samples_change_entropy_brokerage.npz
  14. 1 0
      output/etm_20_pretrained/samples_change_entropy_brokerage.npz
  15. 0 1
      output/etm_20_pretrained/samples_change_entropy_magnitude.npz
  16. 1 0
      output/etm_20_pretrained/samples_change_entropy_magnitude.npz
  17. 0 1
      output/etm_20_pretrained/samples_change_stirling_magnitude.npz
  18. 1 0
      output/etm_20_pretrained/samples_change_stirling_magnitude.npz
  19. 0 1
      output/etm_20_pretrained/samples_collaborations.npz
  20. 1 0
      output/etm_20_pretrained/samples_collaborations.npz
  21. 0 1
      output/etm_20_pretrained/samples_disruption_entropy_brokerage.npz
  22. 1 0
      output/etm_20_pretrained/samples_disruption_entropy_brokerage.npz
  23. 0 1
      output/etm_20_pretrained/samples_disruption_entropy_magnitude.npz
  24. 1 0
      output/etm_20_pretrained/samples_disruption_entropy_magnitude.npz
  25. 0 1
      output/etm_20_pretrained/samples_disruption_stirling_magnitude.npz
  26. 1 0
      output/etm_20_pretrained/samples_disruption_stirling_magnitude.npz
  27. 0 1
      output/etm_20_pretrained/samples_entered_entropy_brokerage.npz
  28. 1 0
      output/etm_20_pretrained/samples_entered_entropy_brokerage.npz
  29. 0 1
      output/etm_20_pretrained/samples_entered_entropy_magnitude.npz
  30. 1 0
      output/etm_20_pretrained/samples_entered_entropy_magnitude.npz
  31. 0 1
      output/etm_20_pretrained/samples_entered_stirling_magnitude.npz
  32. 1 0
      output/etm_20_pretrained/samples_entered_stirling_magnitude.npz
  33. 0 1
      output/etm_20_pretrained/samples_exited_entropy_brokerage.npz
  34. 1 0
      output/etm_20_pretrained/samples_exited_entropy_brokerage.npz
  35. 0 1
      output/etm_20_pretrained/samples_exited_entropy_magnitude.npz
  36. 1 0
      output/etm_20_pretrained/samples_exited_entropy_magnitude.npz
  37. 0 1
      output/etm_20_pretrained/samples_exited_stirling_magnitude.npz
  38. 1 0
      output/etm_20_pretrained/samples_exited_stirling_magnitude.npz
  39. 0 1
      output/etm_20_pretrained/samples_intellectual_capital_effect.npz
  40. 1 0
      output/etm_20_pretrained/samples_intellectual_capital_effect.npz

+ 48 - 11
code/comparative_analysis.py

@@ -40,6 +40,48 @@ parser.add_argument("--model", default="", choices=["", "bare"])
 parser.add_argument("--compact", action="store_true", default=False)
 args = parser.parse_args()
 
+def age():
+    """Return each author's academic age, cached in `age.csv` under `args.input`.
+
+    An article's age is 2015 minus the year held in the first four characters of
+    its `date_created`; an author's academic age is the largest age among the
+    articles they are listed on.
+
+    Returns:
+        DataFrame with one row per `bai` and the columns `bai` and `age`.
+    """
+    cache_path = opj(args.input, "age.csv")
+    if exists(cache_path):
+        # Caches written by earlier versions carry the index as an extra unnamed
+        # column; selecting the two columns keeps both paths on the same schema.
+        return pd.read_csv(cache_path, usecols=["bai", "age"])
+
+    # Ages are measured back from this year.
+    reference_year = 2015
+
+    articles = pd.read_parquet(
+        "../semantics/inspire-harvest/database/articles.parquet",
+        columns=["article_id", "date_created"],
+    )
+    articles["article_id"] = articles["article_id"].astype(int)
+    # Keep only records whose creation date has at least four characters (a year).
+    articles = articles[articles["date_created"].str.len() >= 4].copy()
+    articles["age"] = reference_year - articles["date_created"].str[:4].astype(int)
+
+    authors = pd.read_parquet(
+        "../semantics/inspire-harvest/database/articles_authors.parquet",
+        columns=["article_id", "bai"],
+    )
+    authors["article_id"] = authors["article_id"].astype(int)
+
+    # Attach each article's age to its authors, then keep every author's largest age.
+    per_author = articles[["article_id", "age"]].merge(authors, how="inner", on="article_id")
+    academic_age = per_author.groupby("bai").agg(age=("age", "max")).reset_index()
+
+    # index=False so that reloading the cache yields exactly the columns returned here.
+    academic_age.to_csv(cache_path, index=False)
+    return academic_age
+
 def institution_stability():
     if exists(opj(args.input, "institutional_stability.csv")):
         return pd.read_csv(opj(args.input, "institutional_stability.csv"), index_col="bai")
@@ -104,12 +146,6 @@ x = NR/NR.sum(axis=1)[:,np.newaxis]
 y = NC/NC.sum(axis=1)[:,np.newaxis]
 S_distrib = S/S.sum(axis=1)[:,np.newaxis]
 
-
-# R = np.array([
-#     [((expertise[:,i]>expertise[:,i].mean())&(expertise[:,j]>expertise[:,j].mean())).mean()/((expertise[:,i]>expertise[:,i].mean())|(expertise[:,j]>expertise[:,j].mean())).mean() for j in range(len(topics))]
-#     for i in range(len(topics))
-# ])
-
 R = np.array([
     [((expertise[:,i]>expertise[:,i].mean())&(expertise[:,j]>expertise[:,j].mean())).mean()/(expertise[:,i]>expertise[:,i].mean()).mean() for j in range(len(topics))]
     for i in range(len(topics))
@@ -183,9 +219,7 @@ df["social_stirling"] = 1-np.einsum("ij,kij->k", R, social_expertise_matrix)
 
 stability = institution_stability()
 df = df.merge(stability, left_on="bai", right_index=True)
-
-age = pd.read_csv(opj(args.input, "outcomes.csv"))[["bai", "age"]].drop_duplicates()
-df = df.merge(age, left_on="bai", right_on="bai")
+df = df.merge(age(), left_on="bai", right_on="bai")
 
 df["primary_research_area"] = x.argmax(axis=1)
 
@@ -228,7 +262,8 @@ fig.savefig(opj(args.input, "disruption_score_collider_physics.eps"), bbox_inche
 
 if not exists(opj(args.input, f"samples_{args.metric}_{args.diversity}_{args.power}.npz")):
     model = CmdStanModel(
-        stan_file=f"code/{args.metric}.stan" if args.model=="" else f"code/{args.metric}_{args.model}_{args.power}.stan",
+        stan_file=f"code/{args.metric}.stan" if args.model==""
+        else f"code/{args.metric}_{args.model}_{args.power}.stan",
     )
 
     fit = model.sample(
@@ -254,13 +289,14 @@ labels = [
     "Social capital (diversity)",
     "Social capital (power)",
     "Stable affiliation",
+    "Academic age",
 ]
 labels = [f"\\textbf{{{label}}}" for label in labels]
 
 labels += topics
 
 names = [
-    "beta_int_div", "beta_soc_div", "beta_soc_cap", "beta_stable"
+    "beta_int_div", "beta_soc_div", "beta_soc_cap", "beta_stable", "beta_age",
 ]
 
 if args.metric not in ["entered", "exited"]:
@@ -359,6 +395,7 @@ else:
         "Social capital (diversity)",
         "Social capital (power)",
         "Stable affiliation",
+        "Academic age",
     ]
 
     if not args.compact:

Filskillnaden har hållts tillbaka eftersom den är för stor
+ 630 - 555
output/etm_20_pretrained/change_score_effects_entropy_brokerage.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 612 - 571
output/etm_20_pretrained/change_score_effects_entropy_magnitude.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 10106 - 0
output/etm_20_pretrained/change_score_effects_stirling_magnitude.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 9720 - 0
output/etm_20_pretrained/disruption_score_effects_entropy_brokerage.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 841 - 664
output/etm_20_pretrained/disruption_score_effects_entropy_magnitude.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 10004 - 0
output/etm_20_pretrained/disruption_score_effects_stirling_magnitude.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 10525 - 0
output/etm_20_pretrained/entered_score_effects_entropy_brokerage.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 10574 - 0
output/etm_20_pretrained/entered_score_effects_entropy_magnitude.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 10832 - 0
output/etm_20_pretrained/entered_score_effects_stirling_magnitude.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 984 - 942
output/etm_20_pretrained/exited_score_effects_entropy_magnitude.eps


Filskillnaden har hållts tillbaka eftersom den är för stor
+ 10723 - 0
output/etm_20_pretrained/exited_score_effects_stirling_magnitude.eps


+ 0 - 1
output/etm_20_pretrained/samples_change_entropy_brokerage.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/KZ/99/MD5E-s12450158--3b28bdac6790b4b7f3af16b5a78f01da.npz/MD5E-s12450158--3b28bdac6790b4b7f3af16b5a78f01da.npz

+ 1 - 0
output/etm_20_pretrained/samples_change_entropy_brokerage.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s12747916--08267386a69a81f8ed3e3926048baee6.npz

+ 0 - 1
output/etm_20_pretrained/samples_change_entropy_magnitude.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/7g/4Q/MD5E-s12413344--4a677b5c2da781d8e7e4b0ca132fe4b2.npz/MD5E-s12413344--4a677b5c2da781d8e7e4b0ca132fe4b2.npz

+ 1 - 0
output/etm_20_pretrained/samples_change_entropy_magnitude.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s12727468--ff74aa11b090e3fccbd0762fcf6a7441.npz

+ 0 - 1
output/etm_20_pretrained/samples_change_stirling_magnitude.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/09/PW/MD5E-s12341791--2f7dfa366524aef2303a235053613399.npz/MD5E-s12341791--2f7dfa366524aef2303a235053613399.npz

+ 1 - 0
output/etm_20_pretrained/samples_change_stirling_magnitude.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s12662797--57f748d575c5a49e9ab64af024ed6304.npz

+ 0 - 1
output/etm_20_pretrained/samples_collaborations.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/2j/V4/MD5E-s16396554--4e6e122bb6c203fa9dc36c8b97215f2f.npz/MD5E-s16396554--4e6e122bb6c203fa9dc36c8b97215f2f.npz

+ 1 - 0
output/etm_20_pretrained/samples_collaborations.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s16396554--4e6e122bb6c203fa9dc36c8b97215f2f.npz

+ 0 - 1
output/etm_20_pretrained/samples_disruption_entropy_brokerage.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/Gw/4v/MD5E-s13662903--9113cd17b153b7a04b73a77e53d37eec.npz/MD5E-s13662903--9113cd17b153b7a04b73a77e53d37eec.npz

+ 1 - 0
output/etm_20_pretrained/samples_disruption_entropy_brokerage.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s13805141--67600a7bb7241b00401ced2390c17145.npz

+ 0 - 1
output/etm_20_pretrained/samples_disruption_entropy_magnitude.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/fX/08/MD5E-s13602664--35bb2d768cbc11559d3c6f30527c32e5.npz/MD5E-s13602664--35bb2d768cbc11559d3c6f30527c32e5.npz

+ 1 - 0
output/etm_20_pretrained/samples_disruption_entropy_magnitude.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s13896930--67584b33147009f86701c7be44feae2e.npz

+ 0 - 1
output/etm_20_pretrained/samples_disruption_stirling_magnitude.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/qw/jX/MD5E-s13414078--f7bcb8d308e6115bb1ddb3712d5e9569.npz/MD5E-s13414078--f7bcb8d308e6115bb1ddb3712d5e9569.npz

+ 1 - 0
output/etm_20_pretrained/samples_disruption_stirling_magnitude.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s13727672--bb4b2546af923c5530f56786a21ecf38.npz

+ 0 - 1
output/etm_20_pretrained/samples_entered_entropy_brokerage.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/fx/2X/MD5E-s5627555--7656986b5ee2edb4619dc4b2df69fc42.npz/MD5E-s5627555--7656986b5ee2edb4619dc4b2df69fc42.npz

+ 1 - 0
output/etm_20_pretrained/samples_entered_entropy_brokerage.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s5929527--7b4d9c205f9e97538a1fb11424315f6c.npz

+ 0 - 1
output/etm_20_pretrained/samples_entered_entropy_magnitude.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/v7/0Z/MD5E-s5593317--bdfc46b66b441ca54917b14193419b0b.npz/MD5E-s5593317--bdfc46b66b441ca54917b14193419b0b.npz

+ 1 - 0
output/etm_20_pretrained/samples_entered_entropy_magnitude.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s5887248--04c2100fc7842ffbb14907c36fd49006.npz

+ 0 - 1
output/etm_20_pretrained/samples_entered_stirling_magnitude.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/xQ/Xw/MD5E-s5583950--d11ce37d93e8608fda387df87fb6ad93.npz/MD5E-s5583950--d11ce37d93e8608fda387df87fb6ad93.npz

+ 1 - 0
output/etm_20_pretrained/samples_entered_stirling_magnitude.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s5878548--e69456a6153f452850ad6e54676aa507.npz

+ 0 - 1
output/etm_20_pretrained/samples_exited_entropy_brokerage.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/9Z/VG/MD5E-s5655016--c6d2fe311d0cd3f60f0e9581f7e16333.npz/MD5E-s5655016--c6d2fe311d0cd3f60f0e9581f7e16333.npz

+ 1 - 0
output/etm_20_pretrained/samples_exited_entropy_brokerage.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s5906265--46db3cec1e124addb544181b5a55b24a.npz

+ 0 - 1
output/etm_20_pretrained/samples_exited_entropy_magnitude.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/Fz/63/MD5E-s5593878--b9e500830dc6c978a3a6b99c5c664930.npz/MD5E-s5593878--b9e500830dc6c978a3a6b99c5c664930.npz

+ 1 - 0
output/etm_20_pretrained/samples_exited_entropy_magnitude.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s5834164--d1cf670767431a0f52194f2243e7daad.npz

+ 0 - 1
output/etm_20_pretrained/samples_exited_stirling_magnitude.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/mW/4G/MD5E-s5588897--3a5ea6225e6dd29238df2dcc3685a9d8.npz/MD5E-s5588897--3a5ea6225e6dd29238df2dcc3685a9d8.npz

+ 1 - 0
output/etm_20_pretrained/samples_exited_stirling_magnitude.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s5845361--0dfe6b82056eb2dc3aacde20d3aeb1c5.npz

+ 0 - 1
output/etm_20_pretrained/samples_intellectual_capital_effect.npz

@@ -1 +0,0 @@
-../../.git/annex/objects/QJ/v2/MD5E-s119633490--71b86781abc7307345181585ae461d20.npz/MD5E-s119633490--71b86781abc7307345181585ae461d20.npz

+ 1 - 0
output/etm_20_pretrained/samples_intellectual_capital_effect.npz

@@ -0,0 +1 @@
+/annex/objects/MD5E-s119633490--71b86781abc7307345181585ae461d20.npz