Browse Source

Include the vandam (cougar) corpus in the fit

Lucas Gautheron 1 year ago
parent
commit
643a5b29ae
5 changed files with 29 additions and 4 deletions
  1. 9 2
      code/models/corpus_bias.py
  2. 0 1
      input/annotators.csv
  3. 18 0
      input/annotators.csv
  4. 1 1
      output/fit_vanuatu_bias.png
  5. 1 0
      output/training_set.csv

+ 9 - 2
code/models/corpus_bias.py

@@ -27,6 +27,7 @@ parser.add_argument("--apply-bias-from", type=str, default="")
 parser.add_argument("--chains", default=4, type=int)
 parser.add_argument("--samples", default=2000, type=int)
 parser.add_argument("--validation", default=0, type=float)
+parser.add_argument("--simulated-children", default=40, type=int)
 parser.add_argument("--output", default="corpus_bias")
 args = parser.parse_args()
 
@@ -130,6 +131,7 @@ def compute_counts(parameters):
                         )
 
         d = {}
+        keep_child = True
         for i, speaker_A in enumerate(speakers):
             for j, speaker_B in enumerate(speakers):
                 if i != j:
@@ -141,11 +143,16 @@ def compute_counts(parameters):
 
                 d[f"vtc_{i}_{j}"] = z
 
+                if z > len(truth[speaker_B]):
+                    keep_child = False
+
             d[f"truth_{i}"] = len(truth[speaker_A])
             d["child"] = child
 
         d["duration"] = ann["duration"].sum() / 2 / 1000
-        data.append(d)
+
+        if keep_child:
+            data.append(d)
 
     return pd.DataFrame(data).assign(
         corpus=corpus,
@@ -345,7 +352,7 @@ if __name__ == "__main__":
         "n_groups": data["child"].nunique(),
         "n_corpora": data["corpus"].nunique(),
         "n_validation": max(1, int(truth.shape[0] * args.validation)),
-        "n_sim": 40,
+        "n_sim": args.simulated_children,
         "group": 1 + data["child"].astype("category").cat.codes.values,
         "corpus": 1 + corpora,
         "selected_corpus": (

+ 0 - 1
input/annotators.csv

@@ -1 +0,0 @@
-../.git/annex/objects/xM/43/MD5E-s346--fd415a66dfdc73b5be5bb646b5427464.csv/MD5E-s346--fd415a66dfdc73b5be5bb646b5427464.csv

+ 18 - 0
input/annotators.csv

@@ -0,0 +1,18 @@
+corpus,annotator
+png2019,eaf/mc
+vanuatu,textgrid/m1
+solomon,solis/NK
+solomon,solis/BC
+solomon,solis/LD
+solomon,solis/NM
+tsimane2017,eaf_2021/CD
+tsimane2017,eaf_2021/CM
+tsimane2017,textgrid/mm
+tsimane2017,eaf/nk
+namibia,textgrid/m1
+namibia,textgrid/mm
+namibia,textgrid/ak
+EL1000/bergelson,eaf/an1
+EL1000/warlaumont,eaf/an1
+EL1000/winnipeg,eaf/an1
+vandam,cha/an1

+ 1 - 1
output/fit_vanuatu_bias.png

@@ -1 +1 @@
-../.git/annex/objects/ZX/m0/MD5E-s34351--239adc31b7420d8a030d36b85b5dcdd2.png/MD5E-s34351--239adc31b7420d8a030d36b85b5dcdd2.png
+../.git/annex/objects/pv/Mf/MD5E-s35465--39bcc8033edfbc9f5c3565c8bc822944.png/MD5E-s35465--39bcc8033edfbc9f5c3565c8bc822944.png

+ 1 - 0
output/training_set.csv

@@ -6,4 +6,5 @@ namibia,26.716666666666665,12
 png2019,0.4,4
 solomon,5.516666666666667,15
 tsimane2017,13.466666666666667,27
+vandam,12.75,51
 vanuatu,2.8833333333333333,12