Browse Source

unbiased majority choice

Lucas Gautheron 2 years ago
parent
commit
bab365937c
1 changed file with 12 additions and 1 deletion
  1. 12 1
      annotations/feed-annotations.py

+ 12 - 1
annotations/feed-annotations.py

@@ -17,9 +17,20 @@ am = AnnotationManager(project)
 classifications = pd.read_csv(args.classifications)
 
 # recover the majority choice
+def majority_choice(values):
+    """Return the most frequent entry of `values` (a pandas Series).
+
+    Entries tied for the highest count are resolved by a random pick.
+    """
+    counts = values.value_counts(sort = True)
+    # .iloc[0] is positional; counts[0] is a label lookup on integer answers
+    counts = counts[counts == counts.iloc[0]]
+    # a unique winner is returned as is; ties are drawn at random
+    return counts.index[0] if len(counts) <= 1 else counts.sample(1).index[0]
+
 classifications = (
     classifications.groupby(['recording_filename', 'onset', 'offset', 'task_id'])
-    .agg(answer = ('answer', lambda x:x.value_counts().index[0]))
+    .agg(answer = ('answer', majority_choice))
 ).reset_index()
 
 # combine all tasks into one row per chunk