|
@@ -17,9 +17,20 @@ am = AnnotationManager(project)
|
|
|
classifications = pd.read_csv(args.classifications)
|
|
|
|
|
|
# recover the majority choice
|
|
|
def majority_choice(values):
    """Return the most frequent value in ``values``, breaking ties at random.

    Args:
        values: a pandas Series of answers (one group's worth when used as a
            groupby aggregation function).

    Returns:
        The value with the highest count. If several values share the
        highest count, one of them is picked at random (no seed is set here,
        so the pick may differ between runs). If ``values`` holds no
        countable entries (empty, or only missing values, which
        ``value_counts`` drops), NaN is returned.
    """
    counts = values.value_counts(sort=True)

    # Nothing to vote on: report a missing answer instead of raising.
    if counts.empty:
        return float("nan")

    # The index of `counts` holds the answers themselves, so `counts[0]`
    # would be a *label* lookup when answers are integers (wrong result or
    # KeyError) and a deprecated positional fallback otherwise. Compare
    # against the maximum count explicitly instead.
    top = counts[counts == counts.max()]

    # return the majority choice if it exists
    if len(top) <= 1:
        return top.index[0]

    # otherwise, do a random pick among the tied values
    return top.sample(1).index[0]
|
|
|
+
|
|
|
classifications = (
|
|
|
classifications.groupby(['recording_filename', 'onset', 'offset', 'task_id'])
|
|
|
- .agg(answer = ('answer', lambda x:x.value_counts().index[0]))
|
|
|
+ .agg(answer = ('answer', majority_choice))
|
|
|
).reset_index()
|
|
|
|
|
|
# combine all tasks into one row per chunk
|