# compile_validation_surveys.py (939 B)
  1. import pandas as pd
  2. import numpy as np
  3. ans = []
  4. def compile_survey(annotator, truth_file, answers_file):
  5. answers = pd.read_excel(answers_file)
  6. answers['question'] = answers['question'].fillna(method='ffill').astype(int)
  7. answers["answer"] = answers["1 topic or 2 topics ?"].astype(int)
  8. truth = pd.read_csv(truth_file)
  9. answers = answers.merge(truth, how="left", left_on="question", right_on="question")
  10. answers["correct"] = ((answers["answer"]==1)&answers["topic2"].isnull()) | ((answers["answer"]==2)&~answers["topic2"].isnull())
  11. return answers
  12. # ans.append(compile_survey("lucas", "analyses/truth_lucas2.csv", "surveys/questions_lucas2_answered.xlsx"))
  13. ans.append(compile_survey("acordeir", "analyses/truth_acordeir_weighted.csv", "surveys/acordeir.xlsx"))
  14. ans.append(compile_survey("hessel", "analyses/truth_hessel_weighted.csv", "surveys/hessel.xlsx"))
  15. ans = pd.concat(ans)
  16. print(ans["correct"].mean())