# match.py

  1. import argparse
  2. import pandas as pd
  3. parser = argparse.ArgumentParser(description = 'match classifications with extracted chunks')
  4. parser.add_argument('classifications', help = 'classifications file')
  5. parser.add_argument('output', help = 'output')
  6. parser.add_argument('--chunks', help = 'list of chunks', nargs = '+', required = True)
  7. args = parser.parse_args()
  8. classifications = pd.read_csv(args.classifications)
  9. chunks = pd.concat([
  10. pd.read_csv(f) for f in args.chunks
  11. ])
  12. classifications = classifications.merge(
  13. chunks,
  14. left_on = 'subject_id',
  15. right_on = 'zooniverse_id'
  16. )
  17. classifications.to_csv(args.output, index = False)