Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

dataframe_for_ann_importation.py 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import pandas as pd
  4. from os import listdir
  5. from os.path import join, isfile
  6. import argparse
  7. if __name__ == "__main__":
  8. parser = argparse.ArgumentParser()
  9. parser.add_argument('--corpus',
  10. required=True,
  11. help='the whole path to the folder with your corpus: /...'
  12. )
  13. args = parser.parse_args()
  14. path_corpus = args.corpus
  15. def change_directory(path):
  16. try:
  17. os.chdir(path)
  18. print("Current working directory: {0}".format(os.getcwd()))
  19. except FileNotFoundError:
  20. print("Directory: {0} does not exist".format(path))
  21. except NotADirectoryError:
  22. print("{0} is not a directory".format(path))
  23. except PermissionError:
  24. print("You do not have permissions to change to {0}".format(path))
  25. change_directory(f'{path_corpus}/metadata')
  26. mypath = f'{path_corpus}/recordings/raw'
  27. dirs = sorted([f for f in listdir(mypath) if isfile(join(mypath, f))])
  28. recording_filename = []
  29. range_offset = []
  30. raw_filename = []
  31. col_list = ['experiment', 'child_id', 'date_iso', 'start_time', 'recording_device_type', 'recording_filename', 'duration']
  32. df = pd.read_csv("recordings.csv", usecols=col_list)
  33. recording_filename = df["recording_filename"]
  34. range_offset = df["duration"]
  35. set_cha = ['cha']*len(recording_filename)
  36. time_seek = [0]*len(recording_filename)
  37. range_onset = [0]*len(recording_filename)
  38. format_cha = ['cha']*len(recording_filename)
  39. for rec in recording_filename:
  40. if rec.endswith('.wav'):
  41. rec1 = rec.replace('.wav', '.cha')
  42. raw_filename.append(rec1)
  43. else:
  44. rec1 = rec.replace('.mp3', '.cha')
  45. raw_filename.append(rec1)
  46. dataframe = {'set': set_cha, 'recording_filename': recording_filename, 'time_seek': time_seek, 'range_onset': range_onset, 'range_offset': range_offset, 'raw_filename': raw_filename, 'format': format_cha}
  47. df = pd.DataFrame(data=dataframe)
  48. df.to_csv(f'/{path_corpus}/metadata/dataframe.csv', index=False)
  49. print('done')