example_pipeline.sh 4.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  # Example pipeline: replays a series of dated recording directories into a
  # DataLad data set, one date at a time.
  #
  # For every sub-directory of ${example_dir} (each one is named after a date,
  # e.g. 20221001, 20221002, ...), the script:
  #   1. clones the data set and fetches/unlocks the files it needs,
  #   2. copies that date's recordings and raw VTC/VCM/ALICE annotations in,
  #   3. imports the recordings and annotations,
  #   4. computes and imports the derived CONVERSATIONS/ACOUSTIC annotations,
  #   5. computes metrics and generates messages,
  #   6. saves and pushes the results, then removes the local clone.
  #
  # Requires: datalad, git-annex, python, and access to the GIN remote below.
  example_dir=/scratch2/whavard/CODE/LAAC/URUMETRICS/dat/data_set/recordings/original/renamed
  dataset="URUMETRICS-TEST-WILLIAM"

  # Print a tab-indented progress message on stdout.
  log() {
    printf '\t%s\n' "$*"
  }

  # Print an error message on stderr and abort the whole pipeline.
  die() {
    printf 'error: %s\n' "$*" >&2
    exit 1
  }

  # For each example directory (i.e. name of the directory is a date, 20221001, 20221002, etc.)
  for dir_name in "${example_dir}"/*; do
    # Skip the literal pattern left behind by an unmatched glob, and any
    # entry that is not a directory.
    [[ -d "${dir_name}" ]] || continue

    today=$(basename -- "${dir_name}")
    log "${today}"

    # Clone data set
    log "Cloning..."
    datalad install -r "git@gin.g-node.org:/LAAC-LSCP/${dataset}.git" \
      || die "could not install data set ${dataset}"
    # Everything below uses paths relative to the clone, so a failed cd must
    # stop the script rather than let it run in the parent directory.
    cd "${dataset}" || die "could not enter ${dataset}"

    # Unlock necessary file (metadata, messages, and metrics)
    log "Unlocking..."
    datalad get extra/messages/definition
    datalad get extra/metrics && datalad unlock extra/metrics
    datalad get metadata && datalad unlock metadata

    # Prepare repository (create necessary directories)
    log "Preparing..."
    python -u scripts/URUMETRICS-CODE/import_data/prepare_data_set.py

    # Copy data
    log "Copying data"
    mkdir -p "recordings/raw/${today}"
    cp -- "${example_dir}/${today}"/*.wav "recordings/raw/${today}"
    cp -- "${example_dir}/${today}/VTC_${today}.rttm" annotations/vtc/raw
    cp -- "${example_dir}/${today}/VTC_${today}.vcm" annotations/vcm/raw
    cp -- "${example_dir}/${today}/ALICE_${today}.txt" annotations/alice/raw

    # Save raw data
    log "Saving and pushing recordings and annotations"
    datalad save recordings -m "Added new recordings for date ${today}"
    # The glob is deliberately unquoted: it selects every annotation set's raw directory.
    datalad save annotations/*/raw -m "Added raw annotations for date ${today}"
    datalad push --to origin

    # Import recordings
    log "Importing new recordings"
    python -u scripts/URUMETRICS-CODE/import_data/import_recordings.py --experiment Uruguayan_Chatbot_2022

    # Import annotations
    log "Importing VTC/VCM/ALICE annotations"
    python -u scripts/URUMETRICS-CODE/import_data/import_annotations.py --annotation-type VTC --annotation-file "VTC_${today}.rttm"
    # The VCM file really is named VTC_<date>.vcm (see the copy step above).
    python -u scripts/URUMETRICS-CODE/import_data/import_annotations.py --annotation-type VCM --annotation-file "VTC_${today}.vcm"
    python -u scripts/URUMETRICS-CODE/import_data/import_annotations.py --annotation-type ALICE --annotation-file "ALICE_${today}.txt"

    log "Saving and pushing"
    datalad save annotations/*/converted -m "Converted RAW annotations for date ${today}"
    datalad save metadata -m "Imported RAW annotations for date ${today}"
    datalad push --to origin
    # metadata is unlocked again right after being saved, presumably so the
    # following import steps can modify it.
    datalad unlock metadata

    # Compute turn annotations
    log "Computing CONVERSATIONS annotations"
    python scripts/URUMETRICS-CODE/compute_annotations/compute_derived_annotations.py --annotation-type CONVERSATIONS --save-path annotations/conversations/raw/

    # Import turn annotations
    log "Importing CONVERSATIONS annotations"
    python -u scripts/URUMETRICS-CODE/import_data/import_annotations.py --annotation-type CONVERSATIONS --annotation-file "CONVERSATIONS_VTC_${today}.csv" --recordings-from-annotation-file "VTC_${today}.rttm"

    # Compute acoustic annotations
    log "Computing ACOUSTIC annotations"
    python scripts/URUMETRICS-CODE/compute_annotations/compute_derived_annotations.py --annotation-type ACOUSTIC --save-path annotations/acoustic/raw/ --target-sr 16000

    # Import acoustic annotations
    log "Importing ACOUSTIC annotations"
    python -u scripts/URUMETRICS-CODE/import_data/import_annotations.py --annotation-type ACOUSTIC --annotation-file "ACOUSTIC_VTC_${today}.csv" --recordings-from-annotation-file "VTC_${today}.rttm"

    log "Saving and pushing"
    datalad save annotations/*/raw -m "Imported DERIVED RAW annotations for date ${today}"
    datalad save annotations/*/converted -m "Converted DERIVED annotations for date ${today}"
    datalad save metadata -m "Imported DERIVED CONVERTED annotations for date ${today}"
    datalad unlock metadata

    # Compute metrics
    log "Computing metrics"
    python -u scripts/URUMETRICS-CODE/compute_metrics/metrics.py

    # Generating messages
    log "Generating messages"
    python -u scripts/URUMETRICS-CODE/generate_messages/messages.py --date "${today}"

    # Save data
    log "Saving and pushing"
    datalad save extra/metrics -m "Computed new metrics for date ${today}"
    datalad save extra/messages/generated -m "Message generated for date ${today}"
    datalad save .
    datalad push --to origin

    # Uninstalling: declare this temporary clone dead to git-annex, publish
    # that, then leave the clone and remove it.
    log "Uninstalling"
    git annex dead here
    datalad push --to origin
    cd .. || die "could not leave ${dataset}"
    datalad remove -d "${dataset}"
  done