1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586 |
# Simulates the daily arrival of new recordings: for every dated directory
# under $example_dir, clone the data set, add that day's recordings and raw
# annotations, run the URUMETRICS-CODE pipeline (import, annotations, metrics,
# messages), push the results to origin, and remove the local clone again.
#
# -e: abort on the first failing command.
# -u: treat the use of an unset variable as an error.
set -eu

example_dir=/scratch2/whavard/CODE/LAAC/URUMETRICS/dat/data_set/recordings/original/renamed
dataset="URUMETRICS-TEST"
code_dir=scripts/URUMETRICS-CODE

# For each example directory (i.e. name of the directory is a date, 20221001, 20221002, etc.)
for dir_name in "${example_dir}"/*; do
    # Skip the literal pattern left behind when the glob matches nothing, and
    # any stray regular file that is not a per-day directory.
    [ -d "${dir_name}" ] || continue

    today=$(basename "${dir_name}")

    # Clone data set
    printf '\tCloning...\n'
    datalad install -r "git@gin.g-node.org:/LAAC-LSCP/${dataset}.git"
    cd "${dataset}"

    # Unlock necessary files (metadata, messages, and metrics).
    # `get` and `unlock` are separate statements on purpose: in `a && b`,
    # a failure of `a` is ignored by `set -e`, so a failed download would
    # otherwise go unnoticed.
    printf '\tUnlocking...\n'
    datalad get extra/messages/definition
    datalad get extra/metrics
    datalad unlock extra/metrics
    datalad get metadata
    datalad unlock metadata

    # Prepare repository (create necessary directories)
    printf '\tPreparing...\n'
    python -u "${code_dir}/import_data/prepare_data_set.py"

    # Copy data
    printf '\tCopying data\n'
    mkdir -p "recordings/raw/${today}"
    cp "${dir_name}"/*.wav "recordings/raw/${today}"
    cp "${dir_name}/VTC_${today}.rttm" annotations/vtc/raw
    cp "${dir_name}/VTC_${today}.vcm" annotations/vcm/raw
    cp "${dir_name}/ALICE_${today}.txt" annotations/alice/raw

    # Save raw data
    printf '\tSaving and pushing recordings and annotations\n'
    datalad save recordings -m "Added new recordings for date ${today}"
    datalad save annotations/*/raw -m "Added raw annotations for date ${today}"
    datalad push --to origin

    # Import recordings
    printf '\tImporting new recordings\n'
    python -u "${code_dir}/import_data/import_recordings.py" --experiment Uruguayan_Chatbot_2022

    # Compute acoustic annotations
    printf '\tComputing acoustic annotations\n'
    python -u "${code_dir}/acoustic_annotations/compute_acoustic_annotations.py" \
        --path-vtc "./annotations/vtc/raw/VTC_${today}.rttm" \
        --path-recordings ./recordings/raw/ \
        --save-path ./annotations/acoustic/raw

    # Import annotations.
    # The VCM and ACOUSTIC files are named after the VTC file they were
    # derived from (VTC_<date>.vcm is what gets copied in above).
    printf '\tImporting VTC/VCM/ALICE/ACOUSTIC annotations\n'
    python -u "${code_dir}/import_data/import_annotations.py" --annotation-type VTC --annotation-file "VTC_${today}.rttm"
    python -u "${code_dir}/import_data/import_annotations.py" --annotation-type VCM --annotation-file "VTC_${today}.vcm"
    python -u "${code_dir}/import_data/import_annotations.py" --annotation-type ALICE --annotation-file "ALICE_${today}.txt"
    python -u "${code_dir}/import_data/import_annotations.py" --annotation-type ACOUSTIC --annotation-file "VTC_${today}.csv"

    # Compute turn annotations (-u added for consistency with the other
    # steps, so the output is not buffered out of order with the log lines)
    printf '\tComputing CONVERSATIONS annotations\n'
    python -u "${code_dir}/turn_annotations/compute_turn_annotations.py" \
        --save-path ./annotations/conversations/raw \
        --save-name "CONV_${today}"

    # Import turn annotations
    printf '\tImporting CONVERSATIONS annotations\n'
    python -u "${code_dir}/import_data/import_annotations.py" --annotation-type CONVERSATIONS --annotation-file "CONV_${today}.csv"

    # Compute metrics
    printf '\tComputing metrics\n'
    python -u "${code_dir}/compute_metrics/metrics.py"

    # Generate messages
    printf '\tGenerating messages\n'
    python -u "${code_dir}/generate_messages/messages.py" --date "${today}"

    # Save data
    printf '\tSaving and pushing\n'
    datalad save annotations/*/raw -m "Imported derived raw annotations for date ${today}"
    datalad save annotations/*/converted -m "Converted annotations for date ${today}"
    datalad save metadata -m "Updated metadata for date ${today}"
    datalad save extra/metrics -m "Computed new metrics for date ${today}"
    datalad save extra/messages/generated -m "Message generated for date ${today}"
    # Catch-all for anything not covered by the targeted saves above.
    datalad save . -m "Saved remaining changes for date ${today}"
    datalad push --to origin

    # Uninstall: declare this temporary clone dead to git-annex, publish that
    # state to origin, then remove the local copy.
    printf '\tUninstalling\n'
    git annex dead here
    datalad push --to origin
    cd ..
    datalad remove -d "${dataset}"
done
|