|
@@ -69,7 +69,29 @@ def _get_children(children_path):
|
|
|
return data
|
|
|
|
|
|
|
|
|
-def _build_recording_metadata(recordings_path, recording, experiment, recording_device_type):
|
|
|
+#ac2pl
|
|
|
def _get_correspondance(correspondance_path):
    """
    Returns the mapping of correspondances across child IDs (phone numbers), or an empty
    mapping if `correspondance.csv` does not exist or contains no rows.

    The file is expected to have exactly two columns (by convention `new_number` then
    `original_number`): the first column provides the keys and the second the values.

    :param correspondance_path: Path to the `correspondance.csv` metadata file
    :type correspondance_path: str
    :return: dictionary mapping each ID of the first column to the ID of the second column
    :rtype: dict
    :raises ValueError: if the file has rows but not exactly two columns
    """
    try:
        # Read every cell as text: IDs are phone numbers, so letting pandas infer a numeric
        # dtype would drop leading zeros (or append '.0') and the keys would no longer match
        # the IDs parsed from the recording file names.
        data = pd.read_csv(correspondance_path, dtype=str)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A missing (or zero-byte) file simply means that there is no correspondance to apply.
        # Any other failure (malformed CSV, permissions...) is deliberately left to propagate
        # rather than being silently treated as "no mapping".
        return {}

    # Rows with a missing key or value cannot express a correspondance
    data = data.dropna()
    if data.empty:
        return {}

    if data.shape[1] != 2:
        raise ValueError(
            'Expected exactly 2 columns in {} (new_number, original_number), found {}'.format(
                correspondance_path, data.shape[1]))

    return dict(zip(data.iloc[:, 0], data.iloc[:, 1]))
|
|
|
+
|
|
|
+
|
|
|
+def _build_recording_metadata(recordings_path, recording, experiment, recording_device_type, correspondance):
|
|
|
"""
|
|
|
Return the metadata corresponding to a given file (date, time, duration, etc.)
|
|
|
:param recordings_path: path to the directory storing the WAV files
|
|
@@ -86,7 +108,7 @@ def _build_recording_metadata(recordings_path, recording, experiment, recording_
|
|
|
raw_filename, _ = os.path.splitext(os.path.basename(recording))
|
|
|
try:
|
|
|
child_id_, *experiment_stage, date_iso_, start_time_ = raw_filename.split('_')
|
|
|
- child_id = 'chi_{}'.format(child_id_) # coerce ID to be a string (prevent later mistakes)
|
|
|
+ child_id = 'chi_{}'.format(correspondance.get(child_id_, child_id_)) # coerce ID to be a string (prevents later mistakes)
|
|
|
date_iso = datetime.strptime(date_iso_, '%Y%m%d').strftime('%Y-%m-%d')
|
|
|
start_time = datetime.strptime(start_time_, '%H%M%S').strftime('%H:%M:%S')
|
|
|
session_id = '{}_{}'.format(child_id, date_iso_)
|
|
@@ -121,9 +143,13 @@ def import_recordings(project_path, experiment, recording_device_type):
|
|
|
:rtype: None
|
|
|
"""
|
|
|
recordings_metadata_path = os.path.join(project_path, 'metadata', 'recordings.csv')
|
|
|
+ correspondance_metadata_path = os.path.join(project_path, 'metadata', 'correspondance.csv')
|
|
|
+
|
|
|
recordings = _get_recordings(recordings_metadata_path)
|
|
|
recordings_count = len(recordings)
|
|
|
|
|
|
+ correspondance = _get_correspondance(correspondance_metadata_path)
|
|
|
+
|
|
|
recordings_path = os.path.join(project_path, 'recordings', 'raw')
|
|
|
recording_file_list = walk_dir(recordings_path, ext='wav', return_full_path=False)
|
|
|
|
|
@@ -131,7 +157,8 @@ def import_recordings(project_path, experiment, recording_device_type):
|
|
|
if recording_file in recordings['recording_filename'].values: continue
|
|
|
|
|
|
recording_metadata = _build_recording_metadata(recordings_path, recording_file,
|
|
|
- experiment, recording_device_type)
|
|
|
+ experiment, recording_device_type,
|
|
|
+ correspondance)
|
|
|
# Add new recordings only
|
|
|
if not recording_metadata:
|
|
|
continue
|
|
@@ -164,6 +191,7 @@ def import_children(project_path, experiment):
|
|
|
child_id_recordings = set(recordings['child_id'])
|
|
|
missing_children = child_id_recordings - set(children['child_id'])
|
|
|
|
|
|
+
|
|
|
for child_id in missing_children:
|
|
|
child_metadata = {
|
|
|
'experiment': experiment,
|