|
@@ -81,14 +81,17 @@ def _get_correspondance(correspondance_path):
|
|
|
"""
|
|
|
try:
|
|
|
data = pd.read_csv(correspondance_path)
|
|
|
- #TODO, check that data has wanted columns?
|
|
|
except:
|
|
|
- columns = ['number', 'original']
|
|
|
+ columns = ['new_number', 'original_number']
|
|
|
data = pd.DataFrame(columns=columns)
|
|
|
- return data
|
|
|
|
|
|
+ # Cast every column to string so the mapping keys match the string IDs parsed from recording filenames
|
|
|
+ data = data.astype({cname:'string' for cname in data.columns})
|
|
|
+
|
|
|
+ return dict(data.values.tolist())
|
|
|
|
|
|
-def _build_recording_metadata(recordings_path, recording, experiment, recording_device_type):
|
|
|
+
|
|
|
+def _build_recording_metadata(recordings_path, recording, experiment, recording_device_type, correspondance):
|
|
|
"""
|
|
|
Return the metadata corresponding to a given file (date, time, duration, etc.)
|
|
|
:param recordings_path: path to the directory storing the WAV files
|
|
@@ -105,7 +108,7 @@ def _build_recording_metadata(recordings_path, recording, experiment, recording_
|
|
|
raw_filename, _ = os.path.splitext(os.path.basename(recording))
|
|
|
try:
|
|
|
child_id_, *experiment_stage, date_iso_, start_time_ = raw_filename.split('_')
|
|
|
- child_id = 'chi_{}'.format(child_id_) # coerce ID to be a string (prevent later mistakes)
|
|
|
+ child_id = 'chi_{}'.format(correspondance.get(child_id_, child_id_)) # coerce ID to be a string (prevents later mistakes)
|
|
|
date_iso = datetime.strptime(date_iso_, '%Y%m%d').strftime('%Y-%m-%d')
|
|
|
start_time = datetime.strptime(start_time_, '%H%M%S').strftime('%H:%M:%S')
|
|
|
session_id = '{}_{}'.format(child_id, date_iso_)
|
|
@@ -140,9 +143,13 @@ def import_recordings(project_path, experiment, recording_device_type):
|
|
|
:rtype: None
|
|
|
"""
|
|
|
recordings_metadata_path = os.path.join(project_path, 'metadata', 'recordings.csv')
|
|
|
+ correspondance_metadata_path = os.path.join(project_path, 'metadata', 'correspondance.csv')
|
|
|
+
|
|
|
recordings = _get_recordings(recordings_metadata_path)
|
|
|
recordings_count = len(recordings)
|
|
|
|
|
|
+ correspondance = _get_correspondance(correspondance_metadata_path)
|
|
|
+
|
|
|
recordings_path = os.path.join(project_path, 'recordings', 'raw')
|
|
|
recording_file_list = walk_dir(recordings_path, ext='wav', return_full_path=False)
|
|
|
|
|
@@ -150,7 +157,8 @@ def import_recordings(project_path, experiment, recording_device_type):
|
|
|
if recording_file in recordings['recording_filename'].values: continue
|
|
|
|
|
|
recording_metadata = _build_recording_metadata(recordings_path, recording_file,
|
|
|
- experiment, recording_device_type)
|
|
|
+ experiment, recording_device_type,
|
|
|
+ correspondance)
|
|
|
# Add new recordings only
|
|
|
if not recording_metadata:
|
|
|
continue
|
|
@@ -175,20 +183,16 @@ def import_children(project_path, experiment):
|
|
|
"""
|
|
|
recordings_metadata_path = os.path.join(project_path, 'metadata', 'recordings.csv')
|
|
|
children_metadata_path = os.path.join(project_path, 'metadata', 'children.csv')
|
|
|
- correspondance_metadata_path = os.path.join(project_path, 'metadata', 'correspondance.csv') #ac2lp
|
|
|
|
|
|
recordings = _get_recordings(recordings_metadata_path)
|
|
|
children = _get_children(children_metadata_path)
|
|
|
children_count = len(children)
|
|
|
- children = _get_correspondance(correspondance_metadata_path) #ac2pl
|
|
|
|
|
|
child_id_recordings = set(recordings['child_id'])
|
|
|
missing_children = child_id_recordings - set(children['child_id'])
|
|
|
|
|
|
|
|
|
for child_id in missing_children:
|
|
|
- # ac2lp check if children may not correspond to another telephone number
|
|
|
- # add: if child_id in correspondance['original'] then redefine child_id as the right cell in correspondance['original'] else what follows
|
|
|
child_metadata = {
|
|
|
'experiment': experiment,
|
|
|
'child_id': child_id,
|