1 jaar geleden · b0ca60170f
--- a/import_data/import_recordings.py
+++ b/import_data/import_recordings.py
@@ -69,7 +69,29 @@ def _get_children(children_path):
 
				     return data
			
 
				 
			
 
				 
			
 
				-def _build_recording_metadata(recordings_path, recording, experiment, recording_device_type):
			
 
				+#ac2pl
			
 
				+def _get_correspondance(correspondance_path):
			
 
				+    """
			
 
				+    Returns a DataFrame of correspondances across child ID (phone numbers) or an empty DataFrame if `correspondance.csv` does not
			
 
				+    exist
			
 
				+    :param recordings_path: Path to the `correspondance.csv` metadata file
			
 
				+    :type correspondance_path: str
			
 
				+    :return: dataframe of correspondances across child ID (phone numbers) or empty dataframe
			
 
				+    :rtype: pandas.DataFrame
			
 
				+    """
			
 
				+    try:
			
 
				+        data = pd.read_csv(correspondance_path)
			
 
				+    except:
			
 
				+        columns = ['new_number', 'original_number']
			
 
				+        data = pd.DataFrame(columns=columns)
			
 
				+
			
 
				+    # Change to string
			
 
				+    data = data.astype({cname:'string' for cname in data.columns})
			
 
				+
			
 
				+    return dict(data.values.tolist())
			
 
				+
			
 
				+
			
 
				+def _build_recording_metadata(recordings_path, recording, experiment, recording_device_type, correspondance):
			
 
				     """
			
 
				     Return the metadata corresponding to a given file (date, time, duration, etc.)
			
 
				     :param recordings_path: path to the directory storing the WAV files
			
@@ -86,7 +108,7 @@ def _build_recording_metadata(recordings_path, recording, experiment, recording_
 
				     raw_filename, _ = os.path.splitext(os.path.basename(recording))
			
 
				     try:
			
 
				         child_id_, *experiment_stage, date_iso_, start_time_ = raw_filename.split('_')
			
 
				-        child_id = 'chi_{}'.format(child_id_) # coerce ID to be a string (prevent later mistakes)
			
 
				+        child_id = 'chi_{}'.format(correspondance.get(child_id_, child_id_)) # coerce ID to be a string (prevents later mistakes)
			
 
				         date_iso = datetime.strptime(date_iso_, '%Y%m%d').strftime('%Y-%m-%d')
			
 
				         start_time = datetime.strptime(start_time_, '%H%M%S').strftime('%H:%M:%S')
			
 
				         session_id = '{}_{}'.format(child_id, date_iso_)
			
@@ -121,9 +143,13 @@ def import_recordings(project_path, experiment, recording_device_type):
 
				     :rtype: None
			
 
				     """
			
 
				     recordings_metadata_path = os.path.join(project_path, 'metadata', 'recordings.csv')
			
 
				+    correspondance_metadata_path = os.path.join(project_path, 'metadata', 'correspondance.csv')
			
 
				+
			
 
				     recordings = _get_recordings(recordings_metadata_path)
			
 
				     recordings_count = len(recordings)
			
 
				 
			
 
				+    correspondance = _get_correspondance(correspondance_metadata_path)
			
 
				+
			
 
				     recordings_path = os.path.join(project_path, 'recordings', 'raw')
			
 
				     recording_file_list = walk_dir(recordings_path, ext='wav', return_full_path=False)
			
 
				 
			
@@ -131,7 +157,8 @@ def import_recordings(project_path, experiment, recording_device_type):
 
				         if recording_file in recordings['recording_filename'].values: continue
			
 
				 
			
 
				         recording_metadata = _build_recording_metadata(recordings_path, recording_file,
			
 
				-                                                       experiment, recording_device_type)
			
 
				+                                                       experiment, recording_device_type,
			
 
				+                                                       correspondance)
			
 
				         # Add new recordings only
			
 
				         if not recording_metadata:
			
 
				             continue
			
@@ -164,6 +191,7 @@ def import_children(project_path, experiment):
 
				     child_id_recordings = set(recordings['child_id'])
			
 
				     missing_children = child_id_recordings - set(children['child_id'])
			
 
				 
			
 
				+
			
 
				     for child_id in missing_children:
			
 
				         child_metadata = {
			
 
				             'experiment': experiment,