Procházet zdrojové kódy

Handle telephone number change

William N. Havard před 1 rokem
rodič
revize
ae4bfa4b43
Změněn 1 soubor: 14 přidání a 10 odebrání
  1. 14 10
      import_data/import_recordings.py

+ 14 - 10
import_data/import_recordings.py

@@ -81,14 +81,17 @@ def _get_correspondance(correspondance_path):
     """
     try:
         data = pd.read_csv(correspondance_path)
-        #TODO, check that data has wanted columns?
     except:
-        columns = ['number', 'original']
+        columns = ['new_number', 'original_number']
         data = pd.DataFrame(columns=columns)
-    return data
 
+    # Change to string
+    data = data.astype({cname:'string' for cname in data.columns})
+
+    return dict(data.values.tolist())
 
-def _build_recording_metadata(recordings_path, recording, experiment, recording_device_type):
+
+def _build_recording_metadata(recordings_path, recording, experiment, recording_device_type, correspondance):
     """
     Return the metadata corresponding to a given file (date, time, duration, etc.)
     :param recordings_path: path to the directory storing the WAV files
@@ -105,7 +108,7 @@ def _build_recording_metadata(recordings_path, recording, experiment, recording_
     raw_filename, _ = os.path.splitext(os.path.basename(recording))
     try:
         child_id_, *experiment_stage, date_iso_, start_time_ = raw_filename.split('_')
-        child_id = 'chi_{}'.format(child_id_) # coerce ID to be a string (prevent later mistakes)
+        child_id = 'chi_{}'.format(correspondance.get(child_id_, child_id_)) # coerce ID to be a string (prevents later mistakes)
         date_iso = datetime.strptime(date_iso_, '%Y%m%d').strftime('%Y-%m-%d')
         start_time = datetime.strptime(start_time_, '%H%M%S').strftime('%H:%M:%S')
         session_id = '{}_{}'.format(child_id, date_iso_)
@@ -140,9 +143,13 @@ def import_recordings(project_path, experiment, recording_device_type):
     :rtype: None
     """
     recordings_metadata_path = os.path.join(project_path, 'metadata', 'recordings.csv')
+    correspondance_metadata_path = os.path.join(project_path, 'metadata', 'correspondance.csv')
+
     recordings = _get_recordings(recordings_metadata_path)
     recordings_count = len(recordings)
 
+    correspondance = _get_correspondance(correspondance_metadata_path)
+
     recordings_path = os.path.join(project_path, 'recordings', 'raw')
     recording_file_list = walk_dir(recordings_path, ext='wav', return_full_path=False)
 
@@ -150,7 +157,8 @@ def import_recordings(project_path, experiment, recording_device_type):
         if recording_file in recordings['recording_filename'].values: continue
 
         recording_metadata = _build_recording_metadata(recordings_path, recording_file,
-                                                       experiment, recording_device_type)
+                                                       experiment, recording_device_type,
+                                                       correspondance)
         # Add new recordings only
         if not recording_metadata:
             continue
@@ -175,20 +183,16 @@ def import_children(project_path, experiment):
     """
     recordings_metadata_path = os.path.join(project_path, 'metadata', 'recordings.csv')
     children_metadata_path = os.path.join(project_path, 'metadata', 'children.csv')
-    correspondance_metadata_path = os.path.join(project_path, 'metadata', 'correspondance.csv') #ac2lp
 
     recordings = _get_recordings(recordings_metadata_path)
     children = _get_children(children_metadata_path)
     children_count = len(children)
-    children = _get_correspondance(correspondance_metadata_path) #ac2pl
 
     child_id_recordings = set(recordings['child_id'])
     missing_children = child_id_recordings - set(children['child_id'])
 
 
     for child_id in missing_children:
-        # ac2lp check if children may not correspond to another telephone number
-	# add: if child_id in correspondance['original'] then redefine child_id as the right cell in correspondance['original'] else what follows
         child_metadata = {
             'experiment': experiment,
             'child_id': child_id,