Browse Source

Use generic converter instead of two separate converters

William N. Havard 1 year ago
parent
commit
5aef0cfde2
2 changed files with 7 additions and 39 deletions
  1. + 4 - 37
      import_data/custom_converters.py
  2. + 3 - 2
      import_data/import_annotations.py

+ 4 - 37
import_data/custom_converters.py

@@ -23,46 +23,13 @@ from ChildProject.annotations import AnnotationConverter
 
 from consts import ANNOTATION_TYPES
 
-class AcousticConverter(AnnotationConverter):
-    FORMAT = ANNOTATION_TYPES.ACOUSTIC.value[0]
 
-    @staticmethod
-    def convert(filename: str, source_file: str = "", **kwargs) -> pd.DataFrame:
-        df = pd.read_csv(
-            filename,
-            sep=r",",
-            header=0,
-            engine="python",
-        )
-
-        n_recordings = len(df["file"].unique())
-        if  n_recordings > 1 and not source_file:
-            print(
-                f"""WARNING: {filename} contains annotations from {n_recordings} different audio files, """
-                """but no filter was specified which means all of these annotations will be imported.\n"""
-                """as if they belonged to the same recording. Please make sure this is the intended behavior """
-                """(it probably isn't)."""
-            )
-
-        if source_file:
-            df = df[df["file"].str.contains(source_file)]
-
-        df.drop(columns=["file"], inplace=True)
-
-        return df
-
-
-class ConversationsConverter(AnnotationConverter):
-    FORMAT = ANNOTATION_TYPES.CONVERSATIONS.value[0]
+class FilteredCsvConverter(AnnotationConverter):
+    FORMAT = 'filtered_csv'
 
     @staticmethod
     def convert(filename: str, source_file: str = "", **kwargs) -> pd.DataFrame:
-        df = pd.read_csv(
-            filename,
-            sep=r",",
-            header=0,
-            engine="python",
-        )
+        df = pd.read_csv(filename)
 
         n_recordings = len(df["recording_filename"].unique())
         if  n_recordings > 1 and not source_file:
@@ -76,4 +43,4 @@ class ConversationsConverter(AnnotationConverter):
         if source_file:
             df = df[df["recording_filename"].str.contains(source_file)]
 
-        return df
+        return df

+ 3 - 2
import_data/import_annotations.py

@@ -35,7 +35,7 @@ from ChildProject.projects import ChildProject
 
 from consts import ANNOTATION_TYPES
 from utils import get_raw_filename
-from custom_converters import AcousticConverter, ConversationsConverter # /!\ Do not remove this line
+from custom_converters import FilteredCsvConverter # /!\ Do not remove this line
 
 logger = logging.getLogger(__name__)
 pd.set_option('mode.chained_assignment', None)  # Silences pandas' complaints
@@ -157,7 +157,8 @@ def _build_raw_filename(input, annotation_format, filename='', extension=''):
 
 def _import_annotation(project, am, annotation_type, annotation_file, recordings_from_annotation_file, recording):
     annotation_set = annotation_type.lower()
-    annotation_format = ANNOTATION_TYPES.asdict()[annotation_set.upper()][0]
+    annotation_format = ANNOTATION_TYPES.asdict()[annotation_set.upper()][0] \
+                        if annotation_set not in ['conversations', 'acoustic'] else 'filtered_csv'
 
     # Get recordings and set up df
     input = _get_recordings(project, annotation_set, annotation_format, recording)