2 年前 · 5aef0cfde2
--- a/import_data/custom_converters.py
+++ b/import_data/custom_converters.py
@@ -23,46 +23,13 @@ from ChildProject.annotations import AnnotationConverter
 
				 
			
 
				 from consts import ANNOTATION_TYPES
			
 
				 
			
 
				-class AcousticConverter(AnnotationConverter):
			
 
				-    FORMAT = ANNOTATION_TYPES.ACOUSTIC.value[0]
			
 
				 
			
 
				-    @staticmethod
			
 
				-    def convert(filename: str, source_file: str = "", **kwargs) -> pd.DataFrame:
			
 
				-        df = pd.read_csv(
			
 
				-            filename,
			
 
				-            sep=r",",
			
 
				-            header=0,
			
 
				-            engine="python",
			
 
				-        )
			
 
				-
			
 
				-        n_recordings = len(df["file"].unique())
			
 
				-        if  n_recordings > 1 and not source_file:
			
 
				-            print(
			
 
				-                f"""WARNING: {filename} contains annotations from {n_recordings} different audio files, """
			
 
				-                """but no filter was specified which means all of these annotations will be imported.\n"""
			
 
				-                """as if they belonged to the same recording. Please make sure this is the intended behavior """
			
 
				-                """(it probably isn't)."""
			
 
				-            )
			
 
				-
			
 
				-        if source_file:
			
 
				-            df = df[df["file"].str.contains(source_file)]
			
 
				-
			
 
				-        df.drop(columns=["file"], inplace=True)
			
 
				-
			
 
				-        return df
			
 
				-
			
 
				-
			
 
				-class ConversationsConverter(AnnotationConverter):
			
 
				-    FORMAT = ANNOTATION_TYPES.CONVERSATIONS.value[0]
			
 
				+class FilteredCsvConverter(AnnotationConverter):
			
 
				+    FORMAT = 'filtered_csv'
			
 
				 
			
 
				     @staticmethod
			
 
				     def convert(filename: str, source_file: str = "", **kwargs) -> pd.DataFrame:
			
 
				-        df = pd.read_csv(
			
 
				-            filename,
			
 
				-            sep=r",",
			
 
				-            header=0,
			
 
				-            engine="python",
			
 
				-        )
			
 
				+        df = pd.read_csv(filename)
			
 
				 
			
 
				         n_recordings = len(df["recording_filename"].unique())
			
 
				         if  n_recordings > 1 and not source_file:
			
@@ -76,4 +43,4 @@ class ConversationsConverter(AnnotationConverter):
 
				         if source_file:
			
 
				             df = df[df["recording_filename"].str.contains(source_file)]
			
 
				 
			
 
				-        return df
			
 
				+        return df
			
--- a/import_data/import_annotations.py
+++ b/import_data/import_annotations.py
@@ -35,7 +35,7 @@ from ChildProject.projects import ChildProject
 
				 
			
 
				 from consts import ANNOTATION_TYPES
			
 
				 from utils import get_raw_filename
			
 
				-from custom_converters import AcousticConverter, ConversationsConverter # /!\ Do not remove this line
			
 
				+from custom_converters import FilteredCsvConverter # /!\ Do not remove this line
			
 
				 
			
 
				 logger = logging.getLogger(__name__)
			
 
				 pd.set_option('mode.chained_assignment', None)  # Silences pandas' complaints
			
@@ -157,7 +157,8 @@ def _build_raw_filename(input, annotation_format, filename='', extension=''):
 
				 
			
 
				 def _import_annotation(project, am, annotation_type, annotation_file, recordings_from_annotation_file, recording):
			
 
				     annotation_set = annotation_type.lower()
			
 
				-    annotation_format = ANNOTATION_TYPES.asdict()[annotation_set.upper()][0]
			
 
				+    annotation_format = ANNOTATION_TYPES.asdict()[annotation_set.upper()][0] \
			
 
				+                        if annotation_set not in ['conversations', 'acoustic'] else 'filtered_csv'
			
 
				 
			
 
				     # Get recordings and set up df
			
 
				     input = _get_recordings(project, annotation_set, annotation_format, recording)