Added VoxCeleb, VCTK_Corpus and LibriVox formats for annotation exports

midas-research · Aug 20, 2024 · ac403c3 · ac403c3
1 parent 391c3dc
commit ac403c3
Show file tree

Hide file tree

Showing 2 changed files with 64 additions and 1 deletion.
diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py
@@ -1155,6 +1155,69 @@ def convert_annotation_data_format(data, format_name):
                 formatted_entry[key] = entry[key]
             formatted_data.append(formatted_entry)
         return formatted_data
+    elif format_name == "VoxCeleb":
+        formatted_data = []
+        for entry in data:
+            formatted_entry = {
+                "file": entry["path"],
+                "text": entry["sentence"],
+                "gender": entry["gender"],
+                "nationality" : "",
+                "age": entry["age"],
+                "id": str(uuid.uuid4()),
+                "speaker_id": "",
+                "label": entry["label"],
+                "start": entry["start"],
+                "end": entry["end"]
+            }
+            attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")]
+            for key in attribute_keys:
+                formatted_entry[key] = entry[key]
+            formatted_data.append(formatted_entry)
+        return formatted_data
+    elif format_name == "VCTK_Corpus":
+        formatted_data = []
+        for entry in data:
+            formatted_entry = {
+                "file": entry["path"],
+                "text": entry["sentence"],
+                "gender": entry["gender"],
+                "accent": entry["accents"],
+                "age": entry["age"],
+                "id": str(uuid.uuid4()),
+                "speaker_id": "",
+                "label": entry["label"],
+                "start": entry["start"],
+                "end": entry["end"]
+            }
+            attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")]
+            for key in attribute_keys:
+                formatted_entry[key] = entry[key]
+            formatted_data.append(formatted_entry)
+        return formatted_data
+    elif format_name == "LibriVox":
+        formatted_data = []
+        for entry in data:
+            formatted_entry = {
+                "file": entry["path"],
+                "text": entry["sentence"],
+                "gender": entry["gender"],
+                "language": language_id_mapping.get(entry["locale"], None),
+                "duration": str(abs(float(entry["end"]) - float(entry["start"]))),
+                "accent": entry["accents"],
+                "age": entry["age"],
+                "id": str(uuid.uuid4()),
+                "speaker_id": "",
+                "label": entry["label"],
+                "start": entry["start"],
+                "end": entry["end"]
+            }
+            attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")]
+            for key in attribute_keys:
+                formatted_entry[key] = entry[key]
+            formatted_data.append(formatted_entry)
+        return formatted_data
+
     return data
 
 def export_audino_job(job_id, dst_file, format_name, server_url=None, save_images=False):

diff --git a/cvat/apps/engine/background_operations.py b/cvat/apps/engine/background_operations.py
@@ -195,7 +195,7 @@ def __init__(
             location_config=location_config,
         )
         self.EXPORT_FOR = ""
-        AUDIO_FORMATS = ["Common Voice", "Librispeech", "VoxPopuli", "Ted-Lium"]
+        AUDIO_FORMATS = ["Common Voice", "Librispeech", "VoxPopuli", "Ted-Lium", "VoxCeleb", "VCTK_Corpus", "LibriVox"]
         if format_name in AUDIO_FORMATS:
             self.EXPORT_FOR = "audio"