diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py
index 594965c09311..1adf5ec5da12 100644
--- a/cvat/apps/dataset_manager/task.py
+++ b/cvat/apps/dataset_manager/task.py
@@ -1155,6 +1155,69 @@ def convert_annotation_data_format(data, format_name):
                 formatted_entry[key] = entry[key]
             formatted_data.append(formatted_entry)
         return formatted_data
+    elif format_name == "VoxCeleb":
+        formatted_data = []
+        for entry in data:
+            formatted_entry = {
+                "file": entry["path"],
+                "text": entry["sentence"],
+                "gender": entry["gender"],
+                "nationality" : "",
+                "age": entry["age"],
+                "id": str(uuid.uuid4()),
+                "speaker_id": "",
+                "label": entry["label"],
+                "start": entry["start"],
+                "end": entry["end"]
+            }
+            attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")]
+            for key in attribute_keys:
+                formatted_entry[key] = entry[key]
+            formatted_data.append(formatted_entry)
+        return formatted_data
+    elif format_name == "VCTK_Corpus":
+        formatted_data = []
+        for entry in data:
+            formatted_entry = {
+                "file": entry["path"],
+                "text": entry["sentence"],
+                "gender": entry["gender"],
+                "accent": entry["accents"],
+                "age": entry["age"],
+                "id": str(uuid.uuid4()),
+                "speaker_id": "",
+                "label": entry["label"],
+                "start": entry["start"],
+                "end": entry["end"]
+            }
+            attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")]
+            for key in attribute_keys:
+                formatted_entry[key] = entry[key]
+            formatted_data.append(formatted_entry)
+        return formatted_data
+    elif format_name == "LibriVox":
+        formatted_data = []
+        for entry in data:
+            formatted_entry = {
+                "file": entry["path"],
+                "text": entry["sentence"],
+                "gender": entry["gender"],
+                "language": language_id_mapping.get(entry["locale"], None),
+                "duration": str(abs(float(entry["end"]) - float(entry["start"]))),
+                "accent": entry["accents"],
+                "age": entry["age"],
+                "id": str(uuid.uuid4()),
+                "speaker_id": "",
+                "label": entry["label"],
+                "start": entry["start"],
+                "end": entry["end"]
+            }
+            attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")]
+            for key in attribute_keys:
+                formatted_entry[key] = entry[key]
+            formatted_data.append(formatted_entry)
+        return formatted_data
+
     return data
 
 def export_audino_job(job_id, dst_file, format_name, server_url=None, save_images=False):
diff --git a/cvat/apps/engine/background_operations.py b/cvat/apps/engine/background_operations.py
index 4add5b4d347e..64dacf2e1ac2 100644
--- a/cvat/apps/engine/background_operations.py
+++ b/cvat/apps/engine/background_operations.py
@@ -195,7 +195,7 @@ def __init__(
             location_config=location_config,
         )
         self.EXPORT_FOR = ""
-        AUDIO_FORMATS = ["Common Voice", "Librispeech", "VoxPopuli", "Ted-Lium"]
+        AUDIO_FORMATS = ["Common Voice", "Librispeech", "VoxPopuli", "Ted-Lium", "VoxCeleb", "VCTK_Corpus", "LibriVox"]
         if format_name in AUDIO_FORMATS:
             self.EXPORT_FOR = "audio"