From 392792318e96e9197f464f91fb4bf9dcaab06767 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Mon, 9 Sep 2024 21:15:52 +0530 Subject: [PATCH 01/14] added task import feature for audino --- cvat/apps/dataset_manager/formats/LibriVox.py | 104 +++++++++++++++++ .../dataset_manager/formats/VCTK_Corpus.py | 105 ++++++++++++++++++ cvat/apps/dataset_manager/formats/VoxCeleb.py | 105 ++++++++++++++++++ .../apps/dataset_manager/formats/Voxpopuli.py | 103 +++++++++++++++++ .../dataset_manager/formats/common_voice.py | 105 ++++++++++++++++++ .../dataset_manager/formats/librispeech.py | 105 ++++++++++++++++++ cvat/apps/dataset_manager/formats/registry.py | 8 ++ cvat/apps/dataset_manager/formats/tedlium.py | 104 +++++++++++++++++ cvat/apps/dataset_manager/task.py | 7 ++ 9 files changed, 746 insertions(+) create mode 100644 cvat/apps/dataset_manager/formats/LibriVox.py create mode 100644 cvat/apps/dataset_manager/formats/VCTK_Corpus.py create mode 100644 cvat/apps/dataset_manager/formats/VoxCeleb.py create mode 100644 cvat/apps/dataset_manager/formats/Voxpopuli.py create mode 100644 cvat/apps/dataset_manager/formats/common_voice.py create mode 100644 cvat/apps/dataset_manager/formats/librispeech.py create mode 100644 cvat/apps/dataset_manager/formats/tedlium.py diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py new file mode 100644 index 000000000000..c51e1c57e676 --- /dev/null +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -0,0 +1,104 @@ +import os.path as osp +import zipfile +from glob import glob +from cvat.apps.dataset_manager.bindings import InstanceLabelData +from cvat.apps.engine.serializers import LabeledDataSerializer +import cvat.apps.dataset_manager as dm +from cvat.apps.dataset_manager.task import PatchAction +from .registry import importer + + + + + + + + +def load_anno(file_object, annotations): + if isinstance(file_object, str): + with open(file_object, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.splitlines() + headers = lines[0].split('\t') + + label_data = InstanceLabelData(annotations.db_instance) + + + + + for line in lines[1:]: + fields = line.split('\t') + record = dict(zip(headers, fields)) + + job_id = record.get('job_id') + + start = float(record.get('start', 0)) + end = float(record.get('end', 0)) + + label_name = record.get('label') + label_id = label_data._get_label_id(label_name) + + language_id_to_locale_mapping = {0: "en"} + language_id = int(record.get('language',0)) + + spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + + + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start, start, end, end], + "frame":0, + "occluded" : False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + "gender": record.get("gender", ""), + "age": record.get("age",""), + "locale":language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent",""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": [ + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } ] + } + ] + + + data = { + 'shapes': shapes_data + } + + serializer = LabeledDataSerializer(data=data) + pk = int(job_id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + + + + + + + + +@importer(name='LibriVox', ext='TSV, ZIP', version=" ") +def 
_import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + is_zip = zipfile.is_zipfile(src_file) + src_file.seek(0) + if is_zip: + zipfile.ZipFile(src_file).extractall(temp_dir) + + anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py new file mode 100644 index 000000000000..b1a202cc151a --- /dev/null +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -0,0 +1,105 @@ +import os.path as osp +import zipfile +from glob import glob +from cvat.apps.dataset_manager.bindings import InstanceLabelData +from cvat.apps.engine.serializers import LabeledDataSerializer +import cvat.apps.dataset_manager as dm +from cvat.apps.dataset_manager.task import PatchAction +from .registry import importer + + + + + + + + +def load_anno(file_object, annotations): + if isinstance(file_object, str): + with open(file_object, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.splitlines() + headers = lines[0].split('\t') + + label_data = InstanceLabelData(annotations.db_instance) + + + + + for line in lines[1:]: + fields = line.split('\t') + record = dict(zip(headers, fields)) + + job_id = record.get('job_id') + + start = float(record.get('start', 0)) + end = float(record.get('end', 0)) + + label_name = record.get('label') + label_id = label_data._get_label_id(label_name) + + language_id_to_locale_mapping = {0: "en"} + language_id = int(record.get('language',0)) + + + spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + + + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start, start, end, end], + "frame":0, + "occluded" : False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + "gender": record.get("gender", ""), + "age": record.get("age",""), + "locale":language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent",""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": [ + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } ] + } + ] + + + data = { + 'shapes': shapes_data + } + + serializer = LabeledDataSerializer(data=data) + pk = int(job_id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + + + + + + + + +@importer(name='VCTK Corpus', ext='TSV, ZIP', version=" ") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + is_zip = zipfile.is_zipfile(src_file) + src_file.seek(0) + if is_zip: + zipfile.ZipFile(src_file).extractall(temp_dir) + + anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py new file mode 100644 index 000000000000..3e4ef99be05e --- /dev/null +++ b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -0,0 +1,105 @@ +import os.path as osp +import zipfile +from glob import glob +from cvat.apps.dataset_manager.bindings import InstanceLabelData +from cvat.apps.engine.serializers import LabeledDataSerializer +import cvat.apps.dataset_manager as dm +from cvat.apps.dataset_manager.task import PatchAction +from .registry 
import importer + + + + + + + + +def load_anno(file_object, annotations): + if isinstance(file_object, str): + with open(file_object, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.splitlines() + headers = lines[0].split('\t') + + label_data = InstanceLabelData(annotations.db_instance) + + + + + for line in lines[1:]: + fields = line.split('\t') + record = dict(zip(headers, fields)) + + job_id = record.get('job_id') + + start = float(record.get('start', 0)) + end = float(record.get('end', 0)) + + label_name = record.get('label') + label_id = label_data._get_label_id(label_name) + + language_id_to_locale_mapping = {0: "en"} + language_id = int(record.get('language',0)) + + + spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + + + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start, start, end, end], + "frame":0, + "occluded" : False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + "gender": record.get("gender", ""), + "age": record.get("age",""), + "locale":language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent",""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": [ + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } ] + } + ] + + + data = { + 'shapes': shapes_data + } + + serializer = LabeledDataSerializer(data=data) + pk = int(job_id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + + + + + + + + +@importer(name='VoxCeleb', ext='TSV, ZIP', version=" ") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + is_zip = zipfile.is_zipfile(src_file) + src_file.seek(0) + if is_zip: + zipfile.ZipFile(src_file).extractall(temp_dir) + + anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py b/cvat/apps/dataset_manager/formats/Voxpopuli.py new file mode 100644 index 000000000000..5d0928e98d61 --- /dev/null +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -0,0 +1,103 @@ +import os.path as osp +import zipfile +from glob import glob +from cvat.apps.dataset_manager.bindings import InstanceLabelData +from cvat.apps.engine.serializers import LabeledDataSerializer +import cvat.apps.dataset_manager as dm +from cvat.apps.dataset_manager.task import PatchAction +from .registry import importer + + + + + + + + +def load_anno(file_object, annotations): + if isinstance(file_object, str): + with open(file_object, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.splitlines() + headers = lines[0].split('\t') + + label_data = InstanceLabelData(annotations.db_instance) + + + + + for line in lines[1:]: + fields = line.split('\t') + record = dict(zip(headers, fields)) + + job_id = record.get('job_id') + + start = float(record.get('start', 0)) + end = float(record.get('end', 0)) + + label_name = record.get('label') + label_id = label_data._get_label_id(label_name) + + language_id_to_locale_mapping = {0: "en"} + language_id = int(record.get('language',0)) + + spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start, start, end, end], + 
"frame":0, + "occluded" : False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("normalized_text", ""), + "gender": record.get("gender", ""), + "age": record.get("age",""), + "locale":language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent",""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": [ + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } ] + } + ] + + + data = { + 'shapes': shapes_data + } + + serializer = LabeledDataSerializer(data=data) + pk = int(job_id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + + + + + + + + +@importer(name='Voxpopuli', ext='TSV, ZIP', version=" ") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + is_zip = zipfile.is_zipfile(src_file) + src_file.seek(0) + if is_zip: + zipfile.ZipFile(src_file).extractall(temp_dir) + + anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py new file mode 100644 index 000000000000..5e6b7981c8fc --- /dev/null +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -0,0 +1,105 @@ +import os.path as osp +import zipfile +from glob import glob +from cvat.apps.dataset_manager.bindings import InstanceLabelData +from cvat.apps.engine.serializers import LabeledDataSerializer +import cvat.apps.dataset_manager as dm +from cvat.apps.dataset_manager.task import PatchAction +from .registry import importer + + + + + + + + +def load_anno(file_object, annotations): + if isinstance(file_object, str): + with open(file_object, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.splitlines() + headers = lines[0].split('\t') + + label_data = InstanceLabelData(annotations.db_instance) + + + + + for line in lines[1:]: + fields = line.split('\t') + record = dict(zip(headers, fields)) + + job_id = record.get('job_id') + + start = float(record.get('start', 0)) + end = float(record.get('end', 0)) + + label_name = record.get('label') + label_id = label_data._get_label_id(label_name) + + language_id_to_locale_mapping = {0: "en"} + language_id = int(record.get('language',0)) + + + spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start, start, end, end], + "frame":0, + "occluded" : False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("sentence", ""), + "gender": record.get("gender", ""), + "age": record.get("age",""), + "locale":language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accents",""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": [ + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } ] + } + ] + + + + data = { + 'shapes': shapes_data + } + + serializer = LabeledDataSerializer(data=data) + pk = int(job_id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + + + + + + + + +@importer(name='Common Voice', ext='TSV, ZIP', version=" ") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, 
**kwargs): + is_zip = zipfile.is_zipfile(src_file) + src_file.seek(0) + if is_zip: + zipfile.ZipFile(src_file).extractall(temp_dir) + + anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py new file mode 100644 index 000000000000..73c46cdcb49d --- /dev/null +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -0,0 +1,105 @@ +import os.path as osp +import zipfile +from glob import glob +from cvat.apps.dataset_manager.bindings import InstanceLabelData +from cvat.apps.engine.serializers import LabeledDataSerializer +import cvat.apps.dataset_manager as dm +from cvat.apps.dataset_manager.task import PatchAction +from .registry import importer + + + + + + + + +def load_anno(file_object, annotations): + if isinstance(file_object, str): + with open(file_object, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.splitlines() + headers = lines[0].split('\t') + + label_data = InstanceLabelData(annotations.db_instance) + + + + + for line in lines[1:]: + fields = line.split('\t') + record = dict(zip(headers, fields)) + + job_id = record.get('job_id') + + start = float(record.get('start', 0)) + end = float(record.get('end', 0)) + + label_name = record.get('label') + label_id = label_data._get_label_id(label_name) + + spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + + + + language_id_to_locale_mapping = {0: "en"} + language_id = int(record.get('language',0)) + + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start, start, end, end], + "frame":0, + "occluded" : False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + "gender": record.get("gender", ""), + "age": record.get("age",""), + "locale":language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent",""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": [ + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } ] + } + ] + + + data = { + 'shapes': shapes_data + } + + serializer = LabeledDataSerializer(data=data) + pk = int(job_id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + + + + + + + + +@importer(name='Librispeech', ext='TSV, ZIP', version=" ") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + is_zip = zipfile.is_zipfile(src_file) + src_file.seek(0) + if is_zip: + zipfile.ZipFile(src_file).extractall(temp_dir) + + anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/formats/registry.py b/cvat/apps/dataset_manager/formats/registry.py index 435f7fc28425..c8fec4159ef5 100644 --- a/cvat/apps/dataset_manager/formats/registry.py +++ b/cvat/apps/dataset_manager/formats/registry.py @@ -119,3 +119,11 @@ def make_exporter(name): import cvat.apps.dataset_manager.formats.openimages #Audino Export Formats import cvat.apps.dataset_manager.formats.aud_common_voice +#Audino Import Formats +import cvat.apps.dataset_manager.formats.librispeech +import cvat.apps.dataset_manager.formats.common_voice +import cvat.apps.dataset_manager.formats.tedlium +import 
cvat.apps.dataset_manager.formats.Voxpopuli +import cvat.apps.dataset_manager.formats.VoxCeleb +import cvat.apps.dataset_manager.formats.VCTK_Corpus +import cvat.apps.dataset_manager.formats.LibriVox diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py new file mode 100644 index 000000000000..26e05dc73aea --- /dev/null +++ b/cvat/apps/dataset_manager/formats/tedlium.py @@ -0,0 +1,104 @@ +import os.path as osp +import zipfile +from glob import glob +from cvat.apps.dataset_manager.bindings import InstanceLabelData +from cvat.apps.engine.serializers import LabeledDataSerializer +import cvat.apps.dataset_manager as dm +from cvat.apps.dataset_manager.task import PatchAction +from .registry import importer + + + + + + + + +def load_anno(file_object, annotations): + if isinstance(file_object, str): + with open(file_object, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.splitlines() + headers = lines[0].split('\t') + + label_data = InstanceLabelData(annotations.db_instance) + + + + + for line in lines[1:]: + fields = line.split('\t') + record = dict(zip(headers, fields)) + + job_id = record.get('job_id') + + start = float(record.get('start', 0)) + end = float(record.get('end', 0)) + + label_name = record.get('label') + label_id = label_data._get_label_id(label_name) + + language_id_to_locale_mapping = {0: "en"} + language_id = int(record.get('language',0)) + + + spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start, start, end, end], + "frame":0, + "occluded" : False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + "gender": record.get("gender", ""), + "age": record.get("age",""), + "locale":language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent",""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": [ + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } ] + } + ] + + + data = { + 'shapes': shapes_data + } + + serializer = LabeledDataSerializer(data=data) + pk = int(job_id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + + + + + + + + +@importer(name='Ted-Lium', ext='TSV, ZIP', version=" ") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + is_zip = zipfile.is_zipfile(src_file) + src_file.seek(0) + if is_zip: + zipfile.ZipFile(src_file).extractall(temp_dir) + + anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) \ No newline at end of file diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 72aaccc50695..383724a16cc9 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -1062,6 +1062,7 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): for i, annotation in enumerate(annotations): entry = { + "job_id": job_id, "path": os.path.basename(annotation_audio_chunk_file_paths[i]), "sentence": annotation.get("transcript", ""), "age": annotation.get("age", ""), @@ -1100,6 +1101,7 @@ def convert_annotation_data_format(data, format_name): formatted_data = [] for entry in data: formatted_entry = { + "job_id": entry["job_id"], "chapter_id": "", "file": 
entry["path"], "id": str(uuid.uuid4()), @@ -1119,6 +1121,7 @@ def convert_annotation_data_format(data, format_name): formatted_data = [] for entry in data: formatted_entry = { + "job_id": entry["job_id"], "audio_id": str(uuid.uuid4()), "language": language_id_mapping.get(entry["locale"], None), "audio_path": entry["path"], @@ -1141,6 +1144,7 @@ def convert_annotation_data_format(data, format_name): formatted_data = [] for entry in data: formatted_entry = { + "job_id": entry["job_id"], "file": entry["path"], "text": entry["sentence"], "gender": entry["gender"], @@ -1159,6 +1163,7 @@ def convert_annotation_data_format(data, format_name): formatted_data = [] for entry in data: formatted_entry = { + "job_id": entry["job_id"], "file": entry["path"], "text": entry["sentence"], "gender": entry["gender"], @@ -1179,6 +1184,7 @@ def convert_annotation_data_format(data, format_name): formatted_data = [] for entry in data: formatted_entry = { + "job_id": entry["job_id"], "file": entry["path"], "text": entry["sentence"], "gender": entry["gender"], @@ -1200,6 +1206,7 @@ def convert_annotation_data_format(data, format_name): formatted_data = [] for entry in data: formatted_entry = { + "job_id": entry["job_id"], "file": entry["path"], "text": entry["sentence"], "gender": entry["gender"], From b487935956d7146372761bc0ae3934fe148c372b Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Tue, 10 Sep 2024 18:37:13 +0530 Subject: [PATCH 02/14] minor fixes and formatting changes --- cvat/apps/dataset_manager/formats/LibriVox.py | 75 +- .../dataset_manager/formats/VCTK_Corpus.py | 78 +- cvat/apps/dataset_manager/formats/VoxCeleb.py | 76 +- .../apps/dataset_manager/formats/Voxpopuli.py | 74 +- .../formats/aud_common_voice.py | 45 +- cvat/apps/dataset_manager/formats/camvid.py | 29 +- .../dataset_manager/formats/cityscapes.py | 43 +- cvat/apps/dataset_manager/formats/coco.py | 53 +- .../dataset_manager/formats/common_voice.py | 76 +- cvat/apps/dataset_manager/formats/cvat.py | 1743 ++++++++++------- cvat/apps/dataset_manager/formats/datumaro.py | 36 +- cvat/apps/dataset_manager/formats/icdar.py | 74 +- cvat/apps/dataset_manager/formats/imagenet.py | 21 +- cvat/apps/dataset_manager/formats/kitti.py | 37 +- cvat/apps/dataset_manager/formats/labelme.py | 24 +- cvat/apps/dataset_manager/formats/lfw.py | 18 +- .../dataset_manager/formats/librispeech.py | 76 +- .../dataset_manager/formats/market1501.py | 37 +- cvat/apps/dataset_manager/formats/mask.py | 34 +- cvat/apps/dataset_manager/formats/mot.py | 66 +- cvat/apps/dataset_manager/formats/mots.py | 69 +- .../dataset_manager/formats/openimages.py | 57 +- .../dataset_manager/formats/pascal_voc.py | 42 +- .../dataset_manager/formats/pointcloud.py | 42 +- cvat/apps/dataset_manager/formats/registry.py | 83 +- cvat/apps/dataset_manager/formats/tedlium.py | 77 +- .../formats/transformations.py | 58 +- cvat/apps/dataset_manager/formats/utils.py | 64 +- .../dataset_manager/formats/velodynepoint.py | 37 +- cvat/apps/dataset_manager/formats/vggface2.py | 23 +- .../apps/dataset_manager/formats/widerface.py | 20 +- cvat/apps/dataset_manager/formats/yolo.py | 39 +- 32 files changed, 1961 insertions(+), 1365 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py index c51e1c57e676..8dce61a43284 100644 --- a/cvat/apps/dataset_manager/formats/LibriVox.py +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -6,99 +6,88 @@ import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from 
.registry import importer - - - - - - +from cvat.apps.engine.models import Task, Job def load_anno(file_object, annotations): if isinstance(file_object, str): - with open(file_object, 'r', encoding='utf-8') as f: + with open(file_object, "r", encoding="utf-8") as f: content = f.read() lines = content.splitlines() - headers = lines[0].split('\t') + headers = lines[0].split("\t") label_data = InstanceLabelData(annotations.db_instance) - - + task_id = annotations.db_instance.id + task = Task.objects.get(id=task_id) + jobs = Job.objects.filter(segment__task=task) for line in lines[1:]: - fields = line.split('\t') + fields = line.split("\t") record = dict(zip(headers, fields)) - job_id = record.get('job_id') + if "job_id" in record: + job_id = record.get("job_id") + else: + job_index_id = int(record.get("job index")) + job_id = jobs[job_index_id].id - start = float(record.get('start', 0)) - end = float(record.get('end', 0)) + start = float(record.get("start", 0)) + end = float(record.get("end", 0)) - label_name = record.get('label') + label_name = record.get("label") label_id = label_data._get_label_id(label_name) language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get('language',0)) - - spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) - + language_id = int(record.get("language", 0)) + spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) shapes_data = [ { "type": "rectangle", "label": record.get("label", ""), "points": [start, start, end, end], - "frame":0, - "occluded" : False, + "frame": 0, + "occluded": False, "z_order": 0, "group": None, "source": "manual", "transcript": record.get("text", ""), "gender": record.get("gender", ""), - "age": record.get("age",""), - "locale":language_id_to_locale_mapping.get(language_id, ""), - "accent": record.get("accent",""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent", ""), "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } ] + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } + ], } ] - - data = { - 'shapes': shapes_data - } + data = {"shapes": shapes_data} serializer = LabeledDataSerializer(data=data) pk = int(job_id) action = PatchAction.CREATE if serializer.is_valid(raise_exception=True): - data = dm.task.patch_job_data(pk, serializer.data, action) - - - - - - - + data = dm.task.patch_job_data(pk, serializer.data, action) -@importer(name='LibriVox', ext='TSV, ZIP', version=" ") +@importer(name="LibriVox", ext="TSV, ZIP", version=" ") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) for p in anno_paths: - load_anno(p, instance_data) \ No newline at end of file + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py index b1a202cc151a..af938773ebbb 100644 --- a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -5,101 +5,89 @@ from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from 
cvat.apps.dataset_manager.task import PatchAction -from .registry import importer - - - - - - +from .registry import importer +from cvat.apps.engine.models import Task, Job def load_anno(file_object, annotations): if isinstance(file_object, str): - with open(file_object, 'r', encoding='utf-8') as f: + with open(file_object, "r", encoding="utf-8") as f: content = f.read() lines = content.splitlines() - headers = lines[0].split('\t') + headers = lines[0].split("\t") label_data = InstanceLabelData(annotations.db_instance) - - + task_id = annotations.db_instance.id + task = Task.objects.get(id=task_id) + jobs = Job.objects.filter(segment__task=task) for line in lines[1:]: - fields = line.split('\t') + fields = line.split("\t") record = dict(zip(headers, fields)) - job_id = record.get('job_id') + if "job_id" in record: + job_id = record.get("job_id") + else: + job_index_id = int(record.get("job index")) + job_id = jobs[job_index_id].id - start = float(record.get('start', 0)) - end = float(record.get('end', 0)) + start = float(record.get("start", 0)) + end = float(record.get("end", 0)) - label_name = record.get('label') + label_name = record.get("label") label_id = label_data._get_label_id(label_name) language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get('language',0)) - - - spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) - + language_id = int(record.get("language", 0)) + spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) shapes_data = [ { "type": "rectangle", "label": record.get("label", ""), "points": [start, start, end, end], - "frame":0, - "occluded" : False, + "frame": 0, + "occluded": False, "z_order": 0, "group": None, "source": "manual", "transcript": record.get("text", ""), "gender": record.get("gender", ""), - "age": record.get("age",""), - "locale":language_id_to_locale_mapping.get(language_id, ""), - "accent": record.get("accent",""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent", ""), "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } ] + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } + ], } ] - - data = { - 'shapes': shapes_data - } + data = {"shapes": shapes_data} serializer = LabeledDataSerializer(data=data) pk = int(job_id) action = PatchAction.CREATE if serializer.is_valid(raise_exception=True): - data = dm.task.patch_job_data(pk, serializer.data, action) - - - - - - - + data = dm.task.patch_job_data(pk, serializer.data, action) -@importer(name='VCTK Corpus', ext='TSV, ZIP', version=" ") +@importer(name="VCTK Corpus", ext="TSV, ZIP", version=" ") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) for p in anno_paths: - load_anno(p, instance_data) \ No newline at end of file + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py index 3e4ef99be05e..3a0228b59814 100644 --- a/cvat/apps/dataset_manager/formats/VoxCeleb.py +++ b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -6,100 +6,88 @@ import 
cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer - - - - - - +from cvat.apps.engine.models import Task, Job def load_anno(file_object, annotations): if isinstance(file_object, str): - with open(file_object, 'r', encoding='utf-8') as f: + with open(file_object, "r", encoding="utf-8") as f: content = f.read() lines = content.splitlines() - headers = lines[0].split('\t') + headers = lines[0].split("\t") label_data = InstanceLabelData(annotations.db_instance) - - + task_id = annotations.db_instance.id + task = Task.objects.get(id=task_id) + jobs = Job.objects.filter(segment__task=task) for line in lines[1:]: - fields = line.split('\t') + fields = line.split("\t") record = dict(zip(headers, fields)) - job_id = record.get('job_id') + if "job_id" in record: + job_id = record.get("job_id") + else: + job_index_id = int(record.get("job index")) + job_id = jobs[job_index_id].id - start = float(record.get('start', 0)) - end = float(record.get('end', 0)) + start = float(record.get("start", 0)) + end = float(record.get("end", 0)) - label_name = record.get('label') + label_name = record.get("label") label_id = label_data._get_label_id(label_name) language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get('language',0)) - - - spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) - + language_id = int(record.get("language", 0)) + spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) shapes_data = [ { "type": "rectangle", "label": record.get("label", ""), "points": [start, start, end, end], - "frame":0, - "occluded" : False, + "frame": 0, + "occluded": False, "z_order": 0, "group": None, "source": "manual", "transcript": record.get("text", ""), "gender": record.get("gender", ""), - "age": record.get("age",""), - "locale":language_id_to_locale_mapping.get(language_id, ""), - "accent": record.get("accent",""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent", ""), "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } ] + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } + ], } ] - - data = { - 'shapes': shapes_data - } + data = {"shapes": shapes_data} serializer = LabeledDataSerializer(data=data) pk = int(job_id) action = PatchAction.CREATE if serializer.is_valid(raise_exception=True): - data = dm.task.patch_job_data(pk, serializer.data, action) - - - - - - - + data = dm.task.patch_job_data(pk, serializer.data, action) -@importer(name='VoxCeleb', ext='TSV, ZIP', version=" ") +@importer(name="VoxCeleb", ext="TSV, ZIP", version=" ") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) for p in anno_paths: - load_anno(p, instance_data) \ No newline at end of file + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py b/cvat/apps/dataset_manager/formats/Voxpopuli.py index 5d0928e98d61..e31c549177a2 100644 --- a/cvat/apps/dataset_manager/formats/Voxpopuli.py +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -6,98 +6,88 @@ import 
cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer - - - - - - +from cvat.apps.engine.models import Task, Job def load_anno(file_object, annotations): if isinstance(file_object, str): - with open(file_object, 'r', encoding='utf-8') as f: + with open(file_object, "r", encoding="utf-8") as f: content = f.read() lines = content.splitlines() - headers = lines[0].split('\t') + headers = lines[0].split("\t") label_data = InstanceLabelData(annotations.db_instance) - - + task_id = annotations.db_instance.id + task = Task.objects.get(id=task_id) + jobs = Job.objects.filter(segment__task=task) for line in lines[1:]: - fields = line.split('\t') + fields = line.split("\t") record = dict(zip(headers, fields)) - job_id = record.get('job_id') + if "job_id" in record: + job_id = record.get("job_id") + else: + job_index_id = int(record.get("job index")) + job_id = jobs[job_index_id].id - start = float(record.get('start', 0)) - end = float(record.get('end', 0)) + start = float(record.get("start", 0)) + end = float(record.get("end", 0)) - label_name = record.get('label') + label_name = record.get("label") label_id = label_data._get_label_id(label_name) language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get('language',0)) - - spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + language_id = int(record.get("language", 0)) + spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) shapes_data = [ { "type": "rectangle", "label": record.get("label", ""), "points": [start, start, end, end], - "frame":0, - "occluded" : False, + "frame": 0, + "occluded": False, "z_order": 0, "group": None, "source": "manual", "transcript": record.get("normalized_text", ""), "gender": record.get("gender", ""), - "age": record.get("age",""), - "locale":language_id_to_locale_mapping.get(language_id, ""), - "accent": record.get("accent",""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent", ""), "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } ] + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } + ], } ] - - data = { - 'shapes': shapes_data - } + data = {"shapes": shapes_data} serializer = LabeledDataSerializer(data=data) pk = int(job_id) action = PatchAction.CREATE if serializer.is_valid(raise_exception=True): - data = dm.task.patch_job_data(pk, serializer.data, action) - - - - - - - + data = dm.task.patch_job_data(pk, serializer.data, action) -@importer(name='Voxpopuli', ext='TSV, ZIP', version=" ") +@importer(name="Voxpopuli", ext="TSV, ZIP", version=" ") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) for p in anno_paths: - load_anno(p, instance_data) \ No newline at end of file + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/aud_common_voice.py b/cvat/apps/dataset_manager/formats/aud_common_voice.py index bd1457d819e0..417d90965788 100644 --- a/cvat/apps/dataset_manager/formats/aud_common_voice.py +++ b/cvat/apps/dataset_manager/formats/aud_common_voice.py @@ 
-4,16 +4,17 @@ # SPDX-License-Identifier: MIT - - - import os.path as osp from glob import glob from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, - import_dm_annotations, match_dm_item, find_dataset_root) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + import_dm_annotations, + match_dm_item, + find_dataset_root, +) from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.extractor import DatasetItem from datumaro.components.project import Dataset @@ -21,42 +22,48 @@ from .registry import dm_env, exporter, importer from cvat.apps.engine.log import ServerLogManager + slogger = ServerLogManager(__name__) -@exporter(name='YOLO', ext='ZIP', version='1.1') +@exporter(name="YOLO", ext="ZIP", version="1.1") def _export(dst_file, temp_dir, instance_data, save_images=False): slogger.glob.info("HEYLO YOLO EXPORTER AUDINO") # slogger.glob.debug() - dataset = Dataset.from_extractors(GetCVATDataExtractor( - instance_data, include_images=save_images), env=dm_env) + dataset = Dataset.from_extractors( + GetCVATDataExtractor(instance_data, include_images=save_images), env=dm_env + ) - dataset.export(temp_dir, 'yolo', save_images=save_images) + dataset.export(temp_dir, "yolo", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='YOLO', ext='ZIP', version='1.1') + +@importer(name="YOLO", ext="ZIP", version="1.1") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) image_info = {} - frames = [YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) - for p in glob(osp.join(temp_dir, '**', '*.txt'), recursive=True)] + frames = [ + YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) + for p in glob(osp.join(temp_dir, "**", "*.txt"), recursive=True) + ] root_hint = find_dataset_root( - [DatasetItem(id=frame) for frame in frames], instance_data) + [DatasetItem(id=frame) for frame in frames], instance_data + ) for frame in frames: frame_info = None try: - frame_id = match_dm_item(DatasetItem(id=frame), instance_data, - root_hint=root_hint) + frame_id = match_dm_item( + DatasetItem(id=frame), instance_data, root_hint=root_hint + ) frame_info = instance_data.frame_info[frame_id] - except Exception: # nosec + except Exception: # nosec pass if frame_info is not None: - image_info[frame] = (frame_info['height'], frame_info['width']) + image_info[frame] = (frame_info["height"], frame_info["width"]) - dataset = Dataset.import_from(temp_dir, 'yolo', - env=dm_env, image_info=image_info) + dataset = Dataset.import_from(temp_dir, "yolo", env=dm_env, image_info=image_info) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/camvid.py b/cvat/apps/dataset_manager/formats/camvid.py index 75cea9e98bd4..dcbc72c94189 100644 --- a/cvat/apps/dataset_manager/formats/camvid.py +++ b/cvat/apps/dataset_manager/formats/camvid.py @@ -6,8 +6,10 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, - import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -15,30 +17,35 @@ from .utils import make_colormap 
-@exporter(name='CamVid', ext='ZIP', version='1.0') +@exporter(name="CamVid", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('boxes_to_masks') - dataset.transform('merge_instance_segments') + dataset.transform("polygons_to_masks") + dataset.transform("boxes_to_masks") + dataset.transform("merge_instance_segments") label_map = make_colormap(instance_data) - dataset.export(temp_dir, 'camvid', - save_images=save_images, apply_colormap=True, - label_map={label: label_map[label][0] for label in label_map}) + dataset.export( + temp_dir, + "camvid", + save_images=save_images, + apply_colormap=True, + label_map={label: label_map[label][0] for label in label_map}, + ) make_zip_archive(temp_dir, dst_file) -@importer(name='CamVid', ext='ZIP', version='1.0') + +@importer(name="CamVid", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) # We do not run detect_dataset before import because the Camvid format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - dataset = Dataset.import_from(temp_dir, 'camvid', env=dm_env) + dataset = Dataset.import_from(temp_dir, "camvid", env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/cityscapes.py b/cvat/apps/dataset_manager/formats/cityscapes.py index ea39578ea3f3..b82accdca22c 100644 --- a/cvat/apps/dataset_manager/formats/cityscapes.py +++ b/cvat/apps/dataset_manager/formats/cityscapes.py @@ -9,8 +9,11 @@ from datumaro.plugins.cityscapes_format import write_label_map from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -18,33 +21,43 @@ from .utils import make_colormap -@exporter(name='Cityscapes', ext='ZIP', version='1.0') +@exporter(name="Cityscapes", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('boxes_to_masks') - dataset.transform('merge_instance_segments') + dataset.transform("polygons_to_masks") + dataset.transform("boxes_to_masks") + dataset.transform("merge_instance_segments") - dataset.export(temp_dir, 'cityscapes', save_images=save_images, - apply_colormap=True, label_map={label: info[0] - for label, info in make_colormap(instance_data).items()}) + dataset.export( + temp_dir, + "cityscapes", + save_images=save_images, + apply_colormap=True, + label_map={ + label: info[0] for label, info in make_colormap(instance_data).items() + }, + ) make_zip_archive(temp_dir, dst_file) -@importer(name='Cityscapes', 
ext='ZIP', version='1.0') + +@importer(name="Cityscapes", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - labelmap_file = osp.join(temp_dir, 'label_colors.txt') + labelmap_file = osp.join(temp_dir, "label_colors.txt") if not osp.isfile(labelmap_file): - colormap = {label: info[0] - for label, info in make_colormap(instance_data).items()} + colormap = { + label: info[0] for label, info in make_colormap(instance_data).items() + } write_label_map(labelmap_file, colormap) - detect_dataset(temp_dir, format_name='cityscapes', importer= dm_env.importers.get('cityscapes')) - dataset = Dataset.import_from(temp_dir, 'cityscapes', env=dm_env) + detect_dataset( + temp_dir, format_name="cityscapes", importer=dm_env.importers.get("cityscapes") + ) + dataset = Dataset.import_from(temp_dir, "cityscapes", env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 556feea104e1..4943b0c1a3e0 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -9,64 +9,77 @@ from datumaro.components.annotation import AnnotationType from datumaro.plugins.coco_format.importer import CocoImporter -from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset, \ - import_dm_annotations +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name='COCO', ext='ZIP', version='1.0') + +@exporter(name="COCO", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, 'coco_instances', save_images=save_images, - merge_images=True) + dataset.export( + temp_dir, "coco_instances", save_images=save_images, merge_images=True + ) make_zip_archive(temp_dir, dst_file) -@importer(name='COCO', ext='JSON, ZIP', version='1.0') + +@importer(name="COCO", ext="JSON, ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): if zipfile.is_zipfile(src_file): zipfile.ZipFile(src_file).extractall(temp_dir) # We use coco importer because it gives better error message - detect_dataset(temp_dir, format_name='coco', importer=CocoImporter) - dataset = Dataset.import_from(temp_dir, 'coco_instances', env=dm_env) + detect_dataset(temp_dir, format_name="coco", importer=CocoImporter) + dataset = Dataset.import_from(temp_dir, "coco_instances", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) else: - dataset = Dataset.import_from(src_file.name, - 'coco_instances', env=dm_env) + dataset = Dataset.import_from(src_file.name, "coco_instances", env=dm_env) import_dm_annotations(dataset, instance_data) -@exporter(name='COCO Keypoints', ext='ZIP', version='1.0') + +@exporter(name="COCO Keypoints", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = 
Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images, - merge_images=True) + dataset.export( + temp_dir, + "coco_person_keypoints", + save_images=save_images, + merge_images=True, + ) make_zip_archive(temp_dir, dst_file) -@importer(name='COCO Keypoints', ext='JSON, ZIP', version='1.0') + +@importer(name="COCO Keypoints", ext="JSON, ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): def remove_extra_annotations(dataset): for item in dataset: - annotations = [ann for ann in item.annotations - if ann.type != AnnotationType.bbox] + annotations = [ + ann for ann in item.annotations if ann.type != AnnotationType.bbox + ] item.annotations = annotations if zipfile.is_zipfile(src_file): zipfile.ZipFile(src_file).extractall(temp_dir) # We use coco importer because it gives better error message - detect_dataset(temp_dir, format_name='coco', importer=CocoImporter) - dataset = Dataset.import_from(temp_dir, 'coco_person_keypoints', env=dm_env) + detect_dataset(temp_dir, format_name="coco", importer=CocoImporter) + dataset = Dataset.import_from(temp_dir, "coco_person_keypoints", env=dm_env) remove_extra_annotations(dataset) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) else: - dataset = Dataset.import_from(src_file.name, - 'coco_person_keypoints', env=dm_env) + dataset = Dataset.import_from( + src_file.name, "coco_person_keypoints", env=dm_env + ) remove_extra_annotations(dataset) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index 5e6b7981c8fc..d0cd4cb47361 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -6,100 +6,88 @@ import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer - - - - - - +from cvat.apps.engine.models import Task, Job def load_anno(file_object, annotations): if isinstance(file_object, str): - with open(file_object, 'r', encoding='utf-8') as f: + with open(file_object, "r", encoding="utf-8") as f: content = f.read() lines = content.splitlines() - headers = lines[0].split('\t') + headers = lines[0].split("\t") label_data = InstanceLabelData(annotations.db_instance) - - + task_id = annotations.db_instance.id + task = Task.objects.get(id=task_id) + jobs = Job.objects.filter(segment__task=task) for line in lines[1:]: - fields = line.split('\t') + fields = line.split("\t") record = dict(zip(headers, fields)) - job_id = record.get('job_id') + if "job_id" in record: + job_id = record.get("job_id") + else: + job_index_id = int(record.get("job index")) + job_id = jobs[job_index_id].id - start = float(record.get('start', 0)) - end = float(record.get('end', 0)) + start = float(record.get("start", 0)) + end = float(record.get("end", 0)) - label_name = record.get('label') + label_name = record.get("label") label_id = label_data._get_label_id(label_name) language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get('language',0)) - - - spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + language_id = int(record.get("language", 0)) + spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) shapes_data = [ { "type": "rectangle", "label": record.get("label", ""), "points": [start, start, end, 
end], - "frame":0, - "occluded" : False, + "frame": 0, + "occluded": False, "z_order": 0, "group": None, "source": "manual", "transcript": record.get("sentence", ""), "gender": record.get("gender", ""), - "age": record.get("age",""), - "locale":language_id_to_locale_mapping.get(language_id, ""), - "accent": record.get("accents",""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accents", ""), "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } ] + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } + ], } ] - - - data = { - 'shapes': shapes_data - } + data = {"shapes": shapes_data} serializer = LabeledDataSerializer(data=data) pk = int(job_id) action = PatchAction.CREATE if serializer.is_valid(raise_exception=True): - data = dm.task.patch_job_data(pk, serializer.data, action) - - - - - - - + data = dm.task.patch_job_data(pk, serializer.data, action) -@importer(name='Common Voice', ext='TSV, ZIP', version=" ") +@importer(name="Common Voice", ext="TSV, ZIP", version=" ") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) for p in anno_paths: - load_anno(p, instance_data) \ No newline at end of file + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 99293fe470d4..6f6e8b98c788 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -11,21 +11,31 @@ from io import BufferedWriter from typing import Callable -from datumaro.components.annotation import (AnnotationType, Bbox, Label, - LabelCategories, Points, Polygon, - PolyLine, Skeleton) +from datumaro.components.annotation import ( + AnnotationType, + Bbox, + Label, + LabelCategories, + Points, + Polygon, + PolyLine, + Skeleton, +) from datumaro.components.dataset import Dataset, DatasetItem -from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, Extractor, - Importer) +from datumaro.components.extractor import DEFAULT_SUBSET_NAME, Extractor, Importer from datumaro.plugins.cvat_format.extractor import CvatImporter as _CvatImporter from datumaro.util.image import Image from defusedxml import ElementTree -from cvat.apps.dataset_manager.bindings import (ProjectData, CommonData, detect_dataset, - get_defaulted_subset, - import_dm_annotations, - match_dm_item) +from cvat.apps.dataset_manager.bindings import ( + ProjectData, + CommonData, + detect_dataset, + get_defaulted_subset, + import_dm_annotations, + match_dm_item, +) from cvat.apps.dataset_manager.util import make_zip_archive from cvat.apps.engine.frame_provider import FrameProvider @@ -33,19 +43,20 @@ class CvatPath: - IMAGES_DIR = 'images' + IMAGES_DIR = "images" - MEDIA_EXTS = ('.jpg', '.jpeg', '.png') + MEDIA_EXTS = (".jpg", ".jpeg", ".png") + + BUILTIN_ATTRS = {"occluded", "outside", "keyframe", "track_id"} - BUILTIN_ATTRS = {'occluded', 'outside', 'keyframe', 'track_id'} class CvatExtractor(Extractor): - _SUPPORTED_SHAPES = ('box', 'polygon', 'polyline', 'points', 'skeleton') + _SUPPORTED_SHAPES = ("box", "polygon", "polyline", "points", "skeleton") def 
__init__(self, path, subsets=None): assert osp.isfile(path), path rootpath = osp.dirname(path) - images_dir = '' + images_dir = "" if osp.isdir(osp.join(rootpath, CvatPath.IMAGES_DIR)): images_dir = osp.join(rootpath, CvatPath.IMAGES_DIR) self._images_dir = images_dir @@ -71,7 +82,9 @@ def __len__(self): return len(self._items) def get(self, _id, subset=DEFAULT_SUBSET_NAME): - assert subset in self._subsets, '{} not in {}'.format(subset, ', '.join(self._subsets)) + assert subset in self._subsets, "{} not in {}".format( + subset, ", ".join(self._subsets) + ) return super().get(_id, subset) @staticmethod @@ -80,13 +93,13 @@ def _get_subsets_from_anno(path): context = iter(context) for ev, el in context: - if ev == 'start': - if el.tag == 'subsets': + if ev == "start": + if el.tag == "subsets": if el.text is not None: - subsets = el.text.split('\n') + subsets = el.text.split("\n") return subsets - if ev == 'end': - if el.tag == 'meta': + if ev == "end": + if el.tag == "meta": return [DEFAULT_SUBSET_NAME] el.clear() return [DEFAULT_SUBSET_NAME] @@ -99,15 +112,20 @@ def parse_image_dir(image_dir, subset): for file in sorted(glob(image_dir), key=osp.basename): name, ext = osp.splitext(osp.basename(file)) if ext.lower() in CvatPath.MEDIA_EXTS: - items[(subset, name)] = DatasetItem(id=name, annotations=[], - image=Image(path=file), subset=subset or DEFAULT_SUBSET_NAME, + items[(subset, name)] = DatasetItem( + id=name, + annotations=[], + image=Image(path=file), + subset=subset or DEFAULT_SUBSET_NAME, ) - if subsets == [DEFAULT_SUBSET_NAME] and not osp.isdir(osp.join(image_dir, DEFAULT_SUBSET_NAME)): - parse_image_dir(osp.join(image_dir, '*.*'), None) + if subsets == [DEFAULT_SUBSET_NAME] and not osp.isdir( + osp.join(image_dir, DEFAULT_SUBSET_NAME) + ): + parse_image_dir(osp.join(image_dir, "*.*"), None) else: for subset in subsets: - parse_image_dir(osp.join(image_dir, subset, '*.*'), subset) + parse_image_dir(osp.join(image_dir, subset, "*.*"), subset) return items @classmethod @@ -130,33 +148,36 @@ def _parse(cls, path): image = None subset = None for ev, el in context: - if ev == 'start': - if el.tag == 'track': - frame_size = tasks_info[int(el.attrib.get('task_id'))]['frame_size'] \ - if el.attrib.get('task_id') else tuple(tasks_info.values())[0]['frame_size'] + if ev == "start": + if el.tag == "track": + frame_size = ( + tasks_info[int(el.attrib.get("task_id"))]["frame_size"] + if el.attrib.get("task_id") + else tuple(tasks_info.values())[0]["frame_size"] + ) track = { - 'id': el.attrib['id'], - 'label': el.attrib.get('label'), - 'group': int(el.attrib.get('group_id', 0)), - 'height': frame_size[0], - 'width': frame_size[1], + "id": el.attrib["id"], + "label": el.attrib.get("label"), + "group": int(el.attrib.get("group_id", 0)), + "height": frame_size[0], + "width": frame_size[1], } - subset = el.attrib.get('subset') + subset = el.attrib.get("subset") track_shapes = {} - elif el.tag == 'image': + elif el.tag == "image": image = { - 'name': el.attrib.get('name'), - 'frame': el.attrib['id'], - 'width': el.attrib.get('width'), - 'height': el.attrib.get('height'), + "name": el.attrib.get("name"), + "frame": el.attrib["id"], + "width": el.attrib.get("width"), + "height": el.attrib.get("height"), } - subset = el.attrib.get('subset') + subset = el.attrib.get("subset") elif el.tag in cls._SUPPORTED_SHAPES and (track or image): - if shape and shape['type'] == 'skeleton': + if shape and shape["type"] == "skeleton": element_attributes = {} shape_element = { - 'type': 'rectangle' if el.tag == 'box' else 
el.tag, - 'attributes': element_attributes, + "type": "rectangle" if el.tag == "box" else el.tag, + "attributes": element_attributes, } if track: shape_element.update(track) @@ -165,150 +186,191 @@ def _parse(cls, path): else: attributes = {} shape = { - 'type': 'rectangle' if el.tag == 'box' else el.tag, - 'attributes': attributes, + "type": "rectangle" if el.tag == "box" else el.tag, + "attributes": attributes, } - shape['elements'] = [] + shape["elements"] = [] if track: shape.update(track) - shape['track_id'] = int(track['id']) - shape['frame'] = el.attrib['frame'] + shape["track_id"] = int(track["id"]) + shape["frame"] = el.attrib["frame"] track_elements = [] if image: shape.update(image) - elif el.tag == 'tag' and image: + elif el.tag == "tag" and image: attributes = {} tag = { - 'frame': image['frame'], - 'attributes': attributes, - 'group': int(el.attrib.get('group_id', 0)), - 'label': el.attrib['label'], + "frame": image["frame"], + "attributes": attributes, + "group": int(el.attrib.get("group_id", 0)), + "label": el.attrib["label"], } - subset = el.attrib.get('subset') - elif ev == 'end': - if el.tag == 'attribute' and element_attributes is not None and shape_element is not None: - attr_value = el.text or '' - attr_type = attribute_types.get(el.attrib['name']) - if el.text in ['true', 'false']: - attr_value = attr_value == 'true' - elif attr_type is not None and attr_type != 'text': + subset = el.attrib.get("subset") + elif ev == "end": + if ( + el.tag == "attribute" + and element_attributes is not None + and shape_element is not None + ): + attr_value = el.text or "" + attr_type = attribute_types.get(el.attrib["name"]) + if el.text in ["true", "false"]: + attr_value = attr_value == "true" + elif attr_type is not None and attr_type != "text": try: attr_value = float(attr_value) except ValueError: pass - element_attributes[el.attrib['name']] = attr_value - - if el.tag == 'attribute' and attributes is not None and shape_element is None: - attr_value = el.text or '' - attr_type = attribute_types.get(el.attrib['name']) - if el.text in ['true', 'false']: - attr_value = attr_value == 'true' - elif attr_type is not None and attr_type != 'text': + element_attributes[el.attrib["name"]] = attr_value + + if ( + el.tag == "attribute" + and attributes is not None + and shape_element is None + ): + attr_value = el.text or "" + attr_type = attribute_types.get(el.attrib["name"]) + if el.text in ["true", "false"]: + attr_value = attr_value == "true" + elif attr_type is not None and attr_type != "text": try: attr_value = float(attr_value) except ValueError: pass - attributes[el.attrib['name']] = attr_value - - elif el.tag in cls._SUPPORTED_SHAPES and shape["type"] == "skeleton" and el.tag != "skeleton": - shape_element['label'] = el.attrib.get('label') - shape_element['group'] = int(el.attrib.get('group_id', 0)) - - shape_element['type'] = el.tag - shape_element['z_order'] = int(el.attrib.get('z_order', 0)) - - if el.tag == 'box': - shape_element['points'] = list(map(float, [ - el.attrib['xtl'], el.attrib['ytl'], - el.attrib['xbr'], el.attrib['ybr'], - ])) + attributes[el.attrib["name"]] = attr_value + + elif ( + el.tag in cls._SUPPORTED_SHAPES + and shape["type"] == "skeleton" + and el.tag != "skeleton" + ): + shape_element["label"] = el.attrib.get("label") + shape_element["group"] = int(el.attrib.get("group_id", 0)) + + shape_element["type"] = el.tag + shape_element["z_order"] = int(el.attrib.get("z_order", 0)) + + if el.tag == "box": + shape_element["points"] = list( + map( + float, + [ + 
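# box corners follow the CVAT attribute order: xtl, ytl, xbr, ybr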
el.attrib["xtl"], + el.attrib["ytl"], + el.attrib["xbr"], + el.attrib["ybr"], + ], + ) + ) else: - shape_element['points'] = [] - for pair in el.attrib['points'].split(';'): - shape_element['points'].extend(map(float, pair.split(','))) + shape_element["points"] = [] + for pair in el.attrib["points"].split(";"): + shape_element["points"].extend(map(float, pair.split(","))) - if el.tag == 'points' and el.attrib.get('occluded') == '1': - shape_element['visibility'] = [Points.Visibility.hidden] * (len(shape_element['points']) // 2) + if el.tag == "points" and el.attrib.get("occluded") == "1": + shape_element["visibility"] = [Points.Visibility.hidden] * ( + len(shape_element["points"]) // 2 + ) else: - shape_element['occluded'] = (el.attrib.get('occluded') == '1') + shape_element["occluded"] = el.attrib.get("occluded") == "1" - if el.tag == 'points' and el.attrib.get('outside') == '1': - shape_element['visibility'] = [Points.Visibility.absent] * (len(shape_element['points']) // 2) + if el.tag == "points" and el.attrib.get("outside") == "1": + shape_element["visibility"] = [Points.Visibility.absent] * ( + len(shape_element["points"]) // 2 + ) else: - shape_element['outside'] = (el.attrib.get('outside') == '1') + shape_element["outside"] = el.attrib.get("outside") == "1" if track: - shape_element['keyframe'] = (el.attrib.get('keyframe') == '1') - if shape_element['keyframe']: + shape_element["keyframe"] = el.attrib.get("keyframe") == "1" + if shape_element["keyframe"]: track_elements.append(shape_element) else: - shape['elements'].append(shape_element) + shape["elements"].append(shape_element) shape_element = None elif el.tag in cls._SUPPORTED_SHAPES: if track is not None: - shape['frame'] = el.attrib['frame'] - shape['outside'] = (el.attrib.get('outside') == '1') - shape['keyframe'] = (el.attrib.get('keyframe') == '1') + shape["frame"] = el.attrib["frame"] + shape["outside"] = el.attrib.get("outside") == "1" + shape["keyframe"] = el.attrib.get("keyframe") == "1" if image is not None: - shape['label'] = el.attrib.get('label') - shape['group'] = int(el.attrib.get('group_id', 0)) - - shape['type'] = el.tag - shape['occluded'] = (el.attrib.get('occluded') == '1') - shape['z_order'] = int(el.attrib.get('z_order', 0)) - shape['rotation'] = float(el.attrib.get('rotation', 0)) - - if el.tag == 'box': - shape['points'] = list(map(float, [ - el.attrib['xtl'], el.attrib['ytl'], - el.attrib['xbr'], el.attrib['ybr'], - ])) - elif el.tag == 'skeleton': - shape['points'] = [] + shape["label"] = el.attrib.get("label") + shape["group"] = int(el.attrib.get("group_id", 0)) + + shape["type"] = el.tag + shape["occluded"] = el.attrib.get("occluded") == "1" + shape["z_order"] = int(el.attrib.get("z_order", 0)) + shape["rotation"] = float(el.attrib.get("rotation", 0)) + + if el.tag == "box": + shape["points"] = list( + map( + float, + [ + el.attrib["xtl"], + el.attrib["ytl"], + el.attrib["xbr"], + el.attrib["ybr"], + ], + ) + ) + elif el.tag == "skeleton": + shape["points"] = [] else: - shape['points'] = [] - for pair in el.attrib['points'].split(';'): - shape['points'].extend(map(float, pair.split(','))) + shape["points"] = [] + for pair in el.attrib["points"].split(";"): + shape["points"].extend(map(float, pair.split(","))) if track: if shape["type"] == "skeleton" and track_elements: shape["keyframe"] = True - track_shapes[shape['frame']] = shape - track_shapes[shape['frame']]['elements'] = track_elements + track_shapes[shape["frame"]] = shape + track_shapes[shape["frame"]]["elements"] = track_elements track_elements 
= None elif shape["type"] != "skeleton": - track_shapes[shape['frame']] = shape + track_shapes[shape["frame"]] = shape else: - frame_desc = items.get((subset, shape['frame']), {'annotations': []}) - frame_desc['annotations'].append( - cls._parse_shape_ann(shape, categories)) - items[(subset, shape['frame'])] = frame_desc + frame_desc = items.get( + (subset, shape["frame"]), {"annotations": []} + ) + frame_desc["annotations"].append( + cls._parse_shape_ann(shape, categories) + ) + items[(subset, shape["frame"])] = frame_desc shape = None - elif el.tag == 'tag': - frame_desc = items.get((subset, tag['frame']), {'annotations': []}) - frame_desc['annotations'].append( - cls._parse_tag_ann(tag, categories)) - items[(subset, tag['frame'])] = frame_desc + elif el.tag == "tag": + frame_desc = items.get((subset, tag["frame"]), {"annotations": []}) + frame_desc["annotations"].append( + cls._parse_tag_ann(tag, categories) + ) + items[(subset, tag["frame"])] = frame_desc tag = None - elif el.tag == 'track': + elif el.tag == "track": for track_shape in track_shapes.values(): - frame_desc = items.get((subset, track_shape['frame']), {'annotations': []}) - frame_desc['annotations'].append( - cls._parse_shape_ann(track_shape, categories)) - items[(subset, track_shape['frame'])] = frame_desc + frame_desc = items.get( + (subset, track_shape["frame"]), {"annotations": []} + ) + frame_desc["annotations"].append( + cls._parse_shape_ann(track_shape, categories) + ) + items[(subset, track_shape["frame"])] = frame_desc track = None - elif el.tag == 'image': - frame_desc = items.get((subset, image['frame']), {'annotations': []}) - frame_desc.update({ - 'name': image.get('name'), - 'height': image.get('height'), - 'width': image.get('width'), - 'subset': subset, - }) - items[(subset, image['frame'])] = frame_desc + elif el.tag == "image": + frame_desc = items.get( + (subset, image["frame"]), {"annotations": []} + ) + frame_desc.update( + { + "name": image.get("name"), + "height": image.get("height"), + "width": image.get("width"), + "subset": subset, + } + ) + items[(subset, image["frame"])] = frame_desc image = None el.clear() @@ -317,7 +379,7 @@ def _parse(cls, path): @staticmethod def _parse_meta(context): ev, el = next(context) - if not (ev == 'start' and el.tag == 'annotations'): + if not (ev == "start" and el.tag == "annotations"): raise Exception("Unexpected token ") categories = {} @@ -331,7 +393,8 @@ def _parse_meta(context): # Recursive descent parser el = None - states = ['annotations'] + states = ["annotations"] + def accepted(expected_state, tag, next_state=None): state = states[-1] if state == expected_state and el is not None and el.tag == tag: @@ -340,6 +403,7 @@ def accepted(expected_state, tag, next_state=None): states.append(next_state) return True return False + def consumed(expected_state, tag): state = states[-1] if state == expected_state and el is not None and el.tag == tag: @@ -348,194 +412,269 @@ def consumed(expected_state, tag): return False for ev, el in context: - if ev == 'start': - if accepted('annotations', 'meta'): pass - elif accepted('meta', 'task'): pass - elif accepted('meta', 'project'): pass - elif accepted('project', 'tasks'): pass - elif accepted('tasks', 'task'): pass - elif accepted('task', 'id', next_state='task_id'): pass - elif accepted('task', 'segment'): pass - elif accepted('task', 'mode'): pass - elif accepted('task', 'original_size'): pass - elif accepted('original_size', 'height', next_state='frame_height'): pass - elif accepted('original_size', 'width', 
next_state='frame_width'): pass - elif accepted('task', 'labels'): pass - elif accepted('project', 'labels'): pass - elif accepted('labels', 'label'): - label = { 'name': None, 'attributes': [] } - elif accepted('label', 'name', next_state='label_name'): pass - elif accepted('label', 'attributes'): pass - elif accepted('attributes', 'attribute'): pass - elif accepted('attribute', 'name', next_state='attr_name'): pass - elif accepted('attribute', 'input_type', next_state='attr_type'): pass - elif accepted('annotations', 'image') or \ - accepted('annotations', 'track') or \ - accepted('annotations', 'tag'): + if ev == "start": + if accepted("annotations", "meta"): + pass + elif accepted("meta", "task"): + pass + elif accepted("meta", "project"): + pass + elif accepted("project", "tasks"): + pass + elif accepted("tasks", "task"): + pass + elif accepted("task", "id", next_state="task_id"): + pass + elif accepted("task", "segment"): + pass + elif accepted("task", "mode"): + pass + elif accepted("task", "original_size"): + pass + elif accepted("original_size", "height", next_state="frame_height"): + pass + elif accepted("original_size", "width", next_state="frame_width"): + pass + elif accepted("task", "labels"): + pass + elif accepted("project", "labels"): + pass + elif accepted("labels", "label"): + label = {"name": None, "attributes": []} + elif accepted("label", "name", next_state="label_name"): + pass + elif accepted("label", "attributes"): + pass + elif accepted("attributes", "attribute"): + pass + elif accepted("attribute", "name", next_state="attr_name"): + pass + elif accepted("attribute", "input_type", next_state="attr_type"): + pass + elif ( + accepted("annotations", "image") + or accepted("annotations", "track") + or accepted("annotations", "tag") + ): break else: pass - elif ev == 'end': - if consumed('meta', 'meta'): + elif ev == "end": + if consumed("meta", "meta"): break - elif consumed('project', 'project'): pass - elif consumed('tasks', 'tasks'): pass - elif consumed('task', 'task'): + elif consumed("project", "project"): + pass + elif consumed("tasks", "tasks"): + pass + elif consumed("task", "task"): tasks_info[task_id] = { - 'frame_size': frame_size, - 'mode': mode, + "frame_size": frame_size, + "mode": mode, } frame_size = [None, None] mode = None - elif consumed('task_id', 'id'): + elif consumed("task_id", "id"): task_id = int(el.text) - elif consumed('segment', 'segment'): pass - elif consumed('mode', 'mode'): + elif consumed("segment", "segment"): + pass + elif consumed("mode", "mode"): mode = el.text - elif consumed('original_size', 'original_size'): pass - elif consumed('frame_height', 'height'): + elif consumed("original_size", "original_size"): + pass + elif consumed("frame_height", "height"): frame_size[0] = int(el.text) - elif consumed('frame_width', 'width'): + elif consumed("frame_width", "width"): frame_size[1] = int(el.text) - elif consumed('label_name', 'name'): - label['name'] = el.text - elif consumed('attr_name', 'name'): - label['attributes'].append({'name': el.text}) - elif consumed('attr_type', 'input_type'): - label['attributes'][-1]['input_type'] = el.text - elif consumed('attribute', 'attribute'): pass - elif consumed('attributes', 'attributes'): pass - elif consumed('label', 'label'): - labels[label['name']] = label['attributes'] + elif consumed("label_name", "name"): + label["name"] = el.text + elif consumed("attr_name", "name"): + label["attributes"].append({"name": el.text}) + elif consumed("attr_type", "input_type"): + 
label["attributes"][-1]["input_type"] = el.text + elif consumed("attribute", "attribute"): + pass + elif consumed("attributes", "attributes"): + pass + elif consumed("label", "label"): + labels[label["name"]] = label["attributes"] label = None - elif consumed('labels', 'labels'): pass + elif consumed("labels", "labels"): + pass else: pass - assert len(states) == 1 and states[0] == 'annotations', \ + assert len(states) == 1 and states[0] == "annotations", ( "Expected 'meta' section in the annotation file, path: %s" % states + ) - common_attrs = ['occluded'] - if 'interpolation' in map(lambda t: t['mode'], tasks_info.values()): - common_attrs.append('keyframe') - common_attrs.append('outside') - common_attrs.append('track_id') + common_attrs = ["occluded"] + if "interpolation" in map(lambda t: t["mode"], tasks_info.values()): + common_attrs.append("keyframe") + common_attrs.append("outside") + common_attrs.append("track_id") label_cat = LabelCategories(attributes=common_attrs) attribute_types = {} for label, attrs in labels.items(): - attr_names = {v['name'] for v in attrs} + attr_names = {v["name"] for v in attrs} label_cat.add(label, attributes=attr_names) for attr in attrs: - attribute_types[attr['name']] = attr['input_type'] + attribute_types[attr["name"]] = attr["input_type"] categories[AnnotationType.label] = label_cat return categories, tasks_info, attribute_types @classmethod def _parse_shape_ann(cls, ann, categories): - ann_id = ann.get('id', 0) - ann_type = ann['type'] - - attributes = ann.get('attributes') or {} - if 'occluded' in categories[AnnotationType.label].attributes: - attributes['occluded'] = ann.get('occluded', False) - if 'outside' in ann: - attributes['outside'] = ann['outside'] - if 'keyframe' in ann: - attributes['keyframe'] = ann['keyframe'] - if 'track_id' in ann: - attributes['track_id'] = ann['track_id'] - if 'rotation' in ann: - attributes['rotation'] = ann['rotation'] - - group = ann.get('group') - - label = ann.get('label') + ann_id = ann.get("id", 0) + ann_type = ann["type"] + + attributes = ann.get("attributes") or {} + if "occluded" in categories[AnnotationType.label].attributes: + attributes["occluded"] = ann.get("occluded", False) + if "outside" in ann: + attributes["outside"] = ann["outside"] + if "keyframe" in ann: + attributes["keyframe"] = ann["keyframe"] + if "track_id" in ann: + attributes["track_id"] = ann["track_id"] + if "rotation" in ann: + attributes["rotation"] = ann["rotation"] + + group = ann.get("group") + + label = ann.get("label") label_id = categories[AnnotationType.label].find(label)[0] - z_order = ann.get('z_order', 0) - points = ann.get('points', []) - - if ann_type == 'polyline': - return PolyLine(points, label=label_id, z_order=z_order, - id=ann_id, attributes=attributes, group=group) - - elif ann_type == 'polygon': - return Polygon(points, label=label_id, z_order=z_order, - id=ann_id, attributes=attributes, group=group) - - elif ann_type == 'points': - visibility = ann.get('visibility', None) - return Points(points, visibility, label=label_id, z_order=z_order, - id=ann_id, attributes=attributes, group=group) - - elif ann_type == 'box': + z_order = ann.get("z_order", 0) + points = ann.get("points", []) + + if ann_type == "polyline": + return PolyLine( + points, + label=label_id, + z_order=z_order, + id=ann_id, + attributes=attributes, + group=group, + ) + + elif ann_type == "polygon": + return Polygon( + points, + label=label_id, + z_order=z_order, + id=ann_id, + attributes=attributes, + group=group, + ) + + elif ann_type == 
"points": + visibility = ann.get("visibility", None) + return Points( + points, + visibility, + label=label_id, + z_order=z_order, + id=ann_id, + attributes=attributes, + group=group, + ) + + elif ann_type == "box": x, y = points[0], points[1] w, h = points[2] - x, points[3] - y - return Bbox(x, y, w, h, label=label_id, z_order=z_order, - id=ann_id, attributes=attributes, group=group) - - elif ann_type == 'skeleton': + return Bbox( + x, + y, + w, + h, + label=label_id, + z_order=z_order, + id=ann_id, + attributes=attributes, + group=group, + ) + + elif ann_type == "skeleton": elements = [] - for element in ann.get('elements', []): + for element in ann.get("elements", []): elements.append(cls._parse_shape_ann(element, categories)) - return Skeleton(elements, label=label_id, z_order=z_order, - id=ann_id, attributes=attributes, group=group) + return Skeleton( + elements, + label=label_id, + z_order=z_order, + id=ann_id, + attributes=attributes, + group=group, + ) else: raise NotImplementedError("Unknown annotation type '%s'" % ann_type) @classmethod def _parse_tag_ann(cls, ann, categories): - label = ann.get('label') + label = ann.get("label") label_id = categories[AnnotationType.label].find(label)[0] - group = ann.get('group') - attributes = ann.get('attributes') + group = ann.get("group") + attributes = ann.get("attributes") return Label(label_id, attributes=attributes, group=group) def _load_items(self, parsed, image_items): for (subset, frame_id), item_desc in parsed.items(): - name = item_desc.get('name', 'frame_%06d.PNG' % int(frame_id)) - image = osp.join(self._images_dir, subset, name) if subset else osp.join(self._images_dir, name) - image_size = (item_desc.get('height'), item_desc.get('width')) + name = item_desc.get("name", "frame_%06d.PNG" % int(frame_id)) + image = ( + osp.join(self._images_dir, subset, name) + if subset + else osp.join(self._images_dir, name) + ) + image_size = (item_desc.get("height"), item_desc.get("width")) if all(image_size): image = Image(path=image, size=tuple(map(int, image_size))) - di = image_items.get((subset, osp.splitext(name)[0]), DatasetItem( - id=name, annotations=[], - )) + di = image_items.get( + (subset, osp.splitext(name)[0]), + DatasetItem( + id=name, + annotations=[], + ), + ) di.subset = subset or DEFAULT_SUBSET_NAME - di.annotations = item_desc.get('annotations') - di.attributes = {'frame': int(frame_id)} + di.annotations = item_desc.get("annotations") + di.attributes = {"frame": int(frame_id)} di.media = image if isinstance(image, Image) else di.media image_items[(subset, osp.splitext(name)[0])] = di return image_items -dm_env.extractors.register('cvat', CvatExtractor) + +dm_env.extractors.register("cvat", CvatExtractor) + class CvatImporter(Importer): @classmethod def find_sources(cls, path): - return cls._find_sources_recursive(path, '.xml', 'cvat') + return cls._find_sources_recursive(path, ".xml", "cvat") + -dm_env.importers.register('cvat', CvatImporter) +dm_env.importers.register("cvat", CvatImporter) def pairwise(iterable): a = iter(iterable) return zip(a, a) + def create_xml_dumper(file_object): from xml.sax.saxutils import XMLGenerator + class XmlAnnotationWriter: def __init__(self, file): self.version = "1.1" self.file = file - self.xmlgen = XMLGenerator(self.file, 'utf-8') + self.xmlgen = XMLGenerator(self.file, "utf-8") self._level = 0 - def _indent(self, newline = True): + def _indent(self, newline=True): if newline: self.xmlgen.ignorableWhitespace("\n") self.xmlgen.ignorableWhitespace(" " * self._level) @@ -709,111 +848,163 
@@ def close_root(self): def close_document(self): self.xmlgen.endDocument() - return XmlAnnotationWriter(file_object) + def dump_as_cvat_annotation(dumper, annotations): dumper.open_root() dumper.add_meta(annotations.meta) for frame_annotation in annotations.group_by_frame(include_empty=True): frame_id = frame_annotation.frame - image_attrs = OrderedDict([ - ("id", str(frame_id)), - ("name", frame_annotation.name), - ]) + image_attrs = OrderedDict( + [ + ("id", str(frame_id)), + ("name", frame_annotation.name), + ] + ) if isinstance(annotations, ProjectData): - image_attrs.update(OrderedDict([ - ("subset", frame_annotation.subset), - ("task_id", str(frame_annotation.task_id)), - ])) - image_attrs.update(OrderedDict([ - ("width", str(frame_annotation.width)), - ("height", str(frame_annotation.height)) - ])) + image_attrs.update( + OrderedDict( + [ + ("subset", frame_annotation.subset), + ("task_id", str(frame_annotation.task_id)), + ] + ) + ) + image_attrs.update( + OrderedDict( + [ + ("width", str(frame_annotation.width)), + ("height", str(frame_annotation.height)), + ] + ) + ) dumper.open_image(image_attrs) def dump_labeled_shapes(shapes, is_skeleton=False): for shape in shapes: - dump_data = OrderedDict([ - ("label", shape.label), - ("source", shape.source), - ]) + dump_data = OrderedDict( + [ + ("label", shape.label), + ("source", shape.source), + ] + ) if is_skeleton: - dump_data.update(OrderedDict([ - ("outside", str(int(shape.outside))) - ])) + dump_data.update( + OrderedDict([("outside", str(int(shape.outside)))]) + ) - if shape.type != 'skeleton': - dump_data.update(OrderedDict([ - ("occluded", str(int(shape.occluded))) - ])) + if shape.type != "skeleton": + dump_data.update( + OrderedDict([("occluded", str(int(shape.occluded)))]) + ) if shape.type == "rectangle": - dump_data.update(OrderedDict([ - ("xtl", "{:.2f}".format(shape.points[0])), - ("ytl", "{:.2f}".format(shape.points[1])), - ("xbr", "{:.2f}".format(shape.points[2])), - ("ybr", "{:.2f}".format(shape.points[3])) - ])) + dump_data.update( + OrderedDict( + [ + ("xtl", "{:.2f}".format(shape.points[0])), + ("ytl", "{:.2f}".format(shape.points[1])), + ("xbr", "{:.2f}".format(shape.points[2])), + ("ybr", "{:.2f}".format(shape.points[3])), + ] + ) + ) if shape.rotation: - dump_data.update(OrderedDict([ - ("rotation", "{:.2f}".format(shape.rotation)) - ])) + dump_data.update( + OrderedDict([("rotation", "{:.2f}".format(shape.rotation))]) + ) elif shape.type == "ellipse": - dump_data.update(OrderedDict([ - ("cx", "{:.2f}".format(shape.points[0])), - ("cy", "{:.2f}".format(shape.points[1])), - ("rx", "{:.2f}".format(shape.points[2] - shape.points[0])), - ("ry", "{:.2f}".format(shape.points[1] - shape.points[3])) - ])) + dump_data.update( + OrderedDict( + [ + ("cx", "{:.2f}".format(shape.points[0])), + ("cy", "{:.2f}".format(shape.points[1])), + ( + "rx", + "{:.2f}".format(shape.points[2] - shape.points[0]), + ), + ( + "ry", + "{:.2f}".format(shape.points[1] - shape.points[3]), + ), + ] + ) + ) if shape.rotation: - dump_data.update(OrderedDict([ - ("rotation", "{:.2f}".format(shape.rotation)) - ])) + dump_data.update( + OrderedDict([("rotation", "{:.2f}".format(shape.rotation))]) + ) elif shape.type == "cuboid": - dump_data.update(OrderedDict([ - ("xtl1", "{:.2f}".format(shape.points[0])), - ("ytl1", "{:.2f}".format(shape.points[1])), - ("xbl1", "{:.2f}".format(shape.points[2])), - ("ybl1", "{:.2f}".format(shape.points[3])), - ("xtr1", "{:.2f}".format(shape.points[4])), - ("ytr1", "{:.2f}".format(shape.points[5])), - ("xbr1", 
"{:.2f}".format(shape.points[6])), - ("ybr1", "{:.2f}".format(shape.points[7])), - ("xtl2", "{:.2f}".format(shape.points[8])), - ("ytl2", "{:.2f}".format(shape.points[9])), - ("xbl2", "{:.2f}".format(shape.points[10])), - ("ybl2", "{:.2f}".format(shape.points[11])), - ("xtr2", "{:.2f}".format(shape.points[12])), - ("ytr2", "{:.2f}".format(shape.points[13])), - ("xbr2", "{:.2f}".format(shape.points[14])), - ("ybr2", "{:.2f}".format(shape.points[15])) - ])) + dump_data.update( + OrderedDict( + [ + ("xtl1", "{:.2f}".format(shape.points[0])), + ("ytl1", "{:.2f}".format(shape.points[1])), + ("xbl1", "{:.2f}".format(shape.points[2])), + ("ybl1", "{:.2f}".format(shape.points[3])), + ("xtr1", "{:.2f}".format(shape.points[4])), + ("ytr1", "{:.2f}".format(shape.points[5])), + ("xbr1", "{:.2f}".format(shape.points[6])), + ("ybr1", "{:.2f}".format(shape.points[7])), + ("xtl2", "{:.2f}".format(shape.points[8])), + ("ytl2", "{:.2f}".format(shape.points[9])), + ("xbl2", "{:.2f}".format(shape.points[10])), + ("ybl2", "{:.2f}".format(shape.points[11])), + ("xtr2", "{:.2f}".format(shape.points[12])), + ("ytr2", "{:.2f}".format(shape.points[13])), + ("xbr2", "{:.2f}".format(shape.points[14])), + ("ybr2", "{:.2f}".format(shape.points[15])), + ] + ) + ) elif shape.type == "mask": - dump_data.update(OrderedDict([ - ("rle", f"{list(int (v) for v in shape.points[:-4])}"[1:-1]), - ("left", f"{int(shape.points[-4])}"), - ("top", f"{int(shape.points[-3])}"), - ("width", f"{int(shape.points[-2] - shape.points[-4]) + 1}"), - ("height", f"{int(shape.points[-1] - shape.points[-3]) + 1}"), - ])) - elif shape.type != 'skeleton': - dump_data.update(OrderedDict([ - ("points", ';'.join(( - ','.join(( - "{:.2f}".format(x), - "{:.2f}".format(y) - )) for x, y in pairwise(shape.points)) - )), - ])) + dump_data.update( + OrderedDict( + [ + ( + "rle", + f"{list(int (v) for v in shape.points[:-4])}"[1:-1], + ), + ("left", f"{int(shape.points[-4])}"), + ("top", f"{int(shape.points[-3])}"), + ( + "width", + f"{int(shape.points[-2] - shape.points[-4]) + 1}", + ), + ( + "height", + f"{int(shape.points[-1] - shape.points[-3]) + 1}", + ), + ] + ) + ) + elif shape.type != "skeleton": + dump_data.update( + OrderedDict( + [ + ( + "points", + ";".join( + ( + ",".join( + ("{:.2f}".format(x), "{:.2f}".format(y)) + ) + for x, y in pairwise(shape.points) + ) + ), + ), + ] + ) + ) if not is_skeleton: - dump_data['z_order'] = str(shape.z_order) + dump_data["z_order"] = str(shape.z_order) if shape.group: - dump_data['group_id'] = str(shape.group) + dump_data["group_id"] = str(shape.group) if shape.type == "rectangle": dumper.open_box(dump_data) @@ -836,10 +1027,9 @@ def dump_labeled_shapes(shapes, is_skeleton=False): raise NotImplementedError("unknown shape type") for attr in shape.attributes: - dumper.add_attribute(OrderedDict([ - ("name", attr.name), - ("value", attr.value) - ])) + dumper.add_attribute( + OrderedDict([("name", attr.name), ("value", attr.value)]) + ) if shape.type == "rectangle": dumper.close_box() @@ -863,25 +1053,27 @@ def dump_labeled_shapes(shapes, is_skeleton=False): dump_labeled_shapes(frame_annotation.labeled_shapes) for tag in frame_annotation.tags: - tag_data = OrderedDict([ - ("label", tag.label), - ("source", tag.source), - ]) + tag_data = OrderedDict( + [ + ("label", tag.label), + ("source", tag.source), + ] + ) if tag.group: tag_data["group_id"] = str(tag.group) dumper.open_tag(tag_data) for attr in tag.attributes: - dumper.add_attribute(OrderedDict([ - ("name", attr.name), - ("value", attr.value) - ])) + 
dumper.add_attribute( + OrderedDict([("name", attr.name), ("value", attr.value)]) + ) dumper.close_tag() dumper.close_image() dumper.close_root() + def dump_as_cvat_interpolation(dumper, annotations): dumper.open_root() dumper.add_meta(annotations.meta) @@ -889,79 +1081,122 @@ def dump_as_cvat_interpolation(dumper, annotations): def dump_shape(shape, element_shapes=None, label=None): dump_data = OrderedDict() if label is None: - dump_data.update(OrderedDict([ - ("frame", str(shape.frame)), - ])) + dump_data.update( + OrderedDict( + [ + ("frame", str(shape.frame)), + ] + ) + ) else: - dump_data.update(OrderedDict([ - ("label", label), - ])) - dump_data.update(OrderedDict([ - ("keyframe", str(int(shape.keyframe))), - ])) + dump_data.update( + OrderedDict( + [ + ("label", label), + ] + ) + ) + dump_data.update( + OrderedDict( + [ + ("keyframe", str(int(shape.keyframe))), + ] + ) + ) if shape.type != "skeleton": - dump_data.update(OrderedDict([ - ("outside", str(int(shape.outside))), - ("occluded", str(int(shape.occluded))), - ])) + dump_data.update( + OrderedDict( + [ + ("outside", str(int(shape.outside))), + ("occluded", str(int(shape.occluded))), + ] + ) + ) if shape.type == "rectangle": - dump_data.update(OrderedDict([ - ("xtl", "{:.2f}".format(shape.points[0])), - ("ytl", "{:.2f}".format(shape.points[1])), - ("xbr", "{:.2f}".format(shape.points[2])), - ("ybr", "{:.2f}".format(shape.points[3])), - ])) + dump_data.update( + OrderedDict( + [ + ("xtl", "{:.2f}".format(shape.points[0])), + ("ytl", "{:.2f}".format(shape.points[1])), + ("xbr", "{:.2f}".format(shape.points[2])), + ("ybr", "{:.2f}".format(shape.points[3])), + ] + ) + ) if shape.rotation: - dump_data.update(OrderedDict([ - ("rotation", "{:.2f}".format(shape.rotation)) - ])) + dump_data.update( + OrderedDict([("rotation", "{:.2f}".format(shape.rotation))]) + ) elif shape.type == "ellipse": - dump_data.update(OrderedDict([ - ("cx", "{:.2f}".format(shape.points[0])), - ("cy", "{:.2f}".format(shape.points[1])), - ("rx", "{:.2f}".format(shape.points[2] - shape.points[0])), - ("ry", "{:.2f}".format(shape.points[1] - shape.points[3])) - ])) + dump_data.update( + OrderedDict( + [ + ("cx", "{:.2f}".format(shape.points[0])), + ("cy", "{:.2f}".format(shape.points[1])), + ("rx", "{:.2f}".format(shape.points[2] - shape.points[0])), + ("ry", "{:.2f}".format(shape.points[1] - shape.points[3])), + ] + ) + ) if shape.rotation: - dump_data.update(OrderedDict([ - ("rotation", "{:.2f}".format(shape.rotation)) - ])) + dump_data.update( + OrderedDict([("rotation", "{:.2f}".format(shape.rotation))]) + ) elif shape.type == "mask": - dump_data.update(OrderedDict([ - ("rle", f"{list(int (v) for v in shape.points[:-4])}"[1:-1]), - ("left", f"{int(shape.points[-4])}"), - ("top", f"{int(shape.points[-3])}"), - ("width", f"{int(shape.points[-2] - shape.points[-4]) + 1}"), - ("height", f"{int(shape.points[-1] - shape.points[-3]) + 1}"), - ])) + dump_data.update( + OrderedDict( + [ + ("rle", f"{list(int (v) for v in shape.points[:-4])}"[1:-1]), + ("left", f"{int(shape.points[-4])}"), + ("top", f"{int(shape.points[-3])}"), + ("width", f"{int(shape.points[-2] - shape.points[-4]) + 1}"), + ("height", f"{int(shape.points[-1] - shape.points[-3]) + 1}"), + ] + ) + ) elif shape.type == "cuboid": - dump_data.update(OrderedDict([ - ("xtl1", "{:.2f}".format(shape.points[0])), - ("ytl1", "{:.2f}".format(shape.points[1])), - ("xbl1", "{:.2f}".format(shape.points[2])), - ("ybl1", "{:.2f}".format(shape.points[3])), - ("xtr1", "{:.2f}".format(shape.points[4])), - ("ytr1", 
"{:.2f}".format(shape.points[5])), - ("xbr1", "{:.2f}".format(shape.points[6])), - ("ybr1", "{:.2f}".format(shape.points[7])), - ("xtl2", "{:.2f}".format(shape.points[8])), - ("ytl2", "{:.2f}".format(shape.points[9])), - ("xbl2", "{:.2f}".format(shape.points[10])), - ("ybl2", "{:.2f}".format(shape.points[11])), - ("xtr2", "{:.2f}".format(shape.points[12])), - ("ytr2", "{:.2f}".format(shape.points[13])), - ("xbr2", "{:.2f}".format(shape.points[14])), - ("ybr2", "{:.2f}".format(shape.points[15])) - ])) + dump_data.update( + OrderedDict( + [ + ("xtl1", "{:.2f}".format(shape.points[0])), + ("ytl1", "{:.2f}".format(shape.points[1])), + ("xbl1", "{:.2f}".format(shape.points[2])), + ("ybl1", "{:.2f}".format(shape.points[3])), + ("xtr1", "{:.2f}".format(shape.points[4])), + ("ytr1", "{:.2f}".format(shape.points[5])), + ("xbr1", "{:.2f}".format(shape.points[6])), + ("ybr1", "{:.2f}".format(shape.points[7])), + ("xtl2", "{:.2f}".format(shape.points[8])), + ("ytl2", "{:.2f}".format(shape.points[9])), + ("xbl2", "{:.2f}".format(shape.points[10])), + ("ybl2", "{:.2f}".format(shape.points[11])), + ("xtr2", "{:.2f}".format(shape.points[12])), + ("ytr2", "{:.2f}".format(shape.points[13])), + ("xbr2", "{:.2f}".format(shape.points[14])), + ("ybr2", "{:.2f}".format(shape.points[15])), + ] + ) + ) elif shape.type != "skeleton": - dump_data.update(OrderedDict([ - ("points", ';'.join(['{:.2f},{:.2f}'.format(x, y) - for x,y in pairwise(shape.points)])) - ])) + dump_data.update( + OrderedDict( + [ + ( + "points", + ";".join( + [ + "{:.2f},{:.2f}".format(x, y) + for x, y in pairwise(shape.points) + ] + ), + ) + ] + ) + ) if label is None: dump_data["z_order"] = str(shape.z_order) @@ -976,11 +1211,11 @@ def dump_shape(shape, element_shapes=None, label=None): dumper.open_polyline(dump_data) elif shape.type == "points": dumper.open_points(dump_data) - elif shape.type == 'mask': + elif shape.type == "mask": dumper.open_mask(dump_data) elif shape.type == "cuboid": dumper.open_cuboid(dump_data) - elif shape.type == 'skeleton': + elif shape.type == "skeleton": if element_shapes and element_shapes.get(shape.frame): dumper.open_skeleton(dump_data) for element_shape, label in element_shapes.get(shape.frame, []): @@ -988,13 +1223,16 @@ def dump_shape(shape, element_shapes=None, label=None): else: raise NotImplementedError("unknown shape type") - if shape.type == "skeleton" and element_shapes \ - and element_shapes.get(shape.frame) or shape.type != "skeleton": + if ( + shape.type == "skeleton" + and element_shapes + and element_shapes.get(shape.frame) + or shape.type != "skeleton" + ): for attr in shape.attributes: - dumper.add_attribute(OrderedDict([ - ("name", attr.name), - ("value", attr.value) - ])) + dumper.add_attribute( + OrderedDict([("name", attr.name), ("value", attr.value)]) + ) if shape.type == "rectangle": dumper.close_box() @@ -1006,7 +1244,7 @@ def dump_shape(shape, element_shapes=None, label=None): dumper.close_polyline() elif shape.type == "points": dumper.close_points() - elif shape.type == 'mask': + elif shape.type == "mask": dumper.close_mask() elif shape.type == "cuboid": dumper.close_cuboid() @@ -1018,21 +1256,30 @@ def dump_shape(shape, element_shapes=None, label=None): def dump_track(idx, track): track_id = idx - dump_data = OrderedDict([ - ("id", str(track_id)), - ("label", track.label), - ("source", track.source), - ]) - - if hasattr(track, 'task_id'): - task, = filter(lambda task: task.id == track.task_id, annotations.tasks) - dump_data.update(OrderedDict([ - ('task_id', str(track.task_id)), - 
('subset', get_defaulted_subset(task.subset, annotations.subsets)), - ])) + dump_data = OrderedDict( + [ + ("id", str(track_id)), + ("label", track.label), + ("source", track.source), + ] + ) + + if hasattr(track, "task_id"): + (task,) = filter(lambda task: task.id == track.task_id, annotations.tasks) + dump_data.update( + OrderedDict( + [ + ("task_id", str(track.task_id)), + ( + "subset", + get_defaulted_subset(task.subset, annotations.subsets), + ), + ] + ) + ) if track.group: - dump_data['group_id'] = str(track.group) + dump_data["group_id"] = str(track.group) dumper.open_track(dump_data) element_shapes = {} @@ -1040,7 +1287,9 @@ def dump_track(idx, track): for element_shape in element_track.shapes: if element_shape.frame not in element_shapes: element_shapes[element_shape.frame] = [] - element_shapes[element_shape.frame].append((element_shape, element_track.label)) + element_shapes[element_shape.frame].append( + (element_shape, element_track.label) + ) for shape in track.shapes: dump_shape(shape, element_shapes) @@ -1053,297 +1302,366 @@ def dump_track(idx, track): counter += 1 for shape in annotations.shapes: - frame_step = annotations.frame_step if not isinstance(annotations, ProjectData) \ + frame_step = ( + annotations.frame_step + if not isinstance(annotations, ProjectData) else annotations.frame_step[shape.task_id] + ) if not isinstance(annotations, ProjectData): - stop_frame = int(annotations.meta[annotations.META_FIELD]['stop_frame']) + stop_frame = int(annotations.meta[annotations.META_FIELD]["stop_frame"]) else: - task_meta = list(filter(lambda task: int(task[1]['id']) == shape.task_id, - annotations.meta[annotations.META_FIELD]['tasks']))[0][1] - stop_frame = int(task_meta['stop_frame']) + task_meta = list( + filter( + lambda task: int(task[1]["id"]) == shape.task_id, + annotations.meta[annotations.META_FIELD]["tasks"], + ) + )[0][1] + stop_frame = int(task_meta["stop_frame"]) track = { - 'label': shape.label, - 'group': shape.group, - 'source': shape.source, - 'shapes': [annotations.TrackedShape( - type=shape.type, - points=shape.points, - rotation=shape.rotation, - occluded=shape.occluded, - outside=False, - keyframe=True, - z_order=shape.z_order, - frame=shape.frame, - attributes=shape.attributes, - )] + - ( # add a finishing frame if it does not hop over the last frame - [annotations.TrackedShape( - type=shape.type, - points=shape.points, - rotation=shape.rotation, - occluded=shape.occluded, - outside=True, - keyframe=True, - z_order=shape.z_order, - frame=shape.frame + frame_step, - attributes=shape.attributes, - )] if shape.frame + frame_step < \ - stop_frame \ - else [] - ), - 'elements': [annotations.Track( - label=element.label, - group=element.group, - source=element.source, - shapes=[annotations.TrackedShape( - type=element.type, - points=element.points, - rotation=element.rotation, - occluded=element.occluded, - outside=element.outside, - keyframe=True, - z_order=element.z_order, - frame=element.frame, - attributes=element.attributes, - )] + - ( # add a finishing frame if it does not hop over the last frame - [annotations.TrackedShape( - type=element.type, - points=element.points, - rotation=element.rotation, - occluded=element.occluded, - outside=True, + "label": shape.label, + "group": shape.group, + "source": shape.source, + "shapes": [ + annotations.TrackedShape( + type=shape.type, + points=shape.points, + rotation=shape.rotation, + occluded=shape.occluded, + outside=False, keyframe=True, - z_order=element.z_order, - frame=element.frame + frame_step, - 
attributes=element.attributes, - )] if element.frame + frame_step < \ - stop_frame \ + z_order=shape.z_order, + frame=shape.frame, + attributes=shape.attributes, + ) + ] + + ( # add a finishing frame if it does not hop over the last frame + [ + annotations.TrackedShape( + type=shape.type, + points=shape.points, + rotation=shape.rotation, + occluded=shape.occluded, + outside=True, + keyframe=True, + z_order=shape.z_order, + frame=shape.frame + frame_step, + attributes=shape.attributes, + ) + ] + if shape.frame + frame_step < stop_frame else [] - ), - elements=[], - ) for element in shape.elements] + ), + "elements": [ + annotations.Track( + label=element.label, + group=element.group, + source=element.source, + shapes=[ + annotations.TrackedShape( + type=element.type, + points=element.points, + rotation=element.rotation, + occluded=element.occluded, + outside=element.outside, + keyframe=True, + z_order=element.z_order, + frame=element.frame, + attributes=element.attributes, + ) + ] + + ( # add a finishing frame if it does not hop over the last frame + [ + annotations.TrackedShape( + type=element.type, + points=element.points, + rotation=element.rotation, + occluded=element.occluded, + outside=True, + keyframe=True, + z_order=element.z_order, + frame=element.frame + frame_step, + attributes=element.attributes, + ) + ] + if element.frame + frame_step < stop_frame + else [] + ), + elements=[], + ) + for element in shape.elements + ], } if isinstance(annotations, ProjectData): - track['task_id'] = shape.task_id - for element in track['elements']: + track["task_id"] = shape.task_id + for element in track["elements"]: element.task_id = shape.task_id dump_track(counter, annotations.Track(**track)) counter += 1 dumper.close_root() + def load_anno(file_object, annotations): - supported_shapes = ('box', 'ellipse', 'polygon', 'polyline', 'points', 'cuboid', 'skeleton', 'mask') + supported_shapes = ( + "box", + "ellipse", + "polygon", + "polyline", + "points", + "cuboid", + "skeleton", + "mask", + ) context = ElementTree.iterparse(file_object, events=("start", "end")) context = iter(context) next(context) track = None shape = None - shape_element=None + shape_element = None tag = None image_is_opened = False attributes = None elem_attributes = None track_elements = None for ev, el in context: - if ev == 'start': - if el.tag == 'track': + if ev == "start": + if el.tag == "track": track = annotations.Track( - label=el.attrib['label'], - group=int(el.attrib.get('group_id', 0)), - source='file', + label=el.attrib["label"], + group=int(el.attrib.get("group_id", 0)), + source="file", shapes=[], elements=[], ) - elif el.tag == 'image': + elif el.tag == "image": image_is_opened = True - frame_id = annotations.abs_frame_id(match_dm_item( - DatasetItem(id=osp.splitext(el.attrib['name'])[0], - attributes={'frame': el.attrib['id']}, - image=el.attrib['name'] - ), - instance_data=annotations - )) + frame_id = annotations.abs_frame_id( + match_dm_item( + DatasetItem( + id=osp.splitext(el.attrib["name"])[0], + attributes={"frame": el.attrib["id"]}, + image=el.attrib["name"], + ), + instance_data=annotations, + ) + ) elif el.tag in supported_shapes and (track is not None or image_is_opened): - if shape and shape['type'] == 'skeleton': + if shape and shape["type"] == "skeleton": elem_attributes = [] shape_element = { - 'attributes': elem_attributes, - 'points': [], - 'type': 'rectangle' if el.tag == 'box' else el.tag + "attributes": elem_attributes, + "points": [], + "type": "rectangle" if el.tag == "box" else el.tag, } 
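+                    # a skeleton child shape gets its own element dict; when parsing a track, a sub-track per element label is created on first use below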
- if track is not None and el.attrib['label'] not in track_elements: - track_elements[el.attrib['label']] = annotations.Track( - label=el.attrib['label'], + if track is not None and el.attrib["label"] not in track_elements: + track_elements[el.attrib["label"]] = annotations.Track( + label=el.attrib["label"], group=0, - source='file', + source="file", shapes=[], elements=[], ) else: attributes = [] shape = { - 'attributes': attributes, - 'points': [], - 'type': 'rectangle' if el.tag == 'box' else el.tag + "attributes": attributes, + "points": [], + "type": "rectangle" if el.tag == "box" else el.tag, } if track is None: - shape['elements'] = [] - elif shape['type'] == 'skeleton': - shape['frame'] = el.attrib['frame'] + shape["elements"] = [] + elif shape["type"] == "skeleton": + shape["frame"] = el.attrib["frame"] if track_elements is None: track_elements = {} - elif el.tag == 'tag' and image_is_opened: + elif el.tag == "tag" and image_is_opened: attributes = [] tag = { - 'frame': frame_id, - 'label': el.attrib['label'], - 'group': int(el.attrib.get('group_id', 0)), - 'attributes': attributes, - 'source': 'file', + "frame": frame_id, + "label": el.attrib["label"], + "group": int(el.attrib.get("group_id", 0)), + "attributes": attributes, + "source": "file", } - elif ev == 'end': - if el.tag == 'attribute' and elem_attributes is not None and shape_element is not None: - elem_attributes.append(annotations.Attribute( - name=el.attrib['name'], - value=el.text or "", - )) - if el.tag == 'attribute' and attributes is not None and shape_element is None: - attributes.append(annotations.Attribute( - name=el.attrib['name'], - value=el.text or "", - )) - if el.tag in supported_shapes and shape['type'] == 'skeleton' and el.tag != 'skeleton': - shape_element['label'] = el.attrib['label'] - - shape_element['occluded'] = el.attrib['occluded'] == '1' - shape_element['outside'] = el.attrib['outside'] == '1' - shape_element['elements'] = [] - - if el.tag == 'box': - shape_element['points'].append(el.attrib['xtl']) - shape_element['points'].append(el.attrib['ytl']) - shape_element['points'].append(el.attrib['xbr']) - shape_element['points'].append(el.attrib['ybr']) - elif el.tag == 'ellipse': - shape_element['points'].append(el.attrib['cx']) - shape_element['points'].append(el.attrib['cy']) - shape_element['points'].append("{:.2f}".format(float(el.attrib['cx']) + float(el.attrib['rx']))) - shape_element['points'].append("{:.2f}".format(float(el.attrib['cy']) - float(el.attrib['ry']))) - elif el.tag == 'cuboid': - shape_element['points'].append(el.attrib['xtl1']) - shape_element['points'].append(el.attrib['ytl1']) - shape_element['points'].append(el.attrib['xbl1']) - shape_element['points'].append(el.attrib['ybl1']) - shape_element['points'].append(el.attrib['xtr1']) - shape_element['points'].append(el.attrib['ytr1']) - shape_element['points'].append(el.attrib['xbr1']) - shape_element['points'].append(el.attrib['ybr1']) - - shape_element['points'].append(el.attrib['xtl2']) - shape_element['points'].append(el.attrib['ytl2']) - shape_element['points'].append(el.attrib['xbl2']) - shape_element['points'].append(el.attrib['ybl2']) - shape_element['points'].append(el.attrib['xtr2']) - shape_element['points'].append(el.attrib['ytr2']) - shape_element['points'].append(el.attrib['xbr2']) - shape_element['points'].append(el.attrib['ybr2']) + elif ev == "end": + if ( + el.tag == "attribute" + and elem_attributes is not None + and shape_element is not None + ): + elem_attributes.append( + annotations.Attribute( + 
name=el.attrib["name"], + value=el.text or "", + ) + ) + if ( + el.tag == "attribute" + and attributes is not None + and shape_element is None + ): + attributes.append( + annotations.Attribute( + name=el.attrib["name"], + value=el.text or "", + ) + ) + if ( + el.tag in supported_shapes + and shape["type"] == "skeleton" + and el.tag != "skeleton" + ): + shape_element["label"] = el.attrib["label"] + + shape_element["occluded"] = el.attrib["occluded"] == "1" + shape_element["outside"] = el.attrib["outside"] == "1" + shape_element["elements"] = [] + + if el.tag == "box": + shape_element["points"].append(el.attrib["xtl"]) + shape_element["points"].append(el.attrib["ytl"]) + shape_element["points"].append(el.attrib["xbr"]) + shape_element["points"].append(el.attrib["ybr"]) + elif el.tag == "ellipse": + shape_element["points"].append(el.attrib["cx"]) + shape_element["points"].append(el.attrib["cy"]) + shape_element["points"].append( + "{:.2f}".format(float(el.attrib["cx"]) + float(el.attrib["rx"])) + ) + shape_element["points"].append( + "{:.2f}".format(float(el.attrib["cy"]) - float(el.attrib["ry"])) + ) + elif el.tag == "cuboid": + shape_element["points"].append(el.attrib["xtl1"]) + shape_element["points"].append(el.attrib["ytl1"]) + shape_element["points"].append(el.attrib["xbl1"]) + shape_element["points"].append(el.attrib["ybl1"]) + shape_element["points"].append(el.attrib["xtr1"]) + shape_element["points"].append(el.attrib["ytr1"]) + shape_element["points"].append(el.attrib["xbr1"]) + shape_element["points"].append(el.attrib["ybr1"]) + + shape_element["points"].append(el.attrib["xtl2"]) + shape_element["points"].append(el.attrib["ytl2"]) + shape_element["points"].append(el.attrib["xbl2"]) + shape_element["points"].append(el.attrib["ybl2"]) + shape_element["points"].append(el.attrib["xtr2"]) + shape_element["points"].append(el.attrib["ytr2"]) + shape_element["points"].append(el.attrib["xbr2"]) + shape_element["points"].append(el.attrib["ybr2"]) else: - for pair in el.attrib['points'].split(';'): - shape_element['points'].extend(map(float, pair.split(','))) + for pair in el.attrib["points"].split(";"): + shape_element["points"].extend(map(float, pair.split(","))) if track is None: - shape_element['frame'] = frame_id - shape_element['source'] = 'file' - shape['elements'].append(annotations.LabeledShape(**shape_element)) + shape_element["frame"] = frame_id + shape_element["source"] = "file" + shape["elements"].append(annotations.LabeledShape(**shape_element)) else: - shape_element["frame"] = shape['frame'] - shape_element['keyframe'] = el.attrib['keyframe'] == "1" - if shape_element['keyframe']: - track_elements[el.attrib['label']].shapes.append(annotations.TrackedShape(**shape_element)) + shape_element["frame"] = shape["frame"] + shape_element["keyframe"] = el.attrib["keyframe"] == "1" + if shape_element["keyframe"]: + track_elements[el.attrib["label"]].shapes.append( + annotations.TrackedShape(**shape_element) + ) shape_element = None elif el.tag in supported_shapes: if track is not None: - shape['frame'] = el.attrib['frame'] - shape['outside'] = el.attrib.get('outside', "0") == "1" - shape['keyframe'] = el.attrib['keyframe'] == "1" + shape["frame"] = el.attrib["frame"] + shape["outside"] = el.attrib.get("outside", "0") == "1" + shape["keyframe"] = el.attrib["keyframe"] == "1" else: - shape['frame'] = frame_id - shape['label'] = el.attrib['label'] - shape['group'] = int(el.attrib.get('group_id', 0)) - shape['source'] = 'file' - shape['outside'] = False - - shape['occluded'] = 
el.attrib.get('occluded', "0") == '1' - shape['z_order'] = int(el.attrib.get('z_order', 0)) - shape['rotation'] = float(el.attrib.get('rotation', 0)) - - if el.tag == 'box': - shape['points'].append(el.attrib['xtl']) - shape['points'].append(el.attrib['ytl']) - shape['points'].append(el.attrib['xbr']) - shape['points'].append(el.attrib['ybr']) - elif el.tag == 'ellipse': - shape['points'].append(el.attrib['cx']) - shape['points'].append(el.attrib['cy']) - shape['points'].append("{:.2f}".format(float(el.attrib['cx']) + float(el.attrib['rx']))) - shape['points'].append("{:.2f}".format(float(el.attrib['cy']) - float(el.attrib['ry']))) - elif el.tag == 'mask': - shape['points'] = el.attrib['rle'].split(',') - shape['points'].append(el.attrib['left']) - shape['points'].append(el.attrib['top']) - shape['points'].append("{}".format(int(el.attrib['left']) + int(el.attrib['width']) - 1)) - shape['points'].append("{}".format(int(el.attrib['top']) + int(el.attrib['height']) - 1)) - elif el.tag == 'cuboid': - shape['points'].append(el.attrib['xtl1']) - shape['points'].append(el.attrib['ytl1']) - shape['points'].append(el.attrib['xbl1']) - shape['points'].append(el.attrib['ybl1']) - shape['points'].append(el.attrib['xtr1']) - shape['points'].append(el.attrib['ytr1']) - shape['points'].append(el.attrib['xbr1']) - shape['points'].append(el.attrib['ybr1']) - - shape['points'].append(el.attrib['xtl2']) - shape['points'].append(el.attrib['ytl2']) - shape['points'].append(el.attrib['xbl2']) - shape['points'].append(el.attrib['ybl2']) - shape['points'].append(el.attrib['xtr2']) - shape['points'].append(el.attrib['ytr2']) - shape['points'].append(el.attrib['xbr2']) - shape['points'].append(el.attrib['ybr2']) - elif el.tag == 'skeleton': + shape["frame"] = frame_id + shape["label"] = el.attrib["label"] + shape["group"] = int(el.attrib.get("group_id", 0)) + shape["source"] = "file" + shape["outside"] = False + + shape["occluded"] = el.attrib.get("occluded", "0") == "1" + shape["z_order"] = int(el.attrib.get("z_order", 0)) + shape["rotation"] = float(el.attrib.get("rotation", 0)) + + if el.tag == "box": + shape["points"].append(el.attrib["xtl"]) + shape["points"].append(el.attrib["ytl"]) + shape["points"].append(el.attrib["xbr"]) + shape["points"].append(el.attrib["ybr"]) + elif el.tag == "ellipse": + shape["points"].append(el.attrib["cx"]) + shape["points"].append(el.attrib["cy"]) + shape["points"].append( + "{:.2f}".format(float(el.attrib["cx"]) + float(el.attrib["rx"])) + ) + shape["points"].append( + "{:.2f}".format(float(el.attrib["cy"]) - float(el.attrib["ry"])) + ) + elif el.tag == "mask": + shape["points"] = el.attrib["rle"].split(",") + shape["points"].append(el.attrib["left"]) + shape["points"].append(el.attrib["top"]) + shape["points"].append( + "{}".format( + int(el.attrib["left"]) + int(el.attrib["width"]) - 1 + ) + ) + shape["points"].append( + "{}".format( + int(el.attrib["top"]) + int(el.attrib["height"]) - 1 + ) + ) + elif el.tag == "cuboid": + shape["points"].append(el.attrib["xtl1"]) + shape["points"].append(el.attrib["ytl1"]) + shape["points"].append(el.attrib["xbl1"]) + shape["points"].append(el.attrib["ybl1"]) + shape["points"].append(el.attrib["xtr1"]) + shape["points"].append(el.attrib["ytr1"]) + shape["points"].append(el.attrib["xbr1"]) + shape["points"].append(el.attrib["ybr1"]) + + shape["points"].append(el.attrib["xtl2"]) + shape["points"].append(el.attrib["ytl2"]) + shape["points"].append(el.attrib["xbl2"]) + shape["points"].append(el.attrib["ybl2"]) + 
shape["points"].append(el.attrib["xtr2"]) + shape["points"].append(el.attrib["ytr2"]) + shape["points"].append(el.attrib["xbr2"]) + shape["points"].append(el.attrib["ybr2"]) + elif el.tag == "skeleton": pass else: - for pair in el.attrib['points'].split(';'): - shape['points'].extend(map(float, pair.split(','))) + for pair in el.attrib["points"].split(";"): + shape["points"].extend(map(float, pair.split(","))) if track is not None: - if shape['keyframe']: + if shape["keyframe"]: track.shapes.append(annotations.TrackedShape(**shape)) else: annotations.add_shape(annotations.LabeledShape(**shape)) shape = None - elif el.tag == 'track': - if track.shapes[0].type == 'mask': + elif el.tag == "track": + if track.shapes[0].type == "mask": # convert mask tracks to shapes # because mask track are not supported - annotations.add_shape(annotations.LabeledShape(**{ - 'attributes': track.shapes[0].attributes, - 'points': track.shapes[0].points, - 'type': track.shapes[0].type, - 'occluded': track.shapes[0].occluded, - 'frame': track.shapes[0].frame, - 'source': track.shapes[0].source, - 'rotation': track.shapes[0].rotation, - 'z_order': track.shapes[0].z_order, - 'group': track.shapes[0].group, - 'label': track.label, - })) + annotations.add_shape( + annotations.LabeledShape( + **{ + "attributes": track.shapes[0].attributes, + "points": track.shapes[0].points, + "type": track.shapes[0].type, + "occluded": track.shapes[0].occluded, + "frame": track.shapes[0].frame, + "source": track.shapes[0].source, + "rotation": track.shapes[0].rotation, + "z_order": track.shapes[0].z_order, + "group": track.shapes[0].group, + "label": track.label, + } + ) + ) else: if track_elements is not None: for element in track_elements.values(): @@ -1351,89 +1669,138 @@ def load_anno(file_object, annotations): track_elements = None annotations.add_track(track) track = None - elif el.tag == 'image': + elif el.tag == "image": image_is_opened = False - elif el.tag == 'tag': + elif el.tag == "tag": annotations.add_tag(annotations.Tag(**tag)) tag = None el.clear() + def dump_task_or_job_anno(dst_file, instance_data, callback): dumper = create_xml_dumper(dst_file) dumper.open_document() callback(dumper, instance_data) dumper.close_document() -def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callback: Callable): + +def dump_project_anno( + dst_file: BufferedWriter, project_data: ProjectData, callback: Callable +): dumper = create_xml_dumper(dst_file) dumper.open_document() callback(dumper, project_data) dumper.close_document() -def dump_media_files(instance_data: CommonData, img_dir: str, project_data: ProjectData = None): - ext = '' - if instance_data.meta[instance_data.META_FIELD]['mode'] == 'interpolation': + +def dump_media_files( + instance_data: CommonData, img_dir: str, project_data: ProjectData = None +): + ext = "" + if instance_data.meta[instance_data.META_FIELD]["mode"] == "interpolation": ext = FrameProvider.VIDEO_FRAME_EXT frame_provider = FrameProvider(instance_data.db_data) frames = frame_provider.get_frames( - instance_data.start, instance_data.stop, + instance_data.start, + instance_data.stop, frame_provider.Quality.ORIGINAL, - frame_provider.Type.BUFFER) + frame_provider.Type.BUFFER, + ) for frame_id, (frame_data, _) in zip(instance_data.rel_range, frames): - if (project_data is not None and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames) \ - or frame_id in instance_data.deleted_frames: + if ( + project_data is not None + and (instance_data.db_instance.id, frame_id) in 
project_data.deleted_frames + ) or frame_id in instance_data.deleted_frames: continue - frame_name = instance_data.frame_info[frame_id]['path'] if project_data is None \ - else project_data.frame_info[(instance_data.db_instance.id, frame_id)]['path'] + frame_name = ( + instance_data.frame_info[frame_id]["path"] + if project_data is None + else project_data.frame_info[(instance_data.db_instance.id, frame_id)][ + "path" + ] + ) img_path = osp.join(img_dir, frame_name + ext) os.makedirs(osp.dirname(img_path), exist_ok=True) - with open(img_path, 'wb') as f: + with open(img_path, "wb") as f: f.write(frame_data.getvalue()) -def _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback, save_images=False): - with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: + +def _export_task_or_job( + dst_file, temp_dir, instance_data, anno_callback, save_images=False +): + with open(osp.join(temp_dir, "annotations.xml"), "wb") as f: dump_task_or_job_anno(f, instance_data, anno_callback) if save_images: - dump_media_files(instance_data, osp.join(temp_dir, 'images')) + dump_media_files(instance_data, osp.join(temp_dir, "images")) make_zip_archive(temp_dir, dst_file) -def _export_project(dst_file: str, temp_dir: str, project_data: ProjectData, - anno_callback: Callable, save_images: bool=False + +def _export_project( + dst_file: str, + temp_dir: str, + project_data: ProjectData, + anno_callback: Callable, + save_images: bool = False, ): - with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: + with open(osp.join(temp_dir, "annotations.xml"), "wb") as f: dump_project_anno(f, project_data, anno_callback) if save_images: for task_data in project_data.task_data: - subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets) - subset_dir = osp.join(temp_dir, 'images', subset) + subset = get_defaulted_subset( + task_data.db_instance.subset, project_data.subsets + ) + subset_dir = osp.join(temp_dir, "images", subset) os.makedirs(subset_dir, exist_ok=True) dump_media_files(task_data, subset_dir, project_data) make_zip_archive(temp_dir, dst_file) -@exporter(name='CVAT for video', ext='ZIP', version='1.1') + +@exporter(name="CVAT for video", ext="ZIP", version="1.1") def _export_video(dst_file, temp_dir, instance_data, save_images=False): if isinstance(instance_data, ProjectData): - _export_project(dst_file, temp_dir, instance_data, - anno_callback=dump_as_cvat_interpolation, save_images=save_images) + _export_project( + dst_file, + temp_dir, + instance_data, + anno_callback=dump_as_cvat_interpolation, + save_images=save_images, + ) else: - _export_task_or_job(dst_file, temp_dir, instance_data, - anno_callback=dump_as_cvat_interpolation, save_images=save_images) + _export_task_or_job( + dst_file, + temp_dir, + instance_data, + anno_callback=dump_as_cvat_interpolation, + save_images=save_images, + ) -@exporter(name='CVAT for images', ext='ZIP', version='1.1') + +@exporter(name="CVAT for images", ext="ZIP", version="1.1") def _export_images(dst_file, temp_dir, instance_data, save_images=False): if isinstance(instance_data, ProjectData): - _export_project(dst_file, temp_dir, instance_data, - anno_callback=dump_as_cvat_annotation, save_images=save_images) + _export_project( + dst_file, + temp_dir, + instance_data, + anno_callback=dump_as_cvat_annotation, + save_images=save_images, + ) else: - _export_task_or_job(dst_file, temp_dir, instance_data, - anno_callback=dump_as_cvat_annotation, save_images=save_images) + _export_task_or_job( + dst_file, + temp_dir, + instance_data, + 
anno_callback=dump_as_cvat_annotation, + save_images=save_images, + ) + -@importer(name='CVAT', ext='XML, ZIP', version='1.1') +@importer(name="CVAT", ext="XML, ZIP", version="1.1") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) @@ -1441,13 +1808,13 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs zipfile.ZipFile(src_file).extractall(temp_dir) if isinstance(instance_data, ProjectData): - detect_dataset(temp_dir, format_name='cvat', importer=_CvatImporter) - dataset = Dataset.import_from(temp_dir, 'cvat', env=dm_env) + detect_dataset(temp_dir, format_name="cvat", importer=_CvatImporter) + dataset = Dataset.import_from(temp_dir, "cvat", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) else: - anno_paths = glob(osp.join(temp_dir, '**', '*.xml'), recursive=True) + anno_paths = glob(osp.join(temp_dir, "**", "*.xml"), recursive=True) for p in anno_paths: load_anno(p, instance_data) else: diff --git a/cvat/apps/dataset_manager/formats/datumaro.py b/cvat/apps/dataset_manager/formats/datumaro.py index 090397b7a471..b3cede391d36 100644 --- a/cvat/apps/dataset_manager/formats/datumaro.py +++ b/cvat/apps/dataset_manager/formats/datumaro.py @@ -9,62 +9,76 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from cvat.apps.engine.models import DimensionType from .registry import dm_env, exporter, importer + class DeleteImagePath(ItemTransform): def transform_item(self, item): image = None if item.has_image and item.image.has_data: image = Image(data=item.image.data, size=item.image.size) - return item.wrap(image=image, point_cloud='', related_images=[]) + return item.wrap(image=image, point_cloud="", related_images=[]) @exporter(name="Datumaro", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor(instance_data=instance_data, include_images=save_images) as extractor: + with GetCVATDataExtractor( + instance_data=instance_data, include_images=save_images + ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) if not save_images: dataset.transform(DeleteImagePath) - dataset.export(temp_dir, 'datumaro', save_images=save_images) + dataset.export(temp_dir, "datumaro", save_images=save_images) make_zip_archive(temp_dir, dst_file) + @importer(name="Datumaro", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro')) - dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env) + detect_dataset( + temp_dir, format_name="datumaro", importer=dm_env.importers.get("datumaro") + ) + dataset = Dataset.import_from(temp_dir, "datumaro", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) + @exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor( - 
instance_data=instance_data, include_images=save_images, + instance_data=instance_data, + include_images=save_images, dimension=DimensionType.DIM_3D, ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) if not save_images: dataset.transform(DeleteImagePath) - dataset.export(temp_dir, 'datumaro', save_images=save_images) + dataset.export(temp_dir, "datumaro", save_images=save_images) make_zip_archive(temp_dir, dst_file) + @importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro')) - dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env) + detect_dataset( + temp_dir, format_name="datumaro", importer=dm_env.importers.get("datumaro") + ) + dataset = Dataset.import_from(temp_dir, "datumaro", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/icdar.py b/cvat/apps/dataset_manager/formats/icdar.py index 5d031eef82b0..bf0968d4ae22 100644 --- a/cvat/apps/dataset_manager/formats/icdar.py +++ b/cvat/apps/dataset_manager/formats/icdar.py @@ -5,13 +5,19 @@ import zipfile -from datumaro.components.annotation import (AnnotationType, Caption, Label, - LabelCategories) +from datumaro.components.annotation import ( + AnnotationType, + Caption, + Label, + LabelCategories, +) from datumaro.components.dataset import Dataset from datumaro.components.extractor import ItemTransform -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, - import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -36,11 +42,15 @@ def categories(self): def transform_item(self, item): annotations = item.annotations for ann in annotations: - if ann.type in [AnnotationType.polygon, - AnnotationType.bbox, AnnotationType.mask]: + if ann.type in [ + AnnotationType.polygon, + AnnotationType.bbox, + AnnotationType.mask, + ]: ann.label = self._label return item.wrap(annotations=annotations) + class CaptionToLabel(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -58,90 +68,92 @@ def categories(self): def transform_item(self, item): annotations = item.annotations - captions = [ann for ann in annotations - if ann.type == AnnotationType.caption] + captions = [ann for ann in annotations if ann.type == AnnotationType.caption] for ann in captions: - annotations.append(Label(self._label, - attributes={'text': ann.caption})) + annotations.append(Label(self._label, attributes={"text": ann.caption})) annotations.remove(ann) return item.wrap(annotations=annotations) + class LabelToCaption(ItemTransform): def transform_item(self, item): annotations = item.annotations - anns = [p for p in annotations - if 'text' in p.attributes] + anns = [p for p in annotations if "text" in p.attributes] for ann in anns: - annotations.append(Caption(ann.attributes['text'])) + annotations.append(Caption(ann.attributes["text"])) annotations.remove(ann) return item.wrap(annotations=annotations) -@exporter(name='ICDAR Recognition', ext='ZIP', version='1.0') + +@exporter(name="ICDAR Recognition", ext="ZIP", version="1.0") def _export_recognition(dst_file, 
temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(LabelToCaption) - dataset.export(temp_dir, 'icdar_word_recognition', save_images=save_images) + dataset.export(temp_dir, "icdar_word_recognition", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='ICDAR Recognition', ext='ZIP', version='1.0') + +@importer(name="ICDAR Recognition", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) # We do not run detect_dataset before import because the ICDAR format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - dataset = Dataset.import_from(temp_dir, 'icdar_word_recognition', env=dm_env) - dataset.transform(CaptionToLabel, label='icdar') + dataset = Dataset.import_from(temp_dir, "icdar_word_recognition", env=dm_env) + dataset.transform(CaptionToLabel, label="icdar") if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) -@exporter(name='ICDAR Localization', ext='ZIP', version='1.0') +@exporter(name="ICDAR Localization", ext="ZIP", version="1.0") def _export_localization(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, 'icdar_text_localization', save_images=save_images) + dataset.export(temp_dir, "icdar_text_localization", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='ICDAR Localization', ext='ZIP', version='1.0') + +@importer(name="ICDAR Localization", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) # We do not run detect_dataset before import because the ICDAR format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - dataset = Dataset.import_from(temp_dir, 'icdar_text_localization', env=dm_env) - dataset.transform(AddLabelToAnns, label='icdar') + dataset = Dataset.import_from(temp_dir, "icdar_text_localization", env=dm_env) + dataset.transform(AddLabelToAnns, label="icdar") if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) -@exporter(name='ICDAR Segmentation', ext='ZIP', version='1.0') +@exporter(name="ICDAR Segmentation", ext="ZIP", version="1.0") def _export_segmentation(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('boxes_to_masks') - dataset.transform('merge_instance_segments') - dataset.export(temp_dir, 'icdar_text_segmentation', save_images=save_images) + dataset.transform("polygons_to_masks") + dataset.transform("boxes_to_masks") + dataset.transform("merge_instance_segments") + dataset.export(temp_dir, "icdar_text_segmentation", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='ICDAR Segmentation', ext='ZIP', 
version='1.0') + +@importer(name="ICDAR Segmentation", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) # We do not run detect_dataset before import because the ICDAR format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - dataset = Dataset.import_from(temp_dir, 'icdar_text_segmentation', env=dm_env) - dataset.transform(AddLabelToAnns, label='icdar') + dataset = Dataset.import_from(temp_dir, "icdar_text_segmentation", env=dm_env) + dataset.transform(AddLabelToAnns, label="icdar") dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/imagenet.py b/cvat/apps/dataset_manager/formats/imagenet.py index fd5e9a99a176..45da41715547 100644 --- a/cvat/apps/dataset_manager/formats/imagenet.py +++ b/cvat/apps/dataset_manager/formats/imagenet.py @@ -9,35 +9,38 @@ from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, \ - import_dm_annotations +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name='ImageNet', ext='ZIP', version='1.0') +@exporter(name="ImageNet", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) if save_images: - dataset.export(temp_dir, 'imagenet', save_images=save_images) + dataset.export(temp_dir, "imagenet", save_images=save_images) else: - dataset.export(temp_dir, 'imagenet_txt', save_images=save_images) + dataset.export(temp_dir, "imagenet_txt", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='ImageNet', ext='ZIP', version='1.0') + +@importer(name="ImageNet", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) # We do not run detect_dataset before import because the Imagenet format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - if glob(osp.join(temp_dir, '*.txt')): - dataset = Dataset.import_from(temp_dir, 'imagenet_txt', env=dm_env) + if glob(osp.join(temp_dir, "*.txt")): + dataset = Dataset.import_from(temp_dir, "imagenet_txt", env=dm_env) else: - dataset = Dataset.import_from(temp_dir, 'imagenet', env=dm_env) + dataset = Dataset.import_from(temp_dir, "imagenet", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/kitti.py b/cvat/apps/dataset_manager/formats/kitti.py index 01e1cd3fc4bc..1784cdbbf670 100644 --- a/cvat/apps/dataset_manager/formats/kitti.py +++ b/cvat/apps/dataset_manager/formats/kitti.py @@ -10,7 +10,11 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from 
cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -18,22 +22,26 @@ from .utils import make_colormap -@exporter(name='KITTI', ext='ZIP', version='1.0') +@exporter(name="KITTI", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('merge_instance_segments') - dataset.export(temp_dir, format='kitti', + dataset.transform("polygons_to_masks") + dataset.transform("merge_instance_segments") + dataset.export( + temp_dir, + format="kitti", label_map={k: v[0] for k, v in make_colormap(instance_data).items()}, - apply_colormap=True, save_images=save_images + apply_colormap=True, + save_images=save_images, ) make_zip_archive(temp_dir, dst_file) -@importer(name='KITTI', ext='ZIP', version='1.0') + +@importer(name="KITTI", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) @@ -42,12 +50,15 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs if not osp.isfile(color_map_path): write_label_map(color_map_path, color_map) - detect_dataset(temp_dir, format_name='kitti', importer=dm_env.importers.get('kitti')) - dataset = Dataset.import_from(temp_dir, format='kitti', env=dm_env) - labels_meta = instance_data.meta[instance_data.META_FIELD]['labels'] - if 'background' not in [label['name'] for _, label in labels_meta]: - dataset.filter('/item/annotation[label != "background"]', - filter_annotations=True) + detect_dataset( + temp_dir, format_name="kitti", importer=dm_env.importers.get("kitti") + ) + dataset = Dataset.import_from(temp_dir, format="kitti", env=dm_env) + labels_meta = instance_data.meta[instance_data.META_FIELD]["labels"] + if "background" not in [label["name"] for _, label in labels_meta]: + dataset.filter( + '/item/annotation[label != "background"]', filter_annotations=True + ) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index be9679f268e8..a797f0cd492f 100644 --- a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -6,28 +6,36 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - import_dm_annotations) -from cvat.apps.dataset_manager.formats.transformations import MaskToPolygonTransformation +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) +from cvat.apps.dataset_manager.formats.transformations import ( + MaskToPolygonTransformation, +) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name='LabelMe', ext='ZIP', version='3.0') +@exporter(name="LabelMe", ext="ZIP", version="3.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, 'label_me', 
save_images=save_images) + dataset.export(temp_dir, "label_me", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='LabelMe', ext='ZIP', version='3.0') + +@importer(name="LabelMe", ext="ZIP", version="3.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name='label_me', importer=dm_env.importers.get('label_me')) - dataset = Dataset.import_from(temp_dir, 'label_me', env=dm_env) + detect_dataset( + temp_dir, format_name="label_me", importer=dm_env.importers.get("label_me") + ) + dataset = Dataset.import_from(temp_dir, "label_me", env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/lfw.py b/cvat/apps/dataset_manager/formats/lfw.py index 0af356332bb5..b6aa35a4ef13 100644 --- a/cvat/apps/dataset_manager/formats/lfw.py +++ b/cvat/apps/dataset_manager/formats/lfw.py @@ -6,27 +6,31 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@importer(name='LFW', ext='ZIP', version='1.0') +@importer(name="LFW", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name='lfw', importer=dm_env.importers.get('lfw')) - dataset = Dataset.import_from(temp_dir, 'lfw') + detect_dataset(temp_dir, format_name="lfw", importer=dm_env.importers.get("lfw")) + dataset = Dataset.import_from(temp_dir, "lfw") if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) -@exporter(name='LFW', ext='ZIP', version='1.0') + +@exporter(name="LFW", ext="ZIP", version="1.0") def _exporter(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, format='lfw', save_images=save_images) + dataset.export(temp_dir, format="lfw", save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py index 73c46cdcb49d..76cb51c68ee4 100644 --- a/cvat/apps/dataset_manager/formats/librispeech.py +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -6,100 +6,88 @@ import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer - - - - - - +from cvat.apps.engine.models import Task, Job def load_anno(file_object, annotations): if isinstance(file_object, str): - with open(file_object, 'r', encoding='utf-8') as f: + with open(file_object, "r", encoding="utf-8") as f: content = f.read() lines = content.splitlines() - headers = lines[0].split('\t') + headers = lines[0].split("\t") label_data = InstanceLabelData(annotations.db_instance) - - + task_id = annotations.db_instance.id + task = Task.objects.get(id=task_id) + jobs = Job.objects.filter(segment__task=task) for line 
in lines[1:]: - fields = line.split('\t') + fields = line.split("\t") record = dict(zip(headers, fields)) - job_id = record.get('job_id') + if "job_id" in record: + job_id = record.get("job_id") + else: + job_index_id = int(record.get("job index")) + job_id = jobs[job_index_id].id - start = float(record.get('start', 0)) - end = float(record.get('end', 0)) + start = float(record.get("start", 0)) + end = float(record.get("end", 0)) - label_name = record.get('label') + label_name = record.get("label") label_id = label_data._get_label_id(label_name) - spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) - - + spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get('language',0)) - + language_id = int(record.get("language", 0)) shapes_data = [ { "type": "rectangle", "label": record.get("label", ""), "points": [start, start, end, end], - "frame":0, - "occluded" : False, + "frame": 0, + "occluded": False, "z_order": 0, "group": None, "source": "manual", "transcript": record.get("text", ""), "gender": record.get("gender", ""), - "age": record.get("age",""), - "locale":language_id_to_locale_mapping.get(language_id, ""), - "accent": record.get("accent",""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent", ""), "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } ] + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } + ], } ] - - data = { - 'shapes': shapes_data - } + data = {"shapes": shapes_data} serializer = LabeledDataSerializer(data=data) pk = int(job_id) action = PatchAction.CREATE if serializer.is_valid(raise_exception=True): - data = dm.task.patch_job_data(pk, serializer.data, action) - - - - - - - + data = dm.task.patch_job_data(pk, serializer.data, action) -@importer(name='Librispeech', ext='TSV, ZIP', version=" ") +@importer(name="Librispeech", ext="TSV, ZIP", version=" ") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) for p in anno_paths: - load_anno(p, instance_data) \ No newline at end of file + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/market1501.py b/cvat/apps/dataset_manager/formats/market1501.py index 6be8b2fcf75f..a7e255a92747 100644 --- a/cvat/apps/dataset_manager/formats/market1501.py +++ b/cvat/apps/dataset_manager/formats/market1501.py @@ -5,17 +5,20 @@ import zipfile -from datumaro.components.annotation import (AnnotationType, Label, - LabelCategories) +from datumaro.components.annotation import AnnotationType, Label, LabelCategories from datumaro.components.dataset import Dataset from datumaro.components.extractor import ItemTransform -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer + class AttrToLabelAttr(ItemTransform): def 
__init__(self, extractor, label): super().__init__(extractor) @@ -39,6 +42,7 @@ def transform_item(self, item): attributes = {} return item.wrap(annotations=annotations, attributes=attributes) + class LabelAttrToAttr(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -51,32 +55,37 @@ def transform_item(self, item): annotations = list(item.annotations) attributes = dict(item.attributes) if self._label is not None: - labels = [ann for ann in annotations - if ann.type == AnnotationType.label \ - and ann.label == self._label] + labels = [ + ann + for ann in annotations + if ann.type == AnnotationType.label and ann.label == self._label + ] if len(labels) == 1: attributes.update(labels[0].attributes) annotations.remove(labels[0]) return item.wrap(annotations=annotations, attributes=attributes) -@exporter(name='Market-1501', ext='ZIP', version='1.0') +@exporter(name="Market-1501", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.transform(LabelAttrToAttr, label='market-1501') - dataset.export(temp_dir, 'market1501', save_images=save_images) + dataset.transform(LabelAttrToAttr, label="market-1501") + dataset.export(temp_dir, "market1501", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='Market-1501', ext='ZIP', version='1.0') + +@importer(name="Market-1501", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset(temp_dir, format_name='market1501', importer=dm_env.importers.get('market1501')) - dataset = Dataset.import_from(temp_dir, 'market1501', env=dm_env) - dataset.transform(AttrToLabelAttr, label='market-1501') + detect_dataset( + temp_dir, format_name="market1501", importer=dm_env.importers.get("market1501") + ) + dataset = Dataset.import_from(temp_dir, "market1501", env=dm_env) + dataset.transform(AttrToLabelAttr, label="market-1501") if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index f003f68383e7..400ed8399c06 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ b/cvat/apps/dataset_manager/formats/mask.py @@ -6,34 +6,44 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons from .registry import dm_env, exporter, importer from .utils import make_colormap -@exporter(name='Segmentation mask', ext='ZIP', version='1.1') + +@exporter(name="Segmentation mask", ext="ZIP", version="1.1") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('boxes_to_masks') - dataset.transform('merge_instance_segments') - - 
dataset.export(temp_dir, 'voc_segmentation', save_images=save_images, - apply_colormap=True, label_map=make_colormap(instance_data)) + dataset.transform("polygons_to_masks") + dataset.transform("boxes_to_masks") + dataset.transform("merge_instance_segments") + + dataset.export( + temp_dir, + "voc_segmentation", + save_images=save_images, + apply_colormap=True, + label_map=make_colormap(instance_data), + ) make_zip_archive(temp_dir, dst_file) -@importer(name='Segmentation mask', ext='ZIP', version='1.1') + +@importer(name="Segmentation mask", ext="ZIP", version="1.1") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name='voc', importer=dm_env.importers.get('voc')) - dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env) + detect_dataset(temp_dir, format_name="voc", importer=dm_env.importers.get("voc")) + dataset = Dataset.import_from(temp_dir, "voc", env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index 4030d865c742..f3ede7150299 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -26,29 +26,31 @@ def _import_to_task(dataset, instance_data): if ann.type != dm.AnnotationType.bbox: continue - occluded = ann.attributes.pop('occluded', False) is True - track_id = ann.attributes.pop('track_id', None) + occluded = ann.attributes.pop("occluded", False) is True + track_id = ann.attributes.pop("track_id", None) attributes = [ instance_data.Attribute(name=n, value=str(v)) for n, v in ann.attributes.items() ] if track_id is None: # Extension. 
Import regular boxes: - instance_data.add_shape(instance_data.LabeledShape( - type='rectangle', - label=label_cat.items[ann.label].name, - points=ann.points, - occluded=occluded, - z_order=ann.z_order, - group=0, - frame=frame_number, - attributes=attributes, - source='manual', - )) + instance_data.add_shape( + instance_data.LabeledShape( + type="rectangle", + label=label_cat.items[ann.label].name, + points=ann.points, + occluded=occluded, + z_order=ann.z_order, + group=0, + frame=frame_number, + attributes=attributes, + source="manual", + ) + ) continue shape = instance_data.TrackedShape( - type='rectangle', + type="rectangle", points=ann.points, occluded=occluded, outside=False, @@ -56,13 +58,14 @@ def _import_to_task(dataset, instance_data): z_order=ann.z_order, frame=frame_number, attributes=attributes, - source='manual', + source="manual", ) # build trajectories as lists of shapes in track dict if track_id not in tracks: tracks[track_id] = instance_data.Track( - label_cat.items[ann.label].name, 0, 'manual', []) + label_cat.items[ann.label].name, 0, "manual", [] + ) tracks[track_id].shapes.append(shape) for track in tracks.values(): @@ -75,8 +78,9 @@ def _import_to_task(dataset, instance_data): for shape in track.shapes[1:]: has_skip = instance_data.frame_step < shape.frame - prev_shape.frame if has_skip and not prev_shape.outside: - prev_shape = prev_shape._replace(outside=True, - frame=prev_shape.frame + instance_data.frame_step) + prev_shape = prev_shape._replace( + outside=True, frame=prev_shape.frame + instance_data.frame_step + ) prev_shape_idx += 1 track.shapes.insert(prev_shape_idx, prev_shape) prev_shape = shape @@ -84,34 +88,40 @@ def _import_to_task(dataset, instance_data): # Append a shape with outside=True to finish the track last_shape = track.shapes[-1] - if last_shape.frame + instance_data.frame_step <= \ - int(instance_data.meta[instance_data.META_FIELD]['stop_frame']): - track.shapes.append(last_shape._replace(outside=True, - frame=last_shape.frame + instance_data.frame_step) + if last_shape.frame + instance_data.frame_step <= int( + instance_data.meta[instance_data.META_FIELD]["stop_frame"] + ): + track.shapes.append( + last_shape._replace( + outside=True, frame=last_shape.frame + instance_data.frame_step + ) ) instance_data.add_track(track) -@exporter(name='MOT', ext='ZIP', version='1.1') +@exporter(name="MOT", ext="ZIP", version="1.1") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = dm.Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images) + dataset.export(temp_dir, "mot_seq_gt", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='MOT', ext='ZIP', version='1.1') + +@importer(name="MOT", ext="ZIP", version="1.1") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name='mot_seq', importer=dm_env.importers.get('mot_seq')) - dataset = dm.Dataset.import_from(temp_dir, 'mot_seq', env=dm_env) + detect_dataset( + temp_dir, format_name="mot_seq", importer=dm_env.importers.get("mot_seq") + ) + dataset = dm.Dataset.import_from(temp_dir, "mot_seq", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) # Dirty way to determine instance type to avoid circular dependency - if hasattr(instance_data, '_db_project'): + if hasattr(instance_data, 
"_db_project"): for sub_dataset, task_data in instance_data.split_dataset(dataset): _import_to_task(sub_dataset, task_data) else: diff --git a/cvat/apps/dataset_manager/formats/mots.py b/cvat/apps/dataset_manager/formats/mots.py index 9ed156e6cd4e..0ed88842012b 100644 --- a/cvat/apps/dataset_manager/formats/mots.py +++ b/cvat/apps/dataset_manager/formats/mots.py @@ -8,8 +8,12 @@ from datumaro.components.extractor import ItemTransform from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - find_dataset_root, match_dm_item) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + find_dataset_root, + match_dm_item, +) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -18,8 +22,10 @@ class KeepTracks(ItemTransform): def transform_item(self, item): - return item.wrap(annotations=[a for a in item.annotations - if 'track_id' in a.attributes]) + return item.wrap( + annotations=[a for a in item.annotations if "track_id" in a.attributes] + ) + def _import_to_task(dataset, instance_data): tracks = {} @@ -30,7 +36,8 @@ def _import_to_task(dataset, instance_data): shift = 0 for item in dataset: frame_number = instance_data.abs_frame_id( - match_dm_item(item, instance_data, root_hint=root_hint)) + match_dm_item(item, instance_data, root_hint=root_hint) + ) track_ids = set() @@ -38,7 +45,7 @@ def _import_to_task(dataset, instance_data): if ann.type != AnnotationType.polygon: continue - track_id = ann.attributes['track_id'] + track_id = ann.attributes["track_id"] group_id = track_id if track_id in track_ids: @@ -49,22 +56,23 @@ def _import_to_task(dataset, instance_data): track_ids.add(track_id) shape = instance_data.TrackedShape( - type='polygon', + type="polygon", points=ann.points, - occluded=ann.attributes.get('occluded') is True, + occluded=ann.attributes.get("occluded") is True, outside=False, keyframe=True, z_order=ann.z_order, frame=frame_number, attributes=[], - source='manual', - group=group_id + source="manual", + group=group_id, ) # build trajectories as lists of shapes in track dict if track_id not in tracks: tracks[track_id] = instance_data.Track( - label_cat.items[ann.label].name, 0, 'manual', []) + label_cat.items[ann.label].name, 0, "manual", [] + ) tracks[track_id].shapes.append(shape) for track in tracks.values(): @@ -76,8 +84,9 @@ def _import_to_task(dataset, instance_data): for shape in track.shapes[1:]: has_skip = instance_data.frame_step < shape.frame - prev_shape.frame if has_skip and not prev_shape.outside: - prev_shape = prev_shape._replace(outside=True, - frame=prev_shape.frame + instance_data.frame_step) + prev_shape = prev_shape._replace( + outside=True, frame=prev_shape.frame + instance_data.frame_step + ) prev_shape_idx += 1 track.shapes.insert(prev_shape_idx, prev_shape) prev_shape = shape @@ -85,41 +94,45 @@ def _import_to_task(dataset, instance_data): # Append a shape with outside=True to finish the track last_shape = track.shapes[-1] - if last_shape.frame + instance_data.frame_step <= \ - int(instance_data.meta[instance_data.META_FIELD]['stop_frame']): - track.shapes.append(last_shape._replace(outside=True, - frame=last_shape.frame + instance_data.frame_step) + if last_shape.frame + instance_data.frame_step <= int( + instance_data.meta[instance_data.META_FIELD]["stop_frame"] + ): + track.shapes.append( + last_shape._replace( + outside=True, frame=last_shape.frame + 
instance_data.frame_step + ) ) instance_data.add_track(track) -@exporter(name='MOTS PNG', ext='ZIP', version='1.0') + +@exporter(name="MOTS PNG", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.transform(KeepTracks) # can only export tracks + dataset.transform(KeepTracks) # can only export tracks dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('boxes_to_masks') - dataset.transform('merge_instance_segments') + dataset.transform("polygons_to_masks") + dataset.transform("boxes_to_masks") + dataset.transform("merge_instance_segments") - dataset.export(temp_dir, 'mots_png', save_images=save_images) + dataset.export(temp_dir, "mots_png", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='MOTS PNG', ext='ZIP', version='1.0') + +@importer(name="MOTS PNG", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name='mots', importer=dm_env.importers.get('mots')) - dataset = Dataset.import_from(temp_dir, 'mots', env=dm_env) + detect_dataset(temp_dir, format_name="mots", importer=dm_env.importers.get("mots")) + dataset = Dataset.import_from(temp_dir, "mots", env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) # Dirty way to determine instance type to avoid circular dependency - if hasattr(instance_data, '_db_project'): + if hasattr(instance_data, "_db_project"): for sub_dataset, task_data in instance_data.split_dataset(dataset): _import_to_task(sub_dataset, task_data) else: _import_to_task(dataset, instance_data) - diff --git a/cvat/apps/dataset_manager/formats/openimages.py b/cvat/apps/dataset_manager/formats/openimages.py index 51fcee29a2fb..ae299386a74a 100644 --- a/cvat/apps/dataset_manager/formats/openimages.py +++ b/cvat/apps/dataset_manager/formats/openimages.py @@ -11,8 +11,13 @@ from datumaro.util.image import DEFAULT_IMAGE_META_FILE_NAME from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - find_dataset_root, import_dm_annotations, match_dm_item) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + find_dataset_root, + import_dm_annotations, + match_dm_item, +) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -22,7 +27,7 @@ def find_item_ids(path): image_desc_patterns = ( OpenImagesPath.FULL_IMAGE_DESCRIPTION_FILE_NAME, - *OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_FILE_PATTERNS + *OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_FILE_PATTERNS, ) image_desc_patterns = ( @@ -32,29 +37,32 @@ def find_item_ids(path): for pattern in image_desc_patterns: for path in glob.glob(pattern): - with open(path, 'r') as desc: + with open(path, "r") as desc: next(desc) for row in desc: - yield row.split(',')[0] + yield row.split(",")[0] -@exporter(name='Open Images V6', ext='ZIP', version='1.0') + +@exporter(name="Open Images V6", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, task_data, save_images=False): with GetCVATDataExtractor(task_data, include_images=save_images) as extractor: dataset = 
Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform('polygons_to_masks') - dataset.transform('merge_instance_segments') + dataset.transform("polygons_to_masks") + dataset.transform("merge_instance_segments") - dataset.export(temp_dir, 'open_images', save_images=save_images) + dataset.export(temp_dir, "open_images", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='Open Images V6', ext='ZIP', version='1.0') + +@importer(name="Open Images V6", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - image_meta_path = osp.join(temp_dir, OpenImagesPath.ANNOTATIONS_DIR, - DEFAULT_IMAGE_META_FILE_NAME) + image_meta_path = osp.join( + temp_dir, OpenImagesPath.ANNOTATIONS_DIR, DEFAULT_IMAGE_META_FILE_NAME + ) image_meta = None if not osp.isfile(image_meta_path): @@ -62,25 +70,30 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs item_ids = list(find_item_ids(temp_dir)) root_hint = find_dataset_root( - [DatasetItem(id=item_id) for item_id in item_ids], instance_data) + [DatasetItem(id=item_id) for item_id in item_ids], instance_data + ) for item_id in item_ids: frame_info = None try: - frame_id = match_dm_item(DatasetItem(id=item_id), - instance_data, root_hint) + frame_id = match_dm_item( + DatasetItem(id=item_id), instance_data, root_hint + ) frame_info = instance_data.frame_info[frame_id] - except Exception: # nosec + except Exception: # nosec pass if frame_info is not None: - image_meta[item_id] = (frame_info['height'], frame_info['width']) + image_meta[item_id] = (frame_info["height"], frame_info["width"]) - detect_dataset(temp_dir, format_name='open_images', importer=dm_env.importers.get('open_images')) - dataset = Dataset.import_from(temp_dir, 'open_images', - image_meta=image_meta, env=dm_env) + detect_dataset( + temp_dir, + format_name="open_images", + importer=dm_env.importers.get("open_images"), + ) + dataset = Dataset.import_from( + temp_dir, "open_images", image_meta=image_meta, env=dm_env + ) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) - - diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index a0d84b745d73..1a16d62350d8 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -11,51 +11,57 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, import_dm_annotations) -from cvat.apps.dataset_manager.formats.transformations import MaskToPolygonTransformation +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) +from cvat.apps.dataset_manager.formats.transformations import ( + MaskToPolygonTransformation, +) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name='PASCAL VOC', ext='ZIP', version='1.1') +@exporter(name="PASCAL VOC", ext="ZIP", version="1.1") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - 
dataset.export(temp_dir, 'voc', save_images=save_images, - label_map='source') + dataset.export(temp_dir, "voc", save_images=save_images, label_map="source") make_zip_archive(temp_dir, dst_file) -@importer(name='PASCAL VOC', ext='ZIP', version='1.1') + +@importer(name="PASCAL VOC", ext="ZIP", version="1.1") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) # put label map from the task if not present - labelmap_file = osp.join(temp_dir, 'labelmap.txt') + labelmap_file = osp.join(temp_dir, "labelmap.txt") if not osp.isfile(labelmap_file): - labels_meta = instance_data.meta[instance_data.META_FIELD]['labels'] - labels = (label['name'] + ':::' for _, label in labels_meta) - with open(labelmap_file, 'w') as f: - f.write('\n'.join(labels)) + labels_meta = instance_data.meta[instance_data.META_FIELD]["labels"] + labels = (label["name"] + ":::" for _, label in labels_meta) + with open(labelmap_file, "w") as f: + f.write("\n".join(labels)) # support flat archive layout - anno_dir = osp.join(temp_dir, 'Annotations') + anno_dir = osp.join(temp_dir, "Annotations") if not osp.isdir(anno_dir): - anno_files = glob(osp.join(temp_dir, '**', '*.xml'), recursive=True) - subsets_dir = osp.join(temp_dir, 'ImageSets', 'Main') + anno_files = glob(osp.join(temp_dir, "**", "*.xml"), recursive=True) + subsets_dir = osp.join(temp_dir, "ImageSets", "Main") os.makedirs(subsets_dir, exist_ok=True) - with open(osp.join(subsets_dir, 'train.txt'), 'w') as subset_file: + with open(osp.join(subsets_dir, "train.txt"), "w") as subset_file: for f in anno_files: - subset_file.write(osp.splitext(osp.basename(f))[0] + '\n') + subset_file.write(osp.splitext(osp.basename(f))[0] + "\n") os.makedirs(anno_dir, exist_ok=True) for f in anno_files: shutil.move(f, anno_dir) - detect_dataset(temp_dir, format_name='voc', importer=dm_env.importers.get('voc')) - dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env) + detect_dataset(temp_dir, format_name="voc", importer=dm_env.importers.get("voc")) + dataset = Dataset.import_from(temp_dir, "voc", env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/pointcloud.py b/cvat/apps/dataset_manager/formats/pointcloud.py index 6ddfbb495427..106611993469 100644 --- a/cvat/apps/dataset_manager/formats/pointcloud.py +++ b/cvat/apps/dataset_manager/formats/pointcloud.py @@ -7,35 +7,59 @@ from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - import_dm_annotations) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from cvat.apps.engine.models import DimensionType from .registry import dm_env, exporter, importer -@exporter(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) +@exporter( + name="Sly Point Cloud Format", + ext="ZIP", + version="1.0", + dimension=DimensionType.DIM_3D, +) def _export_images(dst_file, temp_dir, task_data, save_images=False): with GetCVATDataExtractor( - task_data, include_images=save_images, format_type='sly_pointcloud', + task_data, + include_images=save_images, + format_type="sly_pointcloud", dimension=DimensionType.DIM_3D, ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - 
dataset.export(temp_dir, 'sly_pointcloud', save_images=save_images, allow_undeclared_attrs=True) + dataset.export( + temp_dir, + "sly_pointcloud", + save_images=save_images, + allow_undeclared_attrs=True, + ) make_zip_archive(temp_dir, dst_file) -@importer(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) +@importer( + name="Sly Point Cloud Format", + ext="ZIP", + version="1.0", + dimension=DimensionType.DIM_3D, +) def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): if zipfile.is_zipfile(src_file): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset(temp_dir, format_name='sly_pointcloud', importer=dm_env.importers.get('sly_pointcloud')) - dataset = Dataset.import_from(temp_dir, 'sly_pointcloud', env=dm_env) + detect_dataset( + temp_dir, + format_name="sly_pointcloud", + importer=dm_env.importers.get("sly_pointcloud"), + ) + dataset = Dataset.import_from(temp_dir, "sly_pointcloud", env=dm_env) else: - dataset = Dataset.import_from(src_file.name, 'sly_pointcloud', env=dm_env) + dataset = Dataset.import_from(src_file.name, "sly_pointcloud", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/registry.py b/cvat/apps/dataset_manager/formats/registry.py index c8fec4159ef5..5c0a7550f736 100644 --- a/cvat/apps/dataset_manager/formats/registry.py +++ b/cvat/apps/dataset_manager/formats/registry.py @@ -8,28 +8,45 @@ dm_env = Environment() + class _Format: - NAME = '' - EXT = '' - VERSION = '' - DISPLAY_NAME = '{NAME} {VERSION}' + NAME = "" + EXT = "" + VERSION = "" + DISPLAY_NAME = "{NAME} {VERSION}" ENABLED = True + class Exporter(_Format): def __call__(self, dst_file, temp_dir, instance_data, **options): raise NotImplementedError() + class Importer(_Format): - def __call__(self, src_file, temp_dir, instance_data, load_data_callback=None, **options): + def __call__( + self, src_file, temp_dir, instance_data, load_data_callback=None, **options + ): raise NotImplementedError() -def _wrap_format(f_or_cls, klass, name, version, ext, display_name, enabled, dimension=DimensionType.DIM_2D): + +def _wrap_format( + f_or_cls, + klass, + name, + version, + ext, + display_name, + enabled, + dimension=DimensionType.DIM_2D, +): import inspect + assert inspect.isclass(f_or_cls) or inspect.isfunction(f_or_cls) if inspect.isclass(f_or_cls): - assert hasattr(f_or_cls, '__call__') + assert hasattr(f_or_cls, "__call__") target = f_or_cls elif inspect.isfunction(f_or_cls): + class wrapper(klass): # pylint: disable=arguments-differ def __call__(self, *args, **kwargs): @@ -43,7 +60,8 @@ def __call__(self, *args, **kwargs): target.VERSION = version or klass.VERSION target.EXT = ext or klass.EXT target.DISPLAY_NAME = (display_name or klass.DISPLAY_NAME).format( - NAME=name, VERSION=version, EXT=ext) + NAME=name, VERSION=version, EXT=ext + ) assert all([target.NAME, target.VERSION, target.EXT, target.DISPLAY_NAME]) target.DIMENSION = dimension target.ENABLED = enabled @@ -64,33 +82,59 @@ def format_for(export_format, mode): return format_name -def exporter(name, version, ext, display_name=None, enabled=True, dimension=DimensionType.DIM_2D): +def exporter( + name, version, ext, display_name=None, enabled=True, dimension=DimensionType.DIM_2D +): assert name not in EXPORT_FORMATS, "Export format '%s' already registered" % name + def wrap_with_params(f_or_cls): - t = _wrap_format(f_or_cls, Exporter, - name=name, 
ext=ext, version=version, display_name=display_name, - enabled=enabled, dimension=dimension) + t = _wrap_format( + f_or_cls, + Exporter, + name=name, + ext=ext, + version=version, + display_name=display_name, + enabled=enabled, + dimension=dimension, + ) key = t.DISPLAY_NAME assert key not in EXPORT_FORMATS, "Export format '%s' already registered" % name EXPORT_FORMATS[key] = t return t + return wrap_with_params + IMPORT_FORMATS = {} -def importer(name, version, ext, display_name=None, enabled=True, dimension=DimensionType.DIM_2D): + + +def importer( + name, version, ext, display_name=None, enabled=True, dimension=DimensionType.DIM_2D +): def wrap_with_params(f_or_cls): - t = _wrap_format(f_or_cls, Importer, - name=name, ext=ext, version=version, display_name=display_name, - enabled=enabled, dimension=dimension) + t = _wrap_format( + f_or_cls, + Importer, + name=name, + ext=ext, + version=version, + display_name=display_name, + enabled=enabled, + dimension=dimension, + ) key = t.DISPLAY_NAME assert key not in IMPORT_FORMATS, "Import format '%s' already registered" % name IMPORT_FORMATS[key] = t return t + return wrap_with_params + def make_importer(name): return IMPORT_FORMATS[name]() + def make_exporter(name): return EXPORT_FORMATS[name]() @@ -104,6 +148,7 @@ def make_exporter(name): import cvat.apps.dataset_manager.formats.mot import cvat.apps.dataset_manager.formats.mots import cvat.apps.dataset_manager.formats.pascal_voc + # import cvat.apps.dataset_manager.formats.yolo import cvat.apps.dataset_manager.formats.imagenet import cvat.apps.dataset_manager.formats.camvid @@ -117,9 +162,11 @@ def make_exporter(name): import cvat.apps.dataset_manager.formats.lfw import cvat.apps.dataset_manager.formats.cityscapes import cvat.apps.dataset_manager.formats.openimages -#Audino Export Formats + +# Audino Export Formats import cvat.apps.dataset_manager.formats.aud_common_voice -#Audino Import Formats + +# Audino Import Formats import cvat.apps.dataset_manager.formats.librispeech import cvat.apps.dataset_manager.formats.common_voice import cvat.apps.dataset_manager.formats.tedlium diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py index 26e05dc73aea..00cae3fed7c0 100644 --- a/cvat/apps/dataset_manager/formats/tedlium.py +++ b/cvat/apps/dataset_manager/formats/tedlium.py @@ -5,100 +5,89 @@ from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction -from .registry import importer - - - - - - +from .registry import importer +from cvat.apps.engine.models import Task, Job def load_anno(file_object, annotations): if isinstance(file_object, str): - with open(file_object, 'r', encoding='utf-8') as f: + with open(file_object, "r", encoding="utf-8") as f: content = f.read() lines = content.splitlines() - headers = lines[0].split('\t') + headers = lines[0].split("\t") label_data = InstanceLabelData(annotations.db_instance) - - + task_id = annotations.db_instance.id + task = Task.objects.get(id=task_id) + jobs = Job.objects.filter(segment__task=task) for line in lines[1:]: - fields = line.split('\t') + fields = line.split("\t") record = dict(zip(headers, fields)) - job_id = record.get('job_id') + if "job_id" in record: + job_id = record.get("job_id") + else: + job_index_id = int(record.get("job index")) + job_id = jobs[job_index_id].id - start = float(record.get('start', 0)) - end = float(record.get('end', 0)) + start = float(record.get("start", 0)) + end = 
float(record.get("end", 0)) - label_name = record.get('label') + label_name = record.get("label") label_id = label_data._get_label_id(label_name) language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get('language',0)) - - - spec_id = label_data._get_attribute_id(label_id,record.get("attribute_1_name")) + language_id = int(record.get("language", 0)) + spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) shapes_data = [ { "type": "rectangle", "label": record.get("label", ""), "points": [start, start, end, end], - "frame":0, - "occluded" : False, + "frame": 0, + "occluded": False, "z_order": 0, "group": None, "source": "manual", "transcript": record.get("text", ""), "gender": record.get("gender", ""), - "age": record.get("age",""), - "locale":language_id_to_locale_mapping.get(language_id, ""), - "accent": record.get("accent",""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get(language_id, ""), + "accent": record.get("accent", ""), "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } ] + { + "spec_id": spec_id, + "value": record.get("attribute_1_value", ""), + } + ], } ] - - data = { - 'shapes': shapes_data - } + data = {"shapes": shapes_data} serializer = LabeledDataSerializer(data=data) pk = int(job_id) action = PatchAction.CREATE if serializer.is_valid(raise_exception=True): - data = dm.task.patch_job_data(pk, serializer.data, action) - - - - - - - + data = dm.task.patch_job_data(pk, serializer.data, action) -@importer(name='Ted-Lium', ext='TSV, ZIP', version=" ") +@importer(name="Ted-Lium", ext="TSV, ZIP", version=" ") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, '**', '*.tsv'), recursive=True) + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) for p in anno_paths: - load_anno(p, instance_data) \ No newline at end of file + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/transformations.py b/cvat/apps/dataset_manager/formats/transformations.py index 99d754252378..0b37bcdf7b4e 100644 --- a/cvat/apps/dataset_manager/formats/transformations.py +++ b/cvat/apps/dataset_manager/formats/transformations.py @@ -21,22 +21,38 @@ def _rotate_point(self, p, angle, cx, cy): def transform_item(self, item): annotations = item.annotations[:] - anns = [p for p in annotations if p.type == dm.AnnotationType.bbox and p.attributes['rotation']] + anns = [ + p + for p in annotations + if p.type == dm.AnnotationType.bbox and p.attributes["rotation"] + ] for ann in anns: - rotation = math.radians(ann.attributes['rotation']) + rotation = math.radians(ann.attributes["rotation"]) x0, y0, x1, y1 = ann.points [cx, cy] = [(x0 + (x1 - x0) / 2), (y0 + (y1 - y0) / 2)] - anno_points = list(chain.from_iterable( - map(lambda p: self._rotate_point(p, rotation, cx, cy), [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]) - )) + anno_points = list( + chain.from_iterable( + map( + lambda p: self._rotate_point(p, rotation, cx, cy), + [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], + ) + ) + ) annotations.remove(ann) - annotations.append(dm.Polygon(anno_points, - label=ann.label, attributes=ann.attributes, group=ann.group, - z_order=ann.z_order)) + annotations.append( + dm.Polygon( + anno_points, + label=ann.label, + 
attributes=ann.attributes, + group=ann.group, + z_order=ann.z_order, + ) + ) return item.wrap(annotations=annotations) + class MaskConverter: @staticmethod def cvat_rle_to_dm_rle(shape, img_h: int, img_w: int) -> dm.RleMask: @@ -61,8 +77,13 @@ def cvat_rle_to_dm_rle(shape, img_h: int, img_w: int) -> dm.RleMask: # obtain RLE coco_rle = mask_utils.encode(np.asfortranarray(full_mask)) - return dm.RleMask(rle=coco_rle, label=shape.label, z_order=shape.z_order, - attributes=shape.attributes, group=shape.group) + return dm.RleMask( + rle=coco_rle, + label=shape.label, + z_order=shape.z_order, + attributes=shape.attributes, + group=shape.group, + ) @classmethod def dm_mask_to_cvat_rle(cls, dm_mask: dm.Mask) -> list[int]: @@ -100,6 +121,7 @@ def rle(cls, arr: np.ndarray) -> list[int]: return cvat_rle + class EllipsesToMasks: @staticmethod def convert_ellipse(ellipse, img_h, img_w): @@ -112,8 +134,14 @@ def convert_ellipse(ellipse, img_h, img_w): mat = np.zeros((img_h, img_w), dtype=np.uint8) cv2.ellipse(mat, center, axis, angle, 0, 360, 255, thickness=-1) rle = mask_utils.encode(np.asfortranarray(mat)) - return dm.RleMask(rle=rle, label=ellipse.label, z_order=ellipse.z_order, - attributes=ellipse.attributes, group=ellipse.group) + return dm.RleMask( + rle=rle, + label=ellipse.label, + z_order=ellipse.z_order, + attributes=ellipse.attributes, + group=ellipse.group, + ) + class MaskToPolygonTransformation: """ @@ -123,10 +151,10 @@ class MaskToPolygonTransformation: @classmethod def declare_arg_names(cls): - return ['conv_mask_to_poly'] + return ["conv_mask_to_poly"] @classmethod def convert_dataset(cls, dataset, **kwargs): - if kwargs.get('conv_mask_to_poly', True): - dataset.transform('masks_to_polygons') + if kwargs.get("conv_mask_to_poly", True): + dataset.transform("masks_to_polygons") return dataset diff --git a/cvat/apps/dataset_manager/formats/utils.py b/cvat/apps/dataset_manager/formats/utils.py index 7811fbbfc902..6392a90a13b2 100644 --- a/cvat/apps/dataset_manager/formats/utils.py +++ b/cvat/apps/dataset_manager/formats/utils.py @@ -9,6 +9,7 @@ from datumaro.util.os_util import make_file_name + def get_color_from_index(index): def get_bit(number, index): return (number >> index) & 1 @@ -22,7 +23,10 @@ def get_bit(number, index): return tuple(color) -DEFAULT_COLORMAP_PATH = osp.join(osp.dirname(__file__), 'predefined_colors.txt') + +DEFAULT_COLORMAP_PATH = osp.join(osp.dirname(__file__), "predefined_colors.txt") + + def parse_default_colors(file_path=None): if file_path is None: file_path = DEFAULT_COLORMAP_PATH @@ -31,58 +35,72 @@ def parse_default_colors(file_path=None): with open(file_path) as f: for line in f: line = line.strip() - if not line or line[0] == '#': + if not line or line[0] == "#": continue - _, label, color = line.split(':') - colors[label] = tuple(map(int, color.split(','))) + _, label, color = line.split(":") + colors[label] = tuple(map(int, color.split(","))) return colors + def normalize_label(label): - label = make_file_name(label) # basically, convert to ASCII lowercase - label = label.replace('-', '_') + label = make_file_name(label) # basically, convert to ASCII lowercase + label = label.replace("-", "_") return label + def rgb2hex(color): - return '#{0:02x}{1:02x}{2:02x}'.format(*color) + return "#{0:02x}{1:02x}{2:02x}".format(*color) + def hex2rgb(color): - return tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4)) + return tuple(int(color.lstrip("#")[i : i + 2], 16) for i in (0, 2, 4)) + def make_colormap(instance_data): - labels = [label for _, label 
in instance_data.meta[instance_data.META_FIELD]['labels']] - label_names = [label['name'] for label in labels] - - if 'background' not in label_names: - labels.insert(0, { - 'name': 'background', - 'color': '#000000', - } + labels = [ + label for _, label in instance_data.meta[instance_data.META_FIELD]["labels"] + ] + label_names = [label["name"] for label in labels] + + if "background" not in label_names: + labels.insert( + 0, + { + "name": "background", + "color": "#000000", + }, ) - return {label['name']: [hex2rgb(label['color']), [], []] for label in labels} + return {label["name"]: [hex2rgb(label["color"]), [], []] for label in labels} + def generate_color(color, used_colors): def tint_shade_color(): for added_color in (255, 0): for factor in range(1, 10): - yield tuple(map(lambda c: int(c + (added_color - c) * factor / 10), color)) + yield tuple( + map(lambda c: int(c + (added_color - c) * factor / 10), color) + ) def get_unused_color(): def get_avg_color(index): sorted_colors = sorted(used_colors, key=operator.itemgetter(index)) - max_dist_pair = max(zip(sorted_colors, sorted_colors[1:]), - key=lambda c_pair: c_pair[1][index] - c_pair[0][index]) + max_dist_pair = max( + zip(sorted_colors, sorted_colors[1:]), + key=lambda c_pair: c_pair[1][index] - c_pair[0][index], + ) return (max_dist_pair[0][index] + max_dist_pair[1][index]) // 2 return tuple(get_avg_color(i) for i in range(3)) - #try to tint and shade color firstly + # try to tint and shade color firstly for new_color in tint_shade_color(): if new_color not in used_colors: return new_color return get_unused_color() + def get_label_color(label_name, label_colors): predefined = parse_default_colors() label_colors = tuple(hex2rgb(c) for c in label_colors if c) @@ -91,7 +109,9 @@ def get_label_color(label_name, label_colors): color = predefined.get(normalized_name, None) if color is None: - name_hash = int.from_bytes(blake2s(normalized_name.encode(), digest_size=3).digest(), byteorder="big") + name_hash = int.from_bytes( + blake2s(normalized_name.encode(), digest_size=3).digest(), byteorder="big" + ) color = get_color_from_index(name_hash) if color in label_colors: diff --git a/cvat/apps/dataset_manager/formats/velodynepoint.py b/cvat/apps/dataset_manager/formats/velodynepoint.py index 9912d0b1d67b..e373b5de8697 100644 --- a/cvat/apps/dataset_manager/formats/velodynepoint.py +++ b/cvat/apps/dataset_manager/formats/velodynepoint.py @@ -8,8 +8,11 @@ from datumaro.components.dataset import Dataset from datumaro.components.extractor import ItemTransform -from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset, \ - import_dm_annotations +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from .registry import dm_env from cvat.apps.dataset_manager.util import make_zip_archive @@ -17,35 +20,47 @@ from .registry import exporter, importer + class RemoveTrackingInformation(ItemTransform): def transform_item(self, item): annotations = list(item.annotations) for anno in annotations: - if hasattr(anno, 'attributes') and 'track_id' in anno.attributes: - del anno.attributes['track_id'] + if hasattr(anno, "attributes") and "track_id" in anno.attributes: + del anno.attributes["track_id"] return item.wrap(annotations=annotations) -@exporter(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) + +@exporter( + name="Kitti Raw Format", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D +) def _export_images(dst_file, 
temp_dir, task_data, save_images=False): with GetCVATDataExtractor( - task_data, include_images=save_images, format_type="kitti_raw", + task_data, + include_images=save_images, + format_type="kitti_raw", dimension=DimensionType.DIM_3D, ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RemoveTrackingInformation) - dataset.export(temp_dir, 'kitti_raw', save_images=save_images, reindex=True) + dataset.export(temp_dir, "kitti_raw", save_images=save_images, reindex=True) make_zip_archive(temp_dir, dst_file) -@importer(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) +@importer( + name="Kitti Raw Format", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D +) def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): if zipfile.is_zipfile(src_file): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset(temp_dir, format_name='kitti_raw', importer=dm_env.importers.get('kitti_raw')) - dataset = Dataset.import_from(temp_dir, 'kitti_raw', env=dm_env) + detect_dataset( + temp_dir, + format_name="kitti_raw", + importer=dm_env.importers.get("kitti_raw"), + ) + dataset = Dataset.import_from(temp_dir, "kitti_raw", env=dm_env) else: - dataset = Dataset.import_from(src_file.name, 'kitti_raw', env=dm_env) + dataset = Dataset.import_from(src_file.name, "kitti_raw", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/vggface2.py b/cvat/apps/dataset_manager/formats/vggface2.py index 642171f0f8d9..623076cbb10e 100644 --- a/cvat/apps/dataset_manager/formats/vggface2.py +++ b/cvat/apps/dataset_manager/formats/vggface2.py @@ -7,29 +7,36 @@ from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, TaskData, detect_dataset, \ - import_dm_annotations +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + TaskData, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name='VGGFace2', ext='ZIP', version='1.0') +@exporter(name="VGGFace2", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, 'vgg_face2', save_images=save_images) + dataset.export(temp_dir, "vgg_face2", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='VGGFace2', ext='ZIP', version='1.0') + +@importer(name="VGGFace2", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset(temp_dir, format_name='vgg_face2', importer=dm_env.importers.get('vgg_face2')) - dataset = Dataset.import_from(temp_dir, 'vgg_face2', env=dm_env) + detect_dataset( + temp_dir, format_name="vgg_face2", importer=dm_env.importers.get("vgg_face2") + ) + dataset = Dataset.import_from(temp_dir, "vgg_face2", env=dm_env) if isinstance(instance_data, TaskData): - dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|") + dataset.transform("rename", regex=r"|([^/]+/)?(.+)|\2|") if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git 
a/cvat/apps/dataset_manager/formats/widerface.py b/cvat/apps/dataset_manager/formats/widerface.py index 12a9bf0d21e5..1aeb25353145 100644 --- a/cvat/apps/dataset_manager/formats/widerface.py +++ b/cvat/apps/dataset_manager/formats/widerface.py @@ -7,27 +7,33 @@ from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset, \ - import_dm_annotations +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, +) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name='WiderFace', ext='ZIP', version='1.0') +@exporter(name="WiderFace", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, 'wider_face', save_images=save_images) + dataset.export(temp_dir, "wider_face", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='WiderFace', ext='ZIP', version='1.0') + +@importer(name="WiderFace", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset(temp_dir, format_name='wider_face', importer=dm_env.importers.get('wider_face')) - dataset = Dataset.import_from(temp_dir, 'wider_face', env=dm_env) + detect_dataset( + temp_dir, format_name="wider_face", importer=dm_env.importers.get("wider_face") + ) + dataset = Dataset.import_from(temp_dir, "wider_face", env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index 9f0e46558117..14a197d5a1d0 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -8,8 +8,13 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, - import_dm_annotations, match_dm_item, find_dataset_root) +from cvat.apps.dataset_manager.bindings import ( + GetCVATDataExtractor, + detect_dataset, + import_dm_annotations, + match_dm_item, + find_dataset_root, +) from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.extractor import DatasetItem from datumaro.components.project import Dataset @@ -18,37 +23,41 @@ from .registry import dm_env, exporter, importer -@exporter(name='YOLO', ext='ZIP', version='1.1') +@exporter(name="YOLO", ext="ZIP", version="1.1") def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, 'yolo', save_images=save_images) + dataset.export(temp_dir, "yolo", save_images=save_images) make_zip_archive(temp_dir, dst_file) -@importer(name='YOLO', ext='ZIP', version='1.1') + +@importer(name="YOLO", ext="ZIP", version="1.1") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) image_info = {} - frames = [YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) - for p in glob(osp.join(temp_dir, '**', '*.txt'), recursive=True)] + frames = [ + 
YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) + for p in glob(osp.join(temp_dir, "**", "*.txt"), recursive=True) + ] root_hint = find_dataset_root( - [DatasetItem(id=frame) for frame in frames], instance_data) + [DatasetItem(id=frame) for frame in frames], instance_data + ) for frame in frames: frame_info = None try: - frame_id = match_dm_item(DatasetItem(id=frame), instance_data, - root_hint=root_hint) + frame_id = match_dm_item( + DatasetItem(id=frame), instance_data, root_hint=root_hint + ) frame_info = instance_data.frame_info[frame_id] - except Exception: # nosec + except Exception: # nosec pass if frame_info is not None: - image_info[frame] = (frame_info['height'], frame_info['width']) + image_info[frame] = (frame_info["height"], frame_info["width"]) - detect_dataset(temp_dir, format_name='yolo', importer=dm_env.importers.get('yolo')) - dataset = Dataset.import_from(temp_dir, 'yolo', - env=dm_env, image_info=image_info) + detect_dataset(temp_dir, format_name="yolo", importer=dm_env.importers.get("yolo")) + dataset = Dataset.import_from(temp_dir, "yolo", env=dm_env, image_info=image_info) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) From f0ae2fd029c1010e70004b6ef505f70fb49c7541 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Thu, 12 Sep 2024 17:53:44 +0530 Subject: [PATCH 03/14] fix for adding multiple attributes --- cvat/apps/dataset_manager/formats/LibriVox.py | 28 ++++++++++++++----- .../dataset_manager/formats/VCTK_Corpus.py | 28 ++++++++++++++----- cvat/apps/dataset_manager/formats/VoxCeleb.py | 28 ++++++++++++++----- .../apps/dataset_manager/formats/Voxpopuli.py | 28 ++++++++++++++----- .../dataset_manager/formats/common_voice.py | 28 ++++++++++++++----- .../dataset_manager/formats/librispeech.py | 28 ++++++++++++++----- cvat/apps/dataset_manager/formats/tedlium.py | 28 ++++++++++++++----- 7 files changed, 147 insertions(+), 49 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py index 8dce61a43284..ad6abd78e59e 100644 --- a/cvat/apps/dataset_manager/formats/LibriVox.py +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -42,7 +42,26 @@ def load_anno(file_object, annotations): language_id_to_locale_mapping = {0: "en"} language_id = int(record.get("language", 0)) - spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) + attributes = [] + + for i in range(1, len(headers)): + attribute_name_key = f"attribute_{i}_name" + attribute_value_key = f"attribute_{i}_value" + + if attribute_name_key in record and attribute_value_key in record: + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + + spec_id = label_data._get_attribute_id(label_id, attribute_name) + + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) shapes_data = [ { @@ -62,12 +81,7 @@ def load_anno(file_object, annotations): "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, - "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } - ], + "attributes": attributes, } ] diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py index af938773ebbb..fa17a33647ca 100644 --- a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -42,7 +42,26 @@ def 
load_anno(file_object, annotations): language_id_to_locale_mapping = {0: "en"} language_id = int(record.get("language", 0)) - spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) + attributes = [] + + for i in range(1, len(headers)): + attribute_name_key = f"attribute_{i}_name" + attribute_value_key = f"attribute_{i}_value" + + if attribute_name_key in record and attribute_value_key in record: + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + + spec_id = label_data._get_attribute_id(label_id, attribute_name) + + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) shapes_data = [ { @@ -62,12 +81,7 @@ def load_anno(file_object, annotations): "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, - "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } - ], + "attributes": attributes, } ] diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py index 3a0228b59814..b924612c32ab 100644 --- a/cvat/apps/dataset_manager/formats/VoxCeleb.py +++ b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -42,7 +42,26 @@ def load_anno(file_object, annotations): language_id_to_locale_mapping = {0: "en"} language_id = int(record.get("language", 0)) - spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) + attributes = [] + + for i in range(1, len(headers)): + attribute_name_key = f"attribute_{i}_name" + attribute_value_key = f"attribute_{i}_value" + + if attribute_name_key in record and attribute_value_key in record: + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + + spec_id = label_data._get_attribute_id(label_id, attribute_name) + + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) shapes_data = [ { @@ -62,12 +81,7 @@ def load_anno(file_object, annotations): "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, - "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } - ], + "attributes": attributes, } ] diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py b/cvat/apps/dataset_manager/formats/Voxpopuli.py index e31c549177a2..69954e4f5e7a 100644 --- a/cvat/apps/dataset_manager/formats/Voxpopuli.py +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -42,7 +42,26 @@ def load_anno(file_object, annotations): language_id_to_locale_mapping = {0: "en"} language_id = int(record.get("language", 0)) - spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) + attributes = [] + + for i in range(1, len(headers)): + attribute_name_key = f"attribute_{i}_name" + attribute_value_key = f"attribute_{i}_value" + + if attribute_name_key in record and attribute_value_key in record: + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + + spec_id = label_data._get_attribute_id(label_id, attribute_name) + + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) shapes_data = [ { @@ -62,12 +81,7 @@ def load_anno(file_object, annotations): "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, - "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } - 
], + "attributes": attributes, } ] diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index d0cd4cb47361..eebeb8f189ac 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -42,7 +42,26 @@ def load_anno(file_object, annotations): language_id_to_locale_mapping = {0: "en"} language_id = int(record.get("language", 0)) - spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) + attributes = [] + + for i in range(1, len(headers)): + attribute_name_key = f"attribute_{i}_name" + attribute_value_key = f"attribute_{i}_value" + + if attribute_name_key in record and attribute_value_key in record: + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + + spec_id = label_data._get_attribute_id(label_id, attribute_name) + + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) shapes_data = [ { @@ -62,12 +81,7 @@ def load_anno(file_object, annotations): "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, - "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } - ], + "attributes": attributes, } ] diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py index 76cb51c68ee4..0b3383f2c294 100644 --- a/cvat/apps/dataset_manager/formats/librispeech.py +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -39,7 +39,26 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) + attributes = [] + + for i in range(1, len(headers)): + attribute_name_key = f"attribute_{i}_name" + attribute_value_key = f"attribute_{i}_value" + + if attribute_name_key in record and attribute_value_key in record: + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + + spec_id = label_data._get_attribute_id(label_id, attribute_name) + + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) language_id_to_locale_mapping = {0: "en"} language_id = int(record.get("language", 0)) @@ -62,12 +81,7 @@ def load_anno(file_object, annotations): "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, - "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } - ], + "attributes": attributes, } ] diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py index 00cae3fed7c0..91facd51887e 100644 --- a/cvat/apps/dataset_manager/formats/tedlium.py +++ b/cvat/apps/dataset_manager/formats/tedlium.py @@ -42,7 +42,26 @@ def load_anno(file_object, annotations): language_id_to_locale_mapping = {0: "en"} language_id = int(record.get("language", 0)) - spec_id = label_data._get_attribute_id(label_id, record.get("attribute_1_name")) + attributes = [] + + for i in range(1, len(headers)): + attribute_name_key = f"attribute_{i}_name" + attribute_value_key = f"attribute_{i}_value" + + if attribute_name_key in record and attribute_value_key in record: + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + + spec_id = 
label_data._get_attribute_id(label_id, attribute_name) + + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) shapes_data = [ { @@ -62,12 +81,7 @@ def load_anno(file_object, annotations): "emotion": record.get("emotion", ""), "rotation": 0.0, "label_id": label_id, - "attributes": [ - { - "spec_id": spec_id, - "value": record.get("attribute_1_value", ""), - } - ], + "attributes": attributes, } ] From d1928361420aa252802e8ba141863fe05222c227 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Sat, 14 Sep 2024 10:14:22 +0530 Subject: [PATCH 04/14] added more languages for locale --- cvat/apps/dataset_manager/formats/LibriVox.py | 6 ++++-- cvat/apps/dataset_manager/formats/VCTK_Corpus.py | 6 ++++-- cvat/apps/dataset_manager/formats/VoxCeleb.py | 6 ++++-- cvat/apps/dataset_manager/formats/Voxpopuli.py | 6 ++++-- cvat/apps/dataset_manager/formats/common_voice.py | 6 ++++-- cvat/apps/dataset_manager/formats/librispeech.py | 6 ++++-- cvat/apps/dataset_manager/formats/tedlium.py | 6 ++++-- cvat/apps/dataset_manager/task.py | 4 ++-- 8 files changed, 30 insertions(+), 16 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py index ad6abd78e59e..4ebbe26ce06c 100644 --- a/cvat/apps/dataset_manager/formats/LibriVox.py +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -39,8 +39,10 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get("language", 0)) + language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = int(record.get("language",-1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py index fa17a33647ca..220433ab3a96 100644 --- a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -39,8 +39,10 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get("language", 0)) + language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = int(record.get("language",-1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py index b924612c32ab..5882ff546603 100644 --- a/cvat/apps/dataset_manager/formats/VoxCeleb.py +++ b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -39,8 +39,10 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get("language", 0)) + language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = int(record.get("language",-1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py 
b/cvat/apps/dataset_manager/formats/Voxpopuli.py index 69954e4f5e7a..2abb303d8d1e 100644 --- a/cvat/apps/dataset_manager/formats/Voxpopuli.py +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -39,8 +39,10 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get("language", 0)) + language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = int(record.get("language",-1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index eebeb8f189ac..c9195079dde0 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -39,8 +39,10 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get("language", 0)) + language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = int(record.get("language",-1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py index 0b3383f2c294..056f76c4ff38 100644 --- a/cvat/apps/dataset_manager/formats/librispeech.py +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -60,8 +60,10 @@ def load_anno(file_object, annotations): } ) - language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get("language", 0)) + language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = int(record.get("language",-1)) shapes_data = [ { diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py index 91facd51887e..98c3252abbb7 100644 --- a/cvat/apps/dataset_manager/formats/tedlium.py +++ b/cvat/apps/dataset_manager/formats/tedlium.py @@ -39,8 +39,10 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en"} - language_id = int(record.get("language", 0)) + language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = int(record.get("language",-1)) attributes = [] diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index 383724a16cc9..a57edfb2acef 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -1117,7 +1117,7 @@ def convert_annotation_data_format(data, format_name): formatted_data.append(formatted_entry) return formatted_data elif format_name == "VoxPopuli": - language_id_mapping = {"en": 0} + language_id_mapping = {"en-US": 
0,"es-ES":1,"fr-FR":2,"zh-CN":3,"hi-IN":4,"ar-EG":5,"pt-BR":6,"ja-JP":7,"de-DE":8,"ru-RU":9} formatted_data = [] for entry in data: formatted_entry = { @@ -1202,7 +1202,7 @@ def convert_annotation_data_format(data, format_name): formatted_data.append(formatted_entry) return formatted_data elif format_name == "LibriVox": - language_id_mapping = {"en": 0} + language_id_mapping = {"en-US": 0,"es-ES":1,"fr-FR":2,"zh-CN":3,"hi-IN":4,"ar-EG":5,"pt-BR":6,"ja-JP":7,"de-DE":8,"ru-RU":9} formatted_data = [] for entry in data: formatted_entry = { From 16047a4cb3a84a638d5c87e7c3b08451869f5e92 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Sat, 14 Sep 2024 10:17:22 +0530 Subject: [PATCH 05/14] formatting changes --- cvat/apps/dataset_manager/formats/LibriVox.py | 15 +++++++++++++-- cvat/apps/dataset_manager/formats/VCTK_Corpus.py | 15 +++++++++++++-- cvat/apps/dataset_manager/formats/VoxCeleb.py | 15 +++++++++++++-- cvat/apps/dataset_manager/formats/Voxpopuli.py | 15 +++++++++++++-- cvat/apps/dataset_manager/formats/common_voice.py | 15 +++++++++++++-- cvat/apps/dataset_manager/formats/librispeech.py | 15 +++++++++++++-- cvat/apps/dataset_manager/formats/tedlium.py | 15 +++++++++++++-- 7 files changed, 91 insertions(+), 14 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py index 4ebbe26ce06c..55680859bab8 100644 --- a/cvat/apps/dataset_manager/formats/LibriVox.py +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -39,10 +39,21 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language",-1)) + language_id = int(record.get("language", -1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py index 220433ab3a96..5057e16e9d6f 100644 --- a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -39,10 +39,21 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language",-1)) + language_id = int(record.get("language", -1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py index 5882ff546603..139b7473a30d 100644 --- a/cvat/apps/dataset_manager/formats/VoxCeleb.py +++ b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -39,10 +39,21 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: 
"fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language",-1)) + language_id = int(record.get("language", -1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py b/cvat/apps/dataset_manager/formats/Voxpopuli.py index 2abb303d8d1e..1f6905f6e9e0 100644 --- a/cvat/apps/dataset_manager/formats/Voxpopuli.py +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -39,10 +39,21 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language",-1)) + language_id = int(record.get("language", -1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index c9195079dde0..bc525c91cd8e 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -39,10 +39,21 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language",-1)) + language_id = int(record.get("language", -1)) attributes = [] diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py index 056f76c4ff38..4ec6e7543699 100644 --- a/cvat/apps/dataset_manager/formats/librispeech.py +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -60,10 +60,21 @@ def load_anno(file_object, annotations): } ) - language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language",-1)) + language_id = int(record.get("language", -1)) shapes_data = [ { diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py index 98c3252abbb7..3cb49283d674 100644 --- a/cvat/apps/dataset_manager/formats/tedlium.py +++ b/cvat/apps/dataset_manager/formats/tedlium.py @@ -39,10 +39,21 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = 
label_data._get_label_id(label_name) - language_id_to_locale_mapping = {0: "en-US", 1: "es-ES", 2: "fr-FR", 3: "zh-CN", 4: "hi-IN", 5: "ar-EG", 6: "pt-BR", 7: "ja-JP", 8: "de-DE", 9: "ru-RU"} + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language",-1)) + language_id = int(record.get("language", -1)) attributes = [] From 42dea7c64cbe794c57a09196edf4350fe845d5f4 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Sat, 14 Sep 2024 10:23:52 +0530 Subject: [PATCH 06/14] formatting changes --- cvat/apps/dataset_manager/task.py | 601 ++++++++++++++++++++---------- 1 file changed, 394 insertions(+), 207 deletions(-) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index a57edfb2acef..d27223f3334b 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -18,7 +18,11 @@ from copy import deepcopy from enum import Enum from tempfile import TemporaryDirectory -from datumaro.components.errors import DatasetError, DatasetImportError, DatasetNotFoundError +from datumaro.components.errors import ( + DatasetError, + DatasetImportError, + DatasetNotFoundError, +) from django.db import transaction from django.db.models.query import Prefetch @@ -35,21 +39,29 @@ from cvat.apps.profiler import silk_profile from cvat.apps.dataset_manager.annotation import AnnotationIR, AnnotationManager -from cvat.apps.dataset_manager.bindings import TaskData, JobData, CvatImportError, CvatDatasetNotFoundError +from cvat.apps.dataset_manager.bindings import ( + TaskData, + JobData, + CvatImportError, + CvatDatasetNotFoundError, +) from cvat.apps.dataset_manager.formats.registry import make_exporter, make_importer from cvat.apps.dataset_manager.util import add_prefetch_fields, bulk_create, get_cached dlogger = DatasetLogManager() slogger = ServerLogManager(__name__) + class dotdict(OrderedDict): """dot.notation access to dictionary attributes""" + __getattr__ = OrderedDict.get __setattr__ = OrderedDict.__setitem__ __delattr__ = OrderedDict.__delitem__ __eq__ = lambda self, other: self.id == other.id __hash__ = lambda self: self.id + class PatchAction(str, Enum): CREATE = "create" UPDATE = "update" @@ -62,6 +74,7 @@ def values(cls): def __str__(self): return self.value + def merge_table_rows(rows, keys_for_merge, field_id): # It is necessary to keep a stable order of original rows # (e.g. for tracked boxes). 
Otherwise prev_box.frame can be bigger @@ -78,7 +91,7 @@ def merge_table_rows(rows, keys_for_merge, field_id): merged_rows[row_id][key] = [] for key in keys_for_merge: - item = dotdict({v.split('__', 1)[-1]:row[v] for v in keys_for_merge[key]}) + item = dotdict({v.split("__", 1)[-1]: row[v] for v in keys_for_merge[key]}) if item.id is not None: merged_rows[row_id][key].append(item) @@ -90,35 +103,39 @@ def merge_table_rows(rows, keys_for_merge, field_id): return list(merged_rows.values()) + class JobAnnotation: @classmethod def add_prefetch_info(cls, queryset): assert issubclass(queryset.model, models.Job) - label_qs = add_prefetch_fields(models.Label.objects.all(), [ - 'skeleton', - 'parent', - 'attributespec_set', - ]) + label_qs = add_prefetch_fields( + models.Label.objects.all(), + [ + "skeleton", + "parent", + "attributespec_set", + ], + ) label_qs = JobData.add_prefetch_info(label_qs) return queryset.select_related( - 'segment', - 'segment__task', + "segment", + "segment__task", ).prefetch_related( - 'segment__task__project', - 'segment__task__owner', - 'segment__task__assignee', - 'segment__task__project__owner', - 'segment__task__project__assignee', - - Prefetch('segment__task__data', - queryset=models.Data.objects.select_related('video').prefetch_related( - Prefetch('images', queryset=models.Image.objects.order_by('frame')) - )), - - Prefetch('segment__task__label_set', queryset=label_qs), - Prefetch('segment__task__project__label_set', queryset=label_qs), + "segment__task__project", + "segment__task__owner", + "segment__task__assignee", + "segment__task__project__owner", + "segment__task__project__assignee", + Prefetch( + "segment__task__data", + queryset=models.Data.objects.select_related("video").prefetch_related( + Prefetch("images", queryset=models.Image.objects.order_by("frame")) + ), + ), + Prefetch("segment__task__label_set", queryset=label_qs), + Prefetch("segment__task__project__label_set", queryset=label_qs), ) def __init__(self, pk, *, is_prefetched=False, queryset=None): @@ -126,9 +143,9 @@ def __init__(self, pk, *, is_prefetched=False, queryset=None): queryset = self.add_prefetch_info(models.Job.objects) if is_prefetched: - self.db_job: models.Job = queryset.select_related( - 'segment__task' - ).select_for_update().get(id=pk) + self.db_job: models.Job = ( + queryset.select_related("segment__task").select_for_update().get(id=pk) + ) else: self.db_job: models.Job = get_cached(queryset, pk=int(pk)) @@ -137,9 +154,14 @@ def __init__(self, pk, *, is_prefetched=False, queryset=None): self.stop_frame = db_segment.stop_frame self.ir_data = AnnotationIR(db_segment.task.dimension) - self.db_labels = {db_label.id:db_label - for db_label in (db_segment.task.project.label_set.all() - if db_segment.task.project_id else db_segment.task.label_set.all())} + self.db_labels = { + db_label.id: db_label + for db_label in ( + db_segment.task.project.label_set.all() + if db_segment.task.project_id + else db_segment.task.label_set.all() + ) + } self.db_attributes = {} for db_label in self.db_labels.values(): @@ -149,14 +171,20 @@ def __init__(self, pk, *, is_prefetched=False, queryset=None): "all": OrderedDict(), } for db_attr in db_label.attributespec_set.all(): - default_value = dotdict([ - ('spec_id', db_attr.id), - ('value', db_attr.default_value), - ]) + default_value = dotdict( + [ + ("spec_id", db_attr.id), + ("value", db_attr.default_value), + ] + ) if db_attr.mutable: - self.db_attributes[db_label.id]["mutable"][db_attr.id] = default_value + 
self.db_attributes[db_label.id]["mutable"][ + db_attr.id + ] = default_value else: - self.db_attributes[db_label.id]["immutable"][db_attr.id] = default_value + self.db_attributes[db_label.id]["immutable"][ + db_attr.id + ] = default_value self.db_attributes[db_label.id]["all"][db_attr.id] = default_value @@ -239,14 +267,20 @@ def create_tracks(tracks, parent_track=None): track_attributes = track.pop("attributes", []) shapes = track.pop("shapes") elements = track.pop("elements", []) - db_track = models.LabeledTrack(job=self.db_job, parent=parent_track, **track) + db_track = models.LabeledTrack( + job=self.db_job, parent=parent_track, **track + ) self._validate_label_for_existence(db_track.label_id) for attr in track_attributes: - db_attr_val = models.LabeledTrackAttributeVal(**attr, track_id=len(db_tracks)) + db_attr_val = models.LabeledTrackAttributeVal( + **attr, track_id=len(db_tracks) + ) - self._validate_attribute_for_existence(db_attr_val, db_track.label_id, "immutable") + self._validate_attribute_for_existence( + db_attr_val, db_track.label_id, "immutable" + ) db_track_attr_vals.append(db_attr_val) @@ -255,9 +289,13 @@ def create_tracks(tracks, parent_track=None): db_shape = models.TrackedShape(**shape, track_id=len(db_tracks)) for attr in shape_attributes: - db_attr_val = models.TrackedShapeAttributeVal(**attr, shape_id=len(db_shapes)) + db_attr_val = models.TrackedShapeAttributeVal( + **attr, shape_id=len(db_shapes) + ) - self._validate_attribute_for_existence(db_attr_val, db_track.label_id, "mutable") + self._validate_attribute_for_existence( + db_attr_val, db_track.label_id, "mutable" + ) db_shape_attr_vals.append(db_attr_val) @@ -274,7 +312,7 @@ def create_tracks(tracks, parent_track=None): db_tracks = bulk_create( db_model=models.LabeledTrack, objects=db_tracks, - flt_param={"job_id": self.db_job.id} + flt_param={"job_id": self.db_job.id}, ) for db_attr_val in db_track_attr_vals: @@ -283,7 +321,7 @@ def create_tracks(tracks, parent_track=None): bulk_create( db_model=models.LabeledTrackAttributeVal, objects=db_track_attr_vals, - flt_param={} + flt_param={}, ) for db_shape in db_shapes: @@ -292,7 +330,7 @@ def create_tracks(tracks, parent_track=None): db_shapes = bulk_create( db_model=models.TrackedShape, objects=db_shapes, - flt_param={"track__job_id": self.db_job.id} + flt_param={"track__job_id": self.db_job.id}, ) for db_attr_val in db_shape_attr_vals: @@ -301,7 +339,7 @@ def create_tracks(tracks, parent_track=None): bulk_create( db_model=models.TrackedShapeAttributeVal, objects=db_shape_attr_vals, - flt_param={} + flt_param={}, ) shape_idx = 0 @@ -326,14 +364,20 @@ def create_shapes(shapes, parent_shape=None): shape_elements = shape.pop("elements", []) # FIXME: need to clamp points (be sure that all of them inside the image) # Should we check here or implement a validator? 
- db_shape = models.LabeledShape(job=self.db_job, parent=parent_shape, **shape) + db_shape = models.LabeledShape( + job=self.db_job, parent=parent_shape, **shape + ) self._validate_label_for_existence(db_shape.label_id) for attr in attributes: - db_attr_val = models.LabeledShapeAttributeVal(**attr, shape_id=len(db_shapes)) + db_attr_val = models.LabeledShapeAttributeVal( + **attr, shape_id=len(db_shapes) + ) - self._validate_attribute_for_existence(db_attr_val, db_shape.label_id, "all") + self._validate_attribute_for_existence( + db_attr_val, db_shape.label_id, "all" + ) db_attr_vals.append(db_attr_val) @@ -345,7 +389,7 @@ def create_shapes(shapes, parent_shape=None): db_shapes = bulk_create( db_model=models.LabeledShape, objects=db_shapes, - flt_param={"job_id": self.db_job.id} + flt_param={"job_id": self.db_job.id}, ) for db_attr_val in db_attr_vals: @@ -354,7 +398,7 @@ def create_shapes(shapes, parent_shape=None): bulk_create( db_model=models.LabeledShapeAttributeVal, objects=db_attr_vals, - flt_param={} + flt_param={}, ) for shape, db_shape in zip(shapes, db_shapes): @@ -378,7 +422,9 @@ def _save_tags_to_db(self, tags): for attr in attributes: db_attr_val = models.LabeledImageAttributeVal(**attr) - self._validate_attribute_for_existence(db_attr_val, db_tag.label_id, "all") + self._validate_attribute_for_existence( + db_attr_val, db_tag.label_id, "all" + ) db_attr_val.tag_id = len(db_tags) db_attr_vals.append(db_attr_val) @@ -389,16 +435,14 @@ def _save_tags_to_db(self, tags): db_tags = bulk_create( db_model=models.LabeledImage, objects=db_tags, - flt_param={"job_id": self.db_job.id} + flt_param={"job_id": self.db_job.id}, ) for db_attr_val in db_attr_vals: db_attr_val.image_id = db_tags[db_attr_val.tag_id].id bulk_create( - db_model=models.LabeledImageAttributeVal, - objects=db_attr_vals, - flt_param={} + db_model=models.LabeledImageAttributeVal, objects=db_attr_vals, flt_param={} ) for tag, db_tag in zip(tags, db_tags): @@ -473,9 +517,9 @@ def _delete(self, data=None): # It is not important for us that data had some "invalid" objects # which were skipped (not actually deleted). The main idea is to # say that all requested objects are absent in DB after the method. 
- self.ir_data.tags = data['tags'] - self.ir_data.shapes = data['shapes'] - self.ir_data.tracks = data['tracks'] + self.ir_data.tags = data["tags"] + self.ir_data.shapes = data["shapes"] + self.ir_data.tracks = data["tracks"] labeledimage_set.delete() labeledshape_set.delete() @@ -501,40 +545,50 @@ def _extend_attributes(attributeval_set, default_attribute_values): shape_attribute_specs_set = set(attr.spec_id for attr in attributeval_set) for db_attr in default_attribute_values: if db_attr.spec_id not in shape_attribute_specs_set: - attributeval_set.append(dotdict([ - ('spec_id', db_attr.spec_id), - ('value', db_attr.value), - ])) + attributeval_set.append( + dotdict( + [ + ("spec_id", db_attr.spec_id), + ("value", db_attr.value), + ] + ) + ) def _init_tags_from_db(self): # NOTE: do not use .prefetch_related() with .values() since it's useless: # https://github.com/cvat-ai/cvat/pull/7748#issuecomment-2063695007 - db_tags = self.db_job.labeledimage_set.values( - 'id', - 'frame', - 'label_id', - 'group', - 'source', - 'labeledimageattributeval__spec_id', - 'labeledimageattributeval__value', - 'labeledimageattributeval__id', - ).order_by('frame').iterator(chunk_size=2000) + db_tags = ( + self.db_job.labeledimage_set.values( + "id", + "frame", + "label_id", + "group", + "source", + "labeledimageattributeval__spec_id", + "labeledimageattributeval__value", + "labeledimageattributeval__id", + ) + .order_by("frame") + .iterator(chunk_size=2000) + ) db_tags = merge_table_rows( rows=db_tags, keys_for_merge={ "labeledimageattributeval_set": [ - 'labeledimageattributeval__spec_id', - 'labeledimageattributeval__value', - 'labeledimageattributeval__id', + "labeledimageattributeval__spec_id", + "labeledimageattributeval__value", + "labeledimageattributeval__id", ], }, - field_id='id', + field_id="id", ) for db_tag in db_tags: - self._extend_attributes(db_tag.labeledimageattributeval_set, - self.db_attributes[db_tag.label_id]["all"].values()) + self._extend_attributes( + db_tag.labeledimageattributeval_set, + self.db_attributes[db_tag.label_id]["all"].values(), + ) serializer = serializers.LabeledImageSerializerFromDB(db_tags, many=True) self.ir_data.tags = serializer.data @@ -542,47 +596,53 @@ def _init_tags_from_db(self): def _init_shapes_from_db(self): # NOTE: do not use .prefetch_related() with .values() since it's useless: # https://github.com/cvat-ai/cvat/pull/7748#issuecomment-2063695007 - db_shapes = self.db_job.labeledshape_set.values( - 'id', - 'label_id', - 'type', - 'frame', - 'group', - 'source', - 'occluded', - 'outside', - 'z_order', - 'rotation', - 'points', - 'parent', - 'transcript', - 'gender', - 'age', - 'locale', - 'accent', - 'emotion', - 'labeledshapeattributeval__spec_id', - 'labeledshapeattributeval__value', - 'labeledshapeattributeval__id', - ).order_by('frame').iterator(chunk_size=2000) + db_shapes = ( + self.db_job.labeledshape_set.values( + "id", + "label_id", + "type", + "frame", + "group", + "source", + "occluded", + "outside", + "z_order", + "rotation", + "points", + "parent", + "transcript", + "gender", + "age", + "locale", + "accent", + "emotion", + "labeledshapeattributeval__spec_id", + "labeledshapeattributeval__value", + "labeledshapeattributeval__id", + ) + .order_by("frame") + .iterator(chunk_size=2000) + ) db_shapes = merge_table_rows( rows=db_shapes, keys_for_merge={ - 'labeledshapeattributeval_set': [ - 'labeledshapeattributeval__spec_id', - 'labeledshapeattributeval__value', - 'labeledshapeattributeval__id', + "labeledshapeattributeval_set": [ + 
"labeledshapeattributeval__spec_id", + "labeledshapeattributeval__value", + "labeledshapeattributeval__id", ], }, - field_id='id', + field_id="id", ) shapes = {} elements = {} for db_shape in db_shapes: - self._extend_attributes(db_shape.labeledshapeattributeval_set, - self.db_attributes[db_shape.label_id]["all"].values()) + self._extend_attributes( + db_shape.labeledshapeattributeval_set, + self.db_attributes[db_shape.label_id]["all"].values(), + ) if db_shape.parent is None: db_shape.elements = [] @@ -595,34 +655,40 @@ def _init_shapes_from_db(self): for shape_id, shape_elements in elements.items(): shapes[shape_id].elements = shape_elements - serializer = serializers.LabeledShapeSerializerFromDB(list(shapes.values()), many=True) + serializer = serializers.LabeledShapeSerializerFromDB( + list(shapes.values()), many=True + ) self.ir_data.shapes = serializer.data def _init_tracks_from_db(self): # NOTE: do not use .prefetch_related() with .values() since it's useless: # https://github.com/cvat-ai/cvat/pull/7748#issuecomment-2063695007 - db_tracks = self.db_job.labeledtrack_set.values( - "id", - "frame", - "label_id", - "group", - "source", - "parent", - "labeledtrackattributeval__spec_id", - "labeledtrackattributeval__value", - "labeledtrackattributeval__id", - "trackedshape__type", - "trackedshape__occluded", - "trackedshape__z_order", - "trackedshape__rotation", - "trackedshape__points", - "trackedshape__id", - "trackedshape__frame", - "trackedshape__outside", - "trackedshape__trackedshapeattributeval__spec_id", - "trackedshape__trackedshapeattributeval__value", - "trackedshape__trackedshapeattributeval__id", - ).order_by('id', 'trackedshape__frame').iterator(chunk_size=2000) + db_tracks = ( + self.db_job.labeledtrack_set.values( + "id", + "frame", + "label_id", + "group", + "source", + "parent", + "labeledtrackattributeval__spec_id", + "labeledtrackattributeval__value", + "labeledtrackattributeval__id", + "trackedshape__type", + "trackedshape__occluded", + "trackedshape__z_order", + "trackedshape__rotation", + "trackedshape__points", + "trackedshape__id", + "trackedshape__frame", + "trackedshape__outside", + "trackedshape__trackedshapeattributeval__spec_id", + "trackedshape__trackedshapeattributeval__value", + "trackedshape__trackedshapeattributeval__id", + ) + .order_by("id", "trackedshape__frame") + .iterator(chunk_size=2000) + ) db_tracks = merge_table_rows( rows=db_tracks, @@ -632,7 +698,7 @@ def _init_tracks_from_db(self): "labeledtrackattributeval__value", "labeledtrackattributeval__id", ], - "trackedshape_set":[ + "trackedshape_set": [ "trackedshape__type", "trackedshape__occluded", "trackedshape__z_order", @@ -652,28 +718,40 @@ def _init_tracks_from_db(self): tracks = {} elements = {} for db_track in db_tracks: - db_track["trackedshape_set"] = merge_table_rows(db_track["trackedshape_set"], { - 'trackedshapeattributeval_set': [ - 'trackedshapeattributeval__value', - 'trackedshapeattributeval__spec_id', - 'trackedshapeattributeval__id', - ] - }, 'id') + db_track["trackedshape_set"] = merge_table_rows( + db_track["trackedshape_set"], + { + "trackedshapeattributeval_set": [ + "trackedshapeattributeval__value", + "trackedshapeattributeval__spec_id", + "trackedshapeattributeval__id", + ] + }, + "id", + ) # A result table can consist many equal rows for track/shape attributes # We need filter unique attributes manually - db_track["labeledtrackattributeval_set"] = list(set(db_track["labeledtrackattributeval_set"])) - self._extend_attributes(db_track.labeledtrackattributeval_set, - 
self.db_attributes[db_track.label_id]["immutable"].values()) + db_track["labeledtrackattributeval_set"] = list( + set(db_track["labeledtrackattributeval_set"]) + ) + self._extend_attributes( + db_track.labeledtrackattributeval_set, + self.db_attributes[db_track.label_id]["immutable"].values(), + ) - default_attribute_values = self.db_attributes[db_track.label_id]["mutable"].values() + default_attribute_values = self.db_attributes[db_track.label_id][ + "mutable" + ].values() for db_shape in db_track["trackedshape_set"]: db_shape["trackedshapeattributeval_set"] = list( set(db_shape["trackedshapeattributeval_set"]) ) # in case of trackedshapes need to interpolate attriute values and extend it # by previous shape attribute values (not default values) - self._extend_attributes(db_shape["trackedshapeattributeval_set"], default_attribute_values) + self._extend_attributes( + db_shape["trackedshapeattributeval_set"], default_attribute_values + ) default_attribute_values = db_shape["trackedshapeattributeval_set"] if db_track.parent is None: @@ -687,11 +765,13 @@ def _init_tracks_from_db(self): for track_id, track_elements in elements.items(): tracks[track_id].elements = track_elements - serializer = serializers.LabeledTrackSerializerFromDB(list(tracks.values()), many=True) + serializer = serializers.LabeledTrackSerializerFromDB( + list(tracks.values()), many=True + ) self.ir_data.tracks = serializer.data def _init_version_from_db(self): - self.ir_data.version = 0 # FIXME: should be removed in the future + self.ir_data.version = 0 # FIXME: should be removed in the future def init_from_db(self): self._init_tags_from_db() @@ -703,7 +783,7 @@ def init_from_db(self): def data(self): return self.ir_data.data - def export(self, dst_file, exporter, host='', **options): + def export(self, dst_file, exporter, host="", **options): job_data = JobData( annotation_ir=self.ir_data, db_job=self.db_job, @@ -742,30 +822,40 @@ def import_annotations(self, src_file, importer, **options): self.create(job_data.data.slice(self.start_frame, self.stop_frame).serialize()) + class TaskAnnotation: def __init__(self, pk): self.db_task = models.Task.objects.prefetch_related( - Prefetch('data__images', queryset=models.Image.objects.order_by('frame')) + Prefetch("data__images", queryset=models.Image.objects.order_by("frame")) ).get(id=pk) # Postgres doesn't guarantee an order by default without explicit order_by - self.db_jobs = models.Job.objects.select_related("segment").filter( - segment__task_id=pk, type=models.JobType.ANNOTATION.value, - ).order_by('id') + self.db_jobs = ( + models.Job.objects.select_related("segment") + .filter( + segment__task_id=pk, + type=models.JobType.ANNOTATION.value, + ) + .order_by("id") + ) self.ir_data = AnnotationIR(self.db_task.dimension) def reset(self): self.ir_data.reset() def _patch_data(self, data, action): - _data = data if isinstance(data, AnnotationIR) else AnnotationIR(self.db_task.dimension, data) + _data = ( + data + if isinstance(data, AnnotationIR) + else AnnotationIR(self.db_task.dimension, data) + ) splitted_data = {} jobs = {} for db_job in self.db_jobs: jid = db_job.id start = db_job.segment.start_frame stop = db_job.segment.stop_frame - jobs[jid] = { "start": start, "stop": stop } + jobs[jid] = {"start": start, "stop": stop} splitted_data[jid] = _data.slice(start, stop) for jid, job_data in splitted_data.items(): @@ -776,7 +866,9 @@ def _patch_data(self, data, action): _data.data = patch_job_data(jid, job_data, action) if _data.version > self.ir_data.version: 
self.ir_data.version = _data.version - self._merge_data(_data, jobs[jid]["start"], self.db_task.overlap, self.db_task.dimension) + self._merge_data( + _data, jobs[jid]["start"], self.db_task.overlap, self.db_task.dimension + ) def _merge_data(self, data, start_frame, overlap, dimension): annotation_manager = AnnotationManager(self.ir_data) @@ -815,7 +907,7 @@ def init_from_db(self): dimension = self.db_task.dimension self._merge_data(annotation.ir_data, start_frame, overlap, dimension) - def export(self, dst_file, exporter, host='', **options): + def export(self, dst_file, exporter, host="", **options): task_data = TaskData( annotation_ir=self.ir_data, db_task=self.db_task, @@ -867,6 +959,7 @@ def get_job_data(pk): return annotation.data + @silk_profile(name="POST job data") @transaction.atomic def put_job_data(pk, data): @@ -875,6 +968,7 @@ def put_job_data(pk, data): return annotation.data + @silk_profile(name="UPDATE job data") @plugin_decorator @transaction.atomic @@ -889,12 +983,14 @@ def patch_job_data(pk, data, action): return annotation.data + @silk_profile(name="DELETE job data") @transaction.atomic def delete_job_data(pk): annotation = JobAnnotation(pk) annotation.delete() + def export_job(job_id, dst_file, format_name, server_url=None, save_images=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. @@ -906,18 +1002,24 @@ def export_job(job_id, dst_file, format_name, server_url=None, save_images=False job.init_from_db() exporter = make_exporter(format_name) - with open(dst_file, 'wb') as f: + with open(dst_file, "wb") as f: job.export(f, exporter, host=server_url, save_images=save_images) -def jobChunkPathGetter(db_data, start, stop, task_dimension, data_quality, data_num, job): + +def jobChunkPathGetter( + db_data, start, stop, task_dimension, data_quality, data_num, job +): # db_data = Task Data frame_provider = FrameProvider(db_data, task_dimension) # self.type = data_type number = int(data_num) if data_num is not None else None - quality = FrameProvider.Quality.COMPRESSED \ - if data_quality == 'compressed' else FrameProvider.Quality.ORIGINAL + quality = ( + FrameProvider.Quality.COMPRESSED + if data_quality == "compressed" + else FrameProvider.Quality.ORIGINAL + ) path = os.path.realpath(frame_provider.get_chunk(number, quality)) # pylint: disable=superfluous-parens @@ -926,10 +1028,13 @@ def jobChunkPathGetter(db_data, start, stop, task_dimension, data_quality, data_ return path + def chunk_annotation_audio(concat_array, output_folder, annotations): # Convert NumPy array to AudioSegment - sr = 44100 # sampling rate - audio_segment = AudioSegment(concat_array.tobytes(), frame_rate=sr, channels=1, sample_width=4) + sr = 44100 # sampling rate + audio_segment = AudioSegment( + concat_array.tobytes(), frame_rate=sr, channels=1, sample_width=4 + ) try: y = audio_segment.get_array_of_samples() @@ -940,8 +1045,8 @@ def chunk_annotation_audio(concat_array, output_folder, annotations): for _, shape in enumerate(annotations, 1): - start_time = min(shape['points'][:2]) - end_time = max(shape['points'][2:]) + start_time = min(shape["points"][:2]) + end_time = max(shape["points"][2:]) # Convert time points to sample indices start_sample = int(start_time * sr) @@ -958,9 +1063,12 @@ def chunk_annotation_audio(concat_array, output_folder, annotations): return data -def create_annotation_clips_zip(annotation_audio_chunk_file_paths, meta_data_file_path, output_folder, dst_file): - data_folder = 
os.path.join(output_folder, 'data') - clips_folder = os.path.join(data_folder, 'clips') + +def create_annotation_clips_zip( + annotation_audio_chunk_file_paths, meta_data_file_path, output_folder, dst_file +): + data_folder = os.path.join(output_folder, "data") + clips_folder = os.path.join(data_folder, "clips") os.makedirs(clips_folder, exist_ok=True) # Copy audio files to clips folder @@ -972,8 +1080,8 @@ def create_annotation_clips_zip(annotation_audio_chunk_file_paths, meta_data_fil shutil.copy(meta_data_file_path, os.path.join(data_folder, "data.tsv")) # Create zip file - zip_filename = os.path.join(output_folder, 'common_voice.zip') - with zipfile.ZipFile(zip_filename, 'w') as zipf: + zip_filename = os.path.join(output_folder, "common_voice.zip") + with zipfile.ZipFile(zip_filename, "w") as zipf: for root, _, files in os.walk(data_folder): for file in files: file_path = os.path.join(root, file) @@ -985,6 +1093,7 @@ def create_annotation_clips_zip(annotation_audio_chunk_file_paths, meta_data_fil # Move the zip to the dst_file location shutil.move(zip_filename, dst_file) + def get_np_audio_array_from_job(job_id): with transaction.atomic(): @@ -994,17 +1103,25 @@ def get_np_audio_array_from_job(job_id): job_data_chunk_size = job.db_job.segment.task.data.chunk_size task_dimension = job.db_job.segment.task.dimension - start = job.start_frame/job_data_chunk_size - stop = job.stop_frame/job_data_chunk_size + start = job.start_frame / job_data_chunk_size + stop = job.stop_frame / job_data_chunk_size audio_array_buffer = [] - for i in range(math.trunc(start), math.trunc(stop)+1): + for i in range(math.trunc(start), math.trunc(stop) + 1): db_job = job.db_job # data_type = "chunk" data_num = i - data_quality = 'compressed' - - chunk_path = jobChunkPathGetter(job.db_job.segment.task.data, job.start_frame, job.stop_frame, task_dimension, data_quality, data_num, db_job) + data_quality = "compressed" + + chunk_path = jobChunkPathGetter( + job.db_job.segment.task.data, + job.start_frame, + job.stop_frame, + task_dimension, + data_quality, + data_num, + db_job, + ) _, audio_data = wavfile.read(chunk_path) @@ -1017,6 +1134,7 @@ def get_np_audio_array_from_job(job_id): return concat_array + def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): concat_array = get_np_audio_array_from_job(job_id) @@ -1047,7 +1165,6 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): slogger.glob.debug("JOB LABELS ATTRIBUTES") slogger.glob.debug(json.dumps(attributes_list)) - slogger.glob.debug("JOB LABELS") slogger.glob.debug(json.dumps(labels_list)) @@ -1058,7 +1175,9 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): # wave_file.setframerate(44100) # wave_file.writeframes(concat_array) - annotation_audio_chunk_file_paths = chunk_annotation_audio(concat_array, temp_dir, annotations) + annotation_audio_chunk_file_paths = chunk_annotation_audio( + concat_array, temp_dir, annotations + ) for i, annotation in enumerate(annotations): entry = { @@ -1072,13 +1191,15 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): "emotion": annotation.get("emotion", ""), "label": labels_mapping[annotation["label_id"]]["name"], "start": annotation["points"][0], - "end": annotation["points"][3] + "end": annotation["points"][3], } attributes = annotation.get("attributes", []) for idx, attr in enumerate(attributes): annotation_attribute_id = attr.get("spec_id", "") - label_attributes = 
labels_mapping[annotation["label_id"]].get("attributes", {}) + label_attributes = labels_mapping[annotation["label_id"]].get( + "attributes", {} + ) annotation_attribute = label_attributes.get(annotation_attribute_id, {}) attribute_name = annotation_attribute.get("name", f"attribute_{idx}_name") attribute_val = attr.get("value", "") @@ -1094,6 +1215,7 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): slogger.glob.debug(json.dumps(annotations)) return final_data, annotation_audio_chunk_file_paths + def convert_annotation_data_format(data, format_name): if format_name == "Common Voice": return data @@ -1109,15 +1231,28 @@ def convert_annotation_data_format(data, format_name): "text": entry["sentence"], "label": entry["label"], "start": entry["start"], - "end": entry["end"] + "end": entry["end"], } - attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] + attribute_keys = [ + key for key in entry.keys() if key.startswith("attribute_") + ] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) return formatted_data elif format_name == "VoxPopuli": - language_id_mapping = {"en-US": 0,"es-ES":1,"fr-FR":2,"zh-CN":3,"hi-IN":4,"ar-EG":5,"pt-BR":6,"ja-JP":7,"de-DE":8,"ru-RU":9} + language_id_mapping = { + "en-US": 0, + "es-ES": 1, + "fr-FR": 2, + "zh-CN": 3, + "hi-IN": 4, + "ar-EG": 5, + "pt-BR": 6, + "ja-JP": 7, + "de-DE": 8, + "ru-RU": 9, + } formatted_data = [] for entry in data: formatted_entry = { @@ -1133,9 +1268,11 @@ def convert_annotation_data_format(data, format_name): "accent": entry["accents"], "label": entry["label"], "start": entry["start"], - "end": entry["end"] + "end": entry["end"], } - attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] + attribute_keys = [ + key for key in entry.keys() if key.startswith("attribute_") + ] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) @@ -1152,9 +1289,11 @@ def convert_annotation_data_format(data, format_name): "speaker_id": "", "label": entry["label"], "start": entry["start"], - "end": entry["end"] + "end": entry["end"], } - attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] + attribute_keys = [ + key for key in entry.keys() if key.startswith("attribute_") + ] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) @@ -1167,15 +1306,17 @@ def convert_annotation_data_format(data, format_name): "file": entry["path"], "text": entry["sentence"], "gender": entry["gender"], - "nationality" : "", + "nationality": "", "age": entry["age"], "id": str(uuid.uuid4()), "speaker_id": "", "label": entry["label"], "start": entry["start"], - "end": entry["end"] + "end": entry["end"], } - attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] + attribute_keys = [ + key for key in entry.keys() if key.startswith("attribute_") + ] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) @@ -1194,15 +1335,28 @@ def convert_annotation_data_format(data, format_name): "speaker_id": "", "label": entry["label"], "start": entry["start"], - "end": entry["end"] + "end": entry["end"], } - attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] + attribute_keys = [ + key for key in entry.keys() if key.startswith("attribute_") + ] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) return 
formatted_data elif format_name == "LibriVox": - language_id_mapping = {"en-US": 0,"es-ES":1,"fr-FR":2,"zh-CN":3,"hi-IN":4,"ar-EG":5,"pt-BR":6,"ja-JP":7,"de-DE":8,"ru-RU":9} + language_id_mapping = { + "en-US": 0, + "es-ES": 1, + "fr-FR": 2, + "zh-CN": 3, + "hi-IN": 4, + "ar-EG": 5, + "pt-BR": 6, + "ja-JP": 7, + "de-DE": 8, + "ru-RU": 9, + } formatted_data = [] for entry in data: formatted_entry = { @@ -1218,9 +1372,11 @@ def convert_annotation_data_format(data, format_name): "speaker_id": "", "label": entry["label"], "start": entry["start"], - "end": entry["end"] + "end": entry["end"], } - attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] + attribute_keys = [ + key for key in entry.keys() if key.startswith("attribute_") + ] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) @@ -1228,7 +1384,10 @@ def convert_annotation_data_format(data, format_name): return data -def export_audino_job(job_id, dst_file, format_name, server_url=None, save_images=False): + +def export_audino_job( + job_id, dst_file, format_name, server_url=None, save_images=False +): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. @@ -1246,7 +1405,9 @@ def export_audino_job(job_id, dst_file, format_name, server_url=None, save_image with TemporaryDirectory(dir=temp_dir_base) as temp_dir: - final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir) + final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data( + job_id, dst_file, job, temp_dir_base, temp_dir + ) # Convert the data into a format final_data = convert_annotation_data_format(final_data, format_name) @@ -1254,15 +1415,23 @@ def export_audino_job(job_id, dst_file, format_name, server_url=None, save_image df = pd.DataFrame(final_data) # sorting by start column in ascending order - df = df.sort_values(by='start') + df = df.sort_values(by="start") # Saving the metadata file meta_data_file_path = os.path.join(temp_dir_base, str(job_id) + ".tsv") - df.to_csv(meta_data_file_path, sep='\t', index=False) + df.to_csv(meta_data_file_path, sep="\t", index=False) + + create_annotation_clips_zip( + annotation_audio_chunk_file_paths, + meta_data_file_path, + temp_dir_base, + dst_file, + ) - create_annotation_clips_zip(annotation_audio_chunk_file_paths, meta_data_file_path, temp_dir_base, dst_file) -def export_audino_task(task_id, dst_file, format_name, server_url=None, save_images=False): +def export_audino_task( + task_id, dst_file, format_name, server_url=None, save_images=False +): with transaction.atomic(): task = TaskAnnotation(task_id) @@ -1284,7 +1453,9 @@ def export_audino_task(task_id, dst_file, format_name, server_url=None, save_ima job = JobAnnotation(job.id) job.init_from_db() - final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data(job.db_job.id, dst_file, job, temp_dir_base, temp_dir) + final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data( + job.db_job.id, dst_file, job, temp_dir_base, temp_dir + ) # Convert the data into a format final_data = convert_annotation_data_format(final_data, format_name) @@ -1295,17 +1466,27 @@ def export_audino_task(task_id, dst_file, format_name, server_url=None, save_ima # Saving the metadata file meta_data_file_path = os.path.join(temp_dir_base, str(task_id) + ".tsv") - final_task_data_flatten = [item for sublist in final_task_data for item in sublist] - 
final_annotation_chunk_paths_flatten = [item for sublist in final_annotation_chunk_paths for item in sublist] + final_task_data_flatten = [ + item for sublist in final_task_data for item in sublist + ] + final_annotation_chunk_paths_flatten = [ + item for sublist in final_annotation_chunk_paths for item in sublist + ] df = pd.DataFrame(final_task_data_flatten) # sorting by start column in pandas dataframe - df = df.sort_values(by='start') + df = df.sort_values(by="start") - df.to_csv(meta_data_file_path, sep='\t', index=False) + df.to_csv(meta_data_file_path, sep="\t", index=False) + + create_annotation_clips_zip( + final_annotation_chunk_paths_flatten, + meta_data_file_path, + temp_dir_base, + dst_file, + ) - create_annotation_clips_zip(final_annotation_chunk_paths_flatten, meta_data_file_path, temp_dir_base, dst_file) @silk_profile(name="GET task data") @transaction.atomic @@ -1315,6 +1496,7 @@ def get_task_data(pk): return annotation.data + @silk_profile(name="POST task data") @transaction.atomic def put_task_data(pk, data): @@ -1323,6 +1505,7 @@ def put_task_data(pk, data): return annotation.data + @silk_profile(name="UPDATE task data") @transaction.atomic def patch_task_data(pk, data, action): @@ -1336,12 +1519,14 @@ def patch_task_data(pk, data, action): return annotation.data + @silk_profile(name="DELETE task data") @transaction.atomic def delete_task_data(pk): annotation = TaskAnnotation(pk) annotation.delete() + def export_task(task_id, dst_file, format_name, server_url=None, save_images=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. @@ -1353,28 +1538,30 @@ def export_task(task_id, dst_file, format_name, server_url=None, save_images=Fal task.init_from_db() exporter = make_exporter(format_name) - with open(dst_file, 'wb') as f: + with open(dst_file, "wb") as f: task.export(f, exporter, host=server_url, save_images=save_images) + @transaction.atomic def import_task_annotations(src_file, task_id, format_name, conv_mask_to_poly): task = TaskAnnotation(task_id) task.init_from_db() importer = make_importer(format_name) - with open(src_file, 'rb') as f: + with open(src_file, "rb") as f: try: task.import_annotations(f, importer, conv_mask_to_poly=conv_mask_to_poly) except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex: raise CvatImportError(str(ex)) + @transaction.atomic def import_job_annotations(src_file, job_id, format_name, conv_mask_to_poly): job = JobAnnotation(job_id) job.init_from_db() importer = make_importer(format_name) - with open(src_file, 'rb') as f: + with open(src_file, "rb") as f: try: job.import_annotations(f, importer, conv_mask_to_poly=conv_mask_to_poly) except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex: From a4d99c472067311da1eed91416b2f98e9c24a739 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Sat, 28 Sep 2024 23:28:19 +0530 Subject: [PATCH 07/14] minor change --- .../dataset_manager/formats/common_voice.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index bc525c91cd8e..1e6be783ded1 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -39,22 +39,6 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = { - 0: "en-US", - 1: 
"es-ES", - 2: "fr-FR", - 3: "zh-CN", - 4: "hi-IN", - 5: "ar-EG", - 6: "pt-BR", - 7: "ja-JP", - 8: "de-DE", - 9: "ru-RU", - } - - # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language", -1)) - attributes = [] for i in range(1, len(headers)): @@ -89,7 +73,7 @@ def load_anno(file_object, annotations): "transcript": record.get("sentence", ""), "gender": record.get("gender", ""), "age": record.get("age", ""), - "locale": language_id_to_locale_mapping.get(language_id, ""), + "locale": record.get("locale", ""), "accent": record.get("accents", ""), "emotion": record.get("emotion", ""), "rotation": 0.0, From ea8255b6426de59906f54dab997c44166aa38639 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Sun, 29 Sep 2024 15:40:47 +0530 Subject: [PATCH 08/14] Task import feature for project --- cvat/apps/dataset_manager/formats/LibriVox.py | 292 ++++++++++++++++-- .../dataset_manager/formats/VCTK_Corpus.py | 292 ++++++++++++++++-- cvat/apps/dataset_manager/formats/VoxCeleb.py | 292 ++++++++++++++++-- .../apps/dataset_manager/formats/Voxpopuli.py | 292 ++++++++++++++++-- .../dataset_manager/formats/common_voice.py | 237 +++++++++++++- .../dataset_manager/formats/librispeech.py | 262 +++++++++++++++- cvat/apps/dataset_manager/formats/tedlium.py | 292 ++++++++++++++++-- 7 files changed, 1850 insertions(+), 109 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py index 55680859bab8..a8a5924c58ed 100644 --- a/cvat/apps/dataset_manager/formats/LibriVox.py +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -1,12 +1,62 @@ +import os import os.path as osp import zipfile +import csv +from django.db import transaction from glob import glob +from pydub import AudioSegment from cvat.apps.dataset_manager.bindings import InstanceLabelData from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer -from cvat.apps.engine.models import Task, Job +from cvat.apps.engine.models import Job, Task, Data +from cvat.apps.engine.task import _create_thread +from cvat.apps.dataset_manager.bindings import ProjectData + + +def calculate_duration(row): + start_time = float(row["start"]) # Assuming start and end times are in seconds + end_time = float(row["end"]) + return end_time - start_time + + +def split_rows_by_time(all_rows, time_threshold=600): + result = [] + + total_duration = 0 + + for row in all_rows: + start_time = float(row["start"]) + end_time = float(row["end"]) + duration = end_time - start_time + + total_duration += duration + + if total_duration > time_threshold: + # split logic here + total_duration_till_previous_row = total_duration - duration + remaining_time = time_threshold - total_duration_till_previous_row + + first_part = row.copy() + first_part["end"] = str(float(first_part["start"]) + remaining_time) + + second_part = row.copy() + second_part["start"] = first_part["end"] + + result.append(first_part) + result.append(second_part) + + second_part_duration = float(second_part["end"]) - float( + second_part["start"] + ) + + total_duration = second_part_duration + + else: + result.append(row) + + return result def load_anno(file_object, annotations): @@ -39,22 +89,6 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = { - 0: "en-US", - 1: "es-ES", - 
2: "fr-FR", - 3: "zh-CN", - 4: "hi-IN", - 5: "ar-EG", - 6: "pt-BR", - 7: "ja-JP", - 8: "de-DE", - 9: "ru-RU", - } - - # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language", -1)) - attributes = [] for i in range(1, len(headers)): @@ -76,6 +110,24 @@ def load_anno(file_object, annotations): } ) + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) if record.get("language") else -1 + ) + shapes_data = [ { "type": "rectangle", @@ -112,9 +164,209 @@ def load_anno(file_object, annotations): def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) + file_name = os.path.basename(src_file.name) + name_without_extension = os.path.splitext(file_name)[0] + if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) - for p in anno_paths: - load_anno(p, instance_data) + if isinstance(instance_data, ProjectData): + project = instance_data.db_project + new_task = Task.objects.create( + project=project, + name=name_without_extension, + segment_size=0, + ) + new_task.save() + + with transaction.atomic(): + locked_instance = Task.objects.select_for_update().get(pk=new_task.id) + task_data = locked_instance.data + if not task_data: + task_data = Data.objects.create() + task_data.make_dirs() + locked_instance.data = task_data + locked_instance.save() + + clips_folder = os.path.join(temp_dir, "clips") + tsv_file_path = os.path.join(temp_dir, "data.tsv") + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + tsv_rows = list(reader) + + num_tsv_rows = len(tsv_rows) + num_clips = len(os.listdir(clips_folder)) + + if num_tsv_rows != num_clips: + raise ValueError( + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." 
+ ) + + # Combined audio that will be the final output + combined_audio = AudioSegment.empty() + + # Read TSV file to get the ordered list of audio files + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + + for row in reader: + audio_file_name = row[ + "file" + ] # Assuming 'file' column contains audio file names + file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(file_path): + audio_segment = AudioSegment.from_file(file_path) + combined_audio += ( + audio_segment # Append the audio in the order from TSV + ) + + # Create raw folder to store combined audio + raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") + os.makedirs(raw_folder_path, exist_ok=True) + + combined_audio_path = os.path.join(raw_folder_path, "combined_audio.wav") + combined_audio.export(combined_audio_path, format="wav") + + data = { + "chunk_size": None, + "image_quality": 70, + "start_frame": 0, + "stop_frame": None, + "frame_filter": "", + "client_files": ["combined_audio.wav"], + "server_files": [], + "remote_files": [], + "use_zip_chunks": False, + "server_files_exclude": [], + "use_cache": False, + "copy_data": False, + "storage_method": "file_system", + "storage": "local", + "sorting_method": "lexicographical", + "filename_pattern": None, + } + + _create_thread( + locked_instance, data, is_task_import=True, temp_dir=temp_dir + ) + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + all_rows = list(reader) + + new_rows = split_rows_by_time(all_rows) + + jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") + + label_data = InstanceLabelData(instance_data.db_project) + + record_index = 0 + for job in jobs: + start_time = 0 + + while record_index < len(new_rows): + record = new_rows[record_index] + + record_duration = calculate_duration(record) + + end_time = start_time + record_duration + + label_name = record.get("label") + label_id = label_data._get_label_id(label_name) + + attributes = [] + + # Process dynamic attribute_i_name and attribute_i_value fields + attribute_index = 1 # Start with the first attribute + while True: + attribute_name_key = f"attribute_{attribute_index}_name" + attribute_value_key = f"attribute_{attribute_index}_value" + + # Check if the keys exist in the record + if ( + attribute_name_key in record + and attribute_value_key in record + ): + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + spec_id = label_data._get_attribute_id( + label_id, attribute_name + ) + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) + + attribute_index += 1 # Move to the next attribute index + else: + break # Exit the loop when no more attributes are found + + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) + if record.get("language") + else -1 + ) + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + 
"gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get( + language_id, "" + ), + "accent": record.get("accent", ""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + data = {"shapes": shapes_data} + start_time = end_time + + serializer = LabeledDataSerializer(data=data) + pk = int(job.id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + record_index += 1 + total_duration = round(end_time, 2) + if 599.9 <= total_duration <= 600: + break + + else: + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py index 5057e16e9d6f..87e68cdec291 100644 --- a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -1,12 +1,62 @@ +import os import os.path as osp import zipfile +import csv +from django.db import transaction from glob import glob +from pydub import AudioSegment from cvat.apps.dataset_manager.bindings import InstanceLabelData from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer -from cvat.apps.engine.models import Task, Job +from cvat.apps.engine.models import Job, Task, Data +from cvat.apps.engine.task import _create_thread +from cvat.apps.dataset_manager.bindings import ProjectData + + +def calculate_duration(row): + start_time = float(row["start"]) # Assuming start and end times are in seconds + end_time = float(row["end"]) + return end_time - start_time + + +def split_rows_by_time(all_rows, time_threshold=600): + result = [] + + total_duration = 0 + + for row in all_rows: + start_time = float(row["start"]) + end_time = float(row["end"]) + duration = end_time - start_time + + total_duration += duration + + if total_duration > time_threshold: + # split logic here + total_duration_till_previous_row = total_duration - duration + remaining_time = time_threshold - total_duration_till_previous_row + + first_part = row.copy() + first_part["end"] = str(float(first_part["start"]) + remaining_time) + + second_part = row.copy() + second_part["start"] = first_part["end"] + + result.append(first_part) + result.append(second_part) + + second_part_duration = float(second_part["end"]) - float( + second_part["start"] + ) + + total_duration = second_part_duration + + else: + result.append(row) + + return result def load_anno(file_object, annotations): @@ -39,22 +89,6 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = { - 0: "en-US", - 1: "es-ES", - 2: "fr-FR", - 3: "zh-CN", - 4: "hi-IN", - 5: "ar-EG", - 6: "pt-BR", - 7: "ja-JP", - 8: "de-DE", - 9: "ru-RU", - } - - # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language", -1)) - attributes = [] for i in range(1, len(headers)): @@ -76,6 +110,24 @@ def load_anno(file_object, annotations): } ) + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language 
field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) if record.get("language") else -1 + ) + shapes_data = [ { "type": "rectangle", @@ -112,9 +164,209 @@ def load_anno(file_object, annotations): def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) + file_name = os.path.basename(src_file.name) + name_without_extension = os.path.splitext(file_name)[0] + if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) - for p in anno_paths: - load_anno(p, instance_data) + if isinstance(instance_data, ProjectData): + project = instance_data.db_project + new_task = Task.objects.create( + project=project, + name=name_without_extension, + segment_size=0, + ) + new_task.save() + + with transaction.atomic(): + locked_instance = Task.objects.select_for_update().get(pk=new_task.id) + task_data = locked_instance.data + if not task_data: + task_data = Data.objects.create() + task_data.make_dirs() + locked_instance.data = task_data + locked_instance.save() + + clips_folder = os.path.join(temp_dir, "clips") + tsv_file_path = os.path.join(temp_dir, "data.tsv") + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + tsv_rows = list(reader) + + num_tsv_rows = len(tsv_rows) + num_clips = len(os.listdir(clips_folder)) + + if num_tsv_rows != num_clips: + raise ValueError( + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + ) + + # Combined audio that will be the final output + combined_audio = AudioSegment.empty() + + # Read TSV file to get the ordered list of audio files + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + + for row in reader: + audio_file_name = row[ + "file" + ] # Assuming 'file' column contains audio file names + file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(file_path): + audio_segment = AudioSegment.from_file(file_path) + combined_audio += ( + audio_segment # Append the audio in the order from TSV + ) + + # Create raw folder to store combined audio + raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") + os.makedirs(raw_folder_path, exist_ok=True) + + combined_audio_path = os.path.join(raw_folder_path, "combined_audio.wav") + combined_audio.export(combined_audio_path, format="wav") + + data = { + "chunk_size": None, + "image_quality": 70, + "start_frame": 0, + "stop_frame": None, + "frame_filter": "", + "client_files": ["combined_audio.wav"], + "server_files": [], + "remote_files": [], + "use_zip_chunks": False, + "server_files_exclude": [], + "use_cache": False, + "copy_data": False, + "storage_method": "file_system", + "storage": "local", + "sorting_method": "lexicographical", + "filename_pattern": None, + } + + _create_thread( + locked_instance, data, is_task_import=True, temp_dir=temp_dir + ) + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + all_rows = list(reader) + + new_rows = split_rows_by_time(all_rows) + + jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") + + label_data = InstanceLabelData(instance_data.db_project) + + record_index = 0 + for job in jobs: + start_time = 0 + + while record_index < len(new_rows): + record = 
new_rows[record_index] + + record_duration = calculate_duration(record) + + end_time = start_time + record_duration + + label_name = record.get("label") + label_id = label_data._get_label_id(label_name) + + attributes = [] + + # Process dynamic attribute_i_name and attribute_i_value fields + attribute_index = 1 # Start with the first attribute + while True: + attribute_name_key = f"attribute_{attribute_index}_name" + attribute_value_key = f"attribute_{attribute_index}_value" + + # Check if the keys exist in the record + if ( + attribute_name_key in record + and attribute_value_key in record + ): + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + spec_id = label_data._get_attribute_id( + label_id, attribute_name + ) + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) + + attribute_index += 1 # Move to the next attribute index + else: + break # Exit the loop when no more attributes are found + + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) + if record.get("language") + else -1 + ) + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + "gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get( + language_id, "" + ), + "accent": record.get("accent", ""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + data = {"shapes": shapes_data} + start_time = end_time + + serializer = LabeledDataSerializer(data=data) + pk = int(job.id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + record_index += 1 + total_duration = round(end_time, 2) + if 599.9 <= total_duration <= 600: + break + + else: + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py index 139b7473a30d..ab527a5aa3d5 100644 --- a/cvat/apps/dataset_manager/formats/VoxCeleb.py +++ b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -1,12 +1,62 @@ +import os import os.path as osp import zipfile +import csv +from django.db import transaction from glob import glob +from pydub import AudioSegment from cvat.apps.dataset_manager.bindings import InstanceLabelData from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer -from cvat.apps.engine.models import Task, Job +from cvat.apps.engine.models import Job, Task, Data +from cvat.apps.engine.task import _create_thread +from cvat.apps.dataset_manager.bindings import ProjectData + + +def calculate_duration(row): + start_time = float(row["start"]) # Assuming start and end times are in seconds + end_time = float(row["end"]) + return end_time - start_time + + +def 
split_rows_by_time(all_rows, time_threshold=600): + result = [] + + total_duration = 0 + + for row in all_rows: + start_time = float(row["start"]) + end_time = float(row["end"]) + duration = end_time - start_time + + total_duration += duration + + if total_duration > time_threshold: + # split logic here + total_duration_till_previous_row = total_duration - duration + remaining_time = time_threshold - total_duration_till_previous_row + + first_part = row.copy() + first_part["end"] = str(float(first_part["start"]) + remaining_time) + + second_part = row.copy() + second_part["start"] = first_part["end"] + + result.append(first_part) + result.append(second_part) + + second_part_duration = float(second_part["end"]) - float( + second_part["start"] + ) + + total_duration = second_part_duration + + else: + result.append(row) + + return result def load_anno(file_object, annotations): @@ -39,22 +89,6 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = { - 0: "en-US", - 1: "es-ES", - 2: "fr-FR", - 3: "zh-CN", - 4: "hi-IN", - 5: "ar-EG", - 6: "pt-BR", - 7: "ja-JP", - 8: "de-DE", - 9: "ru-RU", - } - - # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language", -1)) - attributes = [] for i in range(1, len(headers)): @@ -76,6 +110,24 @@ def load_anno(file_object, annotations): } ) + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) if record.get("language") else -1 + ) + shapes_data = [ { "type": "rectangle", @@ -112,9 +164,209 @@ def load_anno(file_object, annotations): def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) + file_name = os.path.basename(src_file.name) + name_without_extension = os.path.splitext(file_name)[0] + if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) - for p in anno_paths: - load_anno(p, instance_data) + if isinstance(instance_data, ProjectData): + project = instance_data.db_project + new_task = Task.objects.create( + project=project, + name=name_without_extension, + segment_size=0, + ) + new_task.save() + + with transaction.atomic(): + locked_instance = Task.objects.select_for_update().get(pk=new_task.id) + task_data = locked_instance.data + if not task_data: + task_data = Data.objects.create() + task_data.make_dirs() + locked_instance.data = task_data + locked_instance.save() + + clips_folder = os.path.join(temp_dir, "clips") + tsv_file_path = os.path.join(temp_dir, "data.tsv") + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + tsv_rows = list(reader) + + num_tsv_rows = len(tsv_rows) + num_clips = len(os.listdir(clips_folder)) + + if num_tsv_rows != num_clips: + raise ValueError( + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." 
+ ) + + # Combined audio that will be the final output + combined_audio = AudioSegment.empty() + + # Read TSV file to get the ordered list of audio files + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + + for row in reader: + audio_file_name = row[ + "file" + ] # Assuming 'file' column contains audio file names + file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(file_path): + audio_segment = AudioSegment.from_file(file_path) + combined_audio += ( + audio_segment # Append the audio in the order from TSV + ) + + # Create raw folder to store combined audio + raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") + os.makedirs(raw_folder_path, exist_ok=True) + + combined_audio_path = os.path.join(raw_folder_path, "combined_audio.wav") + combined_audio.export(combined_audio_path, format="wav") + + data = { + "chunk_size": None, + "image_quality": 70, + "start_frame": 0, + "stop_frame": None, + "frame_filter": "", + "client_files": ["combined_audio.wav"], + "server_files": [], + "remote_files": [], + "use_zip_chunks": False, + "server_files_exclude": [], + "use_cache": False, + "copy_data": False, + "storage_method": "file_system", + "storage": "local", + "sorting_method": "lexicographical", + "filename_pattern": None, + } + + _create_thread( + locked_instance, data, is_task_import=True, temp_dir=temp_dir + ) + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + all_rows = list(reader) + + new_rows = split_rows_by_time(all_rows) + + jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") + + label_data = InstanceLabelData(instance_data.db_project) + + record_index = 0 + for job in jobs: + start_time = 0 + + while record_index < len(new_rows): + record = new_rows[record_index] + + record_duration = calculate_duration(record) + + end_time = start_time + record_duration + + label_name = record.get("label") + label_id = label_data._get_label_id(label_name) + + attributes = [] + + # Process dynamic attribute_i_name and attribute_i_value fields + attribute_index = 1 # Start with the first attribute + while True: + attribute_name_key = f"attribute_{attribute_index}_name" + attribute_value_key = f"attribute_{attribute_index}_value" + + # Check if the keys exist in the record + if ( + attribute_name_key in record + and attribute_value_key in record + ): + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + spec_id = label_data._get_attribute_id( + label_id, attribute_name + ) + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) + + attribute_index += 1 # Move to the next attribute index + else: + break # Exit the loop when no more attributes are found + + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) + if record.get("language") + else -1 + ) + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + 
"gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get( + language_id, "" + ), + "accent": record.get("accent", ""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + data = {"shapes": shapes_data} + start_time = end_time + + serializer = LabeledDataSerializer(data=data) + pk = int(job.id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + record_index += 1 + total_duration = round(end_time, 2) + if 599.9 <= total_duration <= 600: + break + + else: + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py b/cvat/apps/dataset_manager/formats/Voxpopuli.py index 1f6905f6e9e0..17c179ceb75b 100644 --- a/cvat/apps/dataset_manager/formats/Voxpopuli.py +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -1,12 +1,62 @@ +import os import os.path as osp import zipfile +import csv +from django.db import transaction from glob import glob +from pydub import AudioSegment from cvat.apps.dataset_manager.bindings import InstanceLabelData from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer -from cvat.apps.engine.models import Task, Job +from cvat.apps.engine.models import Job, Task, Data +from cvat.apps.engine.task import _create_thread +from cvat.apps.dataset_manager.bindings import ProjectData + + +def calculate_duration(row): + start_time = float(row["start"]) # Assuming start and end times are in seconds + end_time = float(row["end"]) + return end_time - start_time + + +def split_rows_by_time(all_rows, time_threshold=600): + result = [] + + total_duration = 0 + + for row in all_rows: + start_time = float(row["start"]) + end_time = float(row["end"]) + duration = end_time - start_time + + total_duration += duration + + if total_duration > time_threshold: + # split logic here + total_duration_till_previous_row = total_duration - duration + remaining_time = time_threshold - total_duration_till_previous_row + + first_part = row.copy() + first_part["end"] = str(float(first_part["start"]) + remaining_time) + + second_part = row.copy() + second_part["start"] = first_part["end"] + + result.append(first_part) + result.append(second_part) + + second_part_duration = float(second_part["end"]) - float( + second_part["start"] + ) + + total_duration = second_part_duration + + else: + result.append(row) + + return result def load_anno(file_object, annotations): @@ -39,22 +89,6 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - language_id_to_locale_mapping = { - 0: "en-US", - 1: "es-ES", - 2: "fr-FR", - 3: "zh-CN", - 4: "hi-IN", - 5: "ar-EG", - 6: "pt-BR", - 7: "ja-JP", - 8: "de-DE", - 9: "ru-RU", - } - - # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language", -1)) - attributes = [] for i in range(1, len(headers)): @@ -76,6 +110,24 @@ def load_anno(file_object, annotations): } ) + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in 
tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) if record.get("language") else -1 + ) + shapes_data = [ { "type": "rectangle", @@ -112,9 +164,209 @@ def load_anno(file_object, annotations): def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) + file_name = os.path.basename(src_file.name) + name_without_extension = os.path.splitext(file_name)[0] + if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) - for p in anno_paths: - load_anno(p, instance_data) + if isinstance(instance_data, ProjectData): + project = instance_data.db_project + new_task = Task.objects.create( + project=project, + name=name_without_extension, + segment_size=0, + ) + new_task.save() + + with transaction.atomic(): + locked_instance = Task.objects.select_for_update().get(pk=new_task.id) + task_data = locked_instance.data + if not task_data: + task_data = Data.objects.create() + task_data.make_dirs() + locked_instance.data = task_data + locked_instance.save() + + clips_folder = os.path.join(temp_dir, "clips") + tsv_file_path = os.path.join(temp_dir, "data.tsv") + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + tsv_rows = list(reader) + + num_tsv_rows = len(tsv_rows) + num_clips = len(os.listdir(clips_folder)) + + if num_tsv_rows != num_clips: + raise ValueError( + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + ) + + # Combined audio that will be the final output + combined_audio = AudioSegment.empty() + + # Read TSV file to get the ordered list of audio files + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + + for row in reader: + audio_file_name = row[ + "audio_path" + ] # Assuming 'file' column contains audio file names + file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(file_path): + audio_segment = AudioSegment.from_file(file_path) + combined_audio += ( + audio_segment # Append the audio in the order from TSV + ) + + # Create raw folder to store combined audio + raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") + os.makedirs(raw_folder_path, exist_ok=True) + + combined_audio_path = os.path.join(raw_folder_path, "combined_audio.wav") + combined_audio.export(combined_audio_path, format="wav") + + data = { + "chunk_size": None, + "image_quality": 70, + "start_frame": 0, + "stop_frame": None, + "frame_filter": "", + "client_files": ["combined_audio.wav"], + "server_files": [], + "remote_files": [], + "use_zip_chunks": False, + "server_files_exclude": [], + "use_cache": False, + "copy_data": False, + "storage_method": "file_system", + "storage": "local", + "sorting_method": "lexicographical", + "filename_pattern": None, + } + + _create_thread( + locked_instance, data, is_task_import=True, temp_dir=temp_dir + ) + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + all_rows = list(reader) + + new_rows = split_rows_by_time(all_rows) + + jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") + + label_data = InstanceLabelData(instance_data.db_project) + + record_index = 0 + for job in jobs: + start_time = 0 + + while record_index < len(new_rows): + record = 
new_rows[record_index] + + record_duration = calculate_duration(record) + + end_time = start_time + record_duration + + label_name = record.get("label") + label_id = label_data._get_label_id(label_name) + + attributes = [] + + # Process dynamic attribute_i_name and attribute_i_value fields + attribute_index = 1 # Start with the first attribute + while True: + attribute_name_key = f"attribute_{attribute_index}_name" + attribute_value_key = f"attribute_{attribute_index}_value" + + # Check if the keys exist in the record + if ( + attribute_name_key in record + and attribute_value_key in record + ): + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + spec_id = label_data._get_attribute_id( + label_id, attribute_name + ) + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) + + attribute_index += 1 # Move to the next attribute index + else: + break # Exit the loop when no more attributes are found + + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) + if record.get("language") + else -1 + ) + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("normalized_text", ""), + "gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get( + language_id, "" + ), + "accent": record.get("accent", ""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + data = {"shapes": shapes_data} + start_time = end_time + + serializer = LabeledDataSerializer(data=data) + pk = int(job.id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + record_index += 1 + total_duration = round(end_time, 2) + if 599.9 <= total_duration <= 600: + break + + else: + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index 1e6be783ded1..9593d3b007ab 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -1,12 +1,62 @@ +import os import os.path as osp import zipfile +import csv +from django.db import transaction from glob import glob +from pydub import AudioSegment from cvat.apps.dataset_manager.bindings import InstanceLabelData from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer -from cvat.apps.engine.models import Task, Job +from cvat.apps.engine.models import Job, Task, Data +from cvat.apps.engine.task import _create_thread +from cvat.apps.dataset_manager.bindings import ProjectData + + +def calculate_duration(row): + start_time = float(row["start"]) # Assuming start and end times are in seconds + end_time = float(row["end"]) + return end_time - 
start_time + + +def split_rows_by_time(all_rows, time_threshold=600): + result = [] + + total_duration = 0 + + for row in all_rows: + start_time = float(row["start"]) + end_time = float(row["end"]) + duration = end_time - start_time + + total_duration += duration + + if total_duration > time_threshold: + # split logic here + total_duration_till_previous_row = total_duration - duration + remaining_time = time_threshold - total_duration_till_previous_row + + first_part = row.copy() + first_part["end"] = str(float(first_part["start"]) + remaining_time) + + second_part = row.copy() + second_part["start"] = first_part["end"] + + result.append(first_part) + result.append(second_part) + + second_part_duration = float(second_part["end"]) - float( + second_part["start"] + ) + + total_duration = second_part_duration + + else: + result.append(row) + + return result def load_anno(file_object, annotations): @@ -96,9 +146,188 @@ def load_anno(file_object, annotations): def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) + file_name = os.path.basename(src_file.name) + name_without_extension = os.path.splitext(file_name)[0] if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) - for p in anno_paths: - load_anno(p, instance_data) + if isinstance(instance_data, ProjectData): + project = instance_data.db_project + new_task = Task.objects.create( + project=project, + name=name_without_extension, + segment_size=0, + ) + new_task.save() + + with transaction.atomic(): + locked_instance = Task.objects.select_for_update().get(pk=new_task.id) + task_data = locked_instance.data + if not task_data: + task_data = Data.objects.create() + task_data.make_dirs() + locked_instance.data = task_data + locked_instance.save() + + clips_folder = os.path.join(temp_dir, "clips") + tsv_file_path = os.path.join(temp_dir, "data.tsv") + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + tsv_rows = list(reader) + + num_tsv_rows = len(tsv_rows) + num_clips = len(os.listdir(clips_folder)) + + if num_tsv_rows != num_clips: + raise ValueError( + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." 
+ ) + + # Combined audio that will be the final output + combined_audio = AudioSegment.empty() + + # Read TSV file to get the ordered list of audio files + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + + for row in reader: + audio_file_name = row[ + "path" + ] # Assuming 'file' column contains audio file names + file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(file_path): + audio_segment = AudioSegment.from_file(file_path) + combined_audio += ( + audio_segment # Append the audio in the order from TSV + ) + + # Create raw folder to store combined audio + raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") + os.makedirs(raw_folder_path, exist_ok=True) + + combined_audio_path = os.path.join(raw_folder_path, "combined_audio.wav") + combined_audio.export(combined_audio_path, format="wav") + + data = { + "chunk_size": None, + "image_quality": 70, + "start_frame": 0, + "stop_frame": None, + "frame_filter": "", + "client_files": ["combined_audio.wav"], + "server_files": [], + "remote_files": [], + "use_zip_chunks": False, + "server_files_exclude": [], + "use_cache": False, + "copy_data": False, + "storage_method": "file_system", + "storage": "local", + "sorting_method": "lexicographical", + "filename_pattern": None, + } + + _create_thread( + locked_instance, data, is_task_import=True, temp_dir=temp_dir + ) + + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + all_rows = list(reader) + + new_rows = split_rows_by_time(all_rows) + + jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") + + label_data = InstanceLabelData(instance_data.db_project) + + record_index = 0 + for job in jobs: + start_time = 0 + + while record_index < len(new_rows): + record = new_rows[record_index] + + record_duration = calculate_duration(record) + + end_time = start_time + record_duration + + label_name = record.get("label") + label_id = label_data._get_label_id(label_name) + + attributes = [] + + # Process dynamic attribute_i_name and attribute_i_value fields + attribute_index = 1 # Start with the first attribute + while True: + attribute_name_key = f"attribute_{attribute_index}_name" + attribute_value_key = f"attribute_{attribute_index}_value" + + # Check if the keys exist in the record + if ( + attribute_name_key in record + and attribute_value_key in record + ): + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + spec_id = label_data._get_attribute_id( + label_id, attribute_name + ) + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) + + attribute_index += 1 # Move to the next attribute index + else: + break # Exit the loop when no more attributes are found + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("sentence", ""), + "gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": record.get("locale", ""), + "accent": record.get("accents", ""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + data = {"shapes": shapes_data} + start_time = end_time + + serializer = 
LabeledDataSerializer(data=data) + pk = int(job.id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + record_index += 1 + total_duration = round(end_time, 2) + if 599.9 <= total_duration <= 600: + break + + else: + + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py index 4ec6e7543699..5d5018b9edf6 100644 --- a/cvat/apps/dataset_manager/formats/librispeech.py +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -1,12 +1,62 @@ +import os import os.path as osp import zipfile +import csv +from django.db import transaction from glob import glob +from pydub import AudioSegment from cvat.apps.dataset_manager.bindings import InstanceLabelData from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer -from cvat.apps.engine.models import Task, Job +from cvat.apps.engine.models import Job, Task, Data +from cvat.apps.engine.task import _create_thread +from cvat.apps.dataset_manager.bindings import ProjectData + + +def calculate_duration(row): + start_time = float(row["start"]) # Assuming start and end times are in seconds + end_time = float(row["end"]) + return end_time - start_time + + +def split_rows_by_time(all_rows, time_threshold=600): + result = [] + + total_duration = 0 + + for row in all_rows: + start_time = float(row["start"]) + end_time = float(row["end"]) + duration = end_time - start_time + + total_duration += duration + + if total_duration > time_threshold: + # split logic here + total_duration_till_previous_row = total_duration - duration + remaining_time = time_threshold - total_duration_till_previous_row + + first_part = row.copy() + first_part["end"] = str(float(first_part["start"]) + remaining_time) + + second_part = row.copy() + second_part["start"] = first_part["end"] + + result.append(first_part) + result.append(second_part) + + second_part_duration = float(second_part["end"]) - float( + second_part["start"] + ) + + total_duration = second_part_duration + + else: + result.append(row) + + return result def load_anno(file_object, annotations): @@ -74,7 +124,9 @@ def load_anno(file_object, annotations): } # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language", -1)) + language_id = ( + int(float(record.get("language", -1))) if record.get("language") else -1 + ) shapes_data = [ { @@ -112,9 +164,209 @@ def load_anno(file_object, annotations): def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) + file_name = os.path.basename(src_file.name) + name_without_extension = os.path.splitext(file_name)[0] + if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) - for p in anno_paths: - load_anno(p, instance_data) + if isinstance(instance_data, ProjectData): + project = instance_data.db_project + new_task = Task.objects.create( + project=project, + name=name_without_extension, + segment_size=0, + ) + new_task.save() + + with transaction.atomic(): + locked_instance = Task.objects.select_for_update().get(pk=new_task.id) + task_data = locked_instance.data + if not 
task_data: + task_data = Data.objects.create() + task_data.make_dirs() + locked_instance.data = task_data + locked_instance.save() + + clips_folder = os.path.join(temp_dir, "clips") + tsv_file_path = os.path.join(temp_dir, "data.tsv") + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + tsv_rows = list(reader) + + num_tsv_rows = len(tsv_rows) + num_clips = len(os.listdir(clips_folder)) + + if num_tsv_rows != num_clips: + raise ValueError( + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + ) + + # Combined audio that will be the final output + combined_audio = AudioSegment.empty() + + # Read TSV file to get the ordered list of audio files + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + + for row in reader: + audio_file_name = row[ + "file" + ] # Assuming 'file' column contains audio file names + file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(file_path): + audio_segment = AudioSegment.from_file(file_path) + combined_audio += ( + audio_segment # Append the audio in the order from TSV + ) + + # Create raw folder to store combined audio + raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") + os.makedirs(raw_folder_path, exist_ok=True) + + combined_audio_path = os.path.join(raw_folder_path, "combined_audio.wav") + combined_audio.export(combined_audio_path, format="wav") + + data = { + "chunk_size": None, + "image_quality": 70, + "start_frame": 0, + "stop_frame": None, + "frame_filter": "", + "client_files": ["combined_audio.wav"], + "server_files": [], + "remote_files": [], + "use_zip_chunks": False, + "server_files_exclude": [], + "use_cache": False, + "copy_data": False, + "storage_method": "file_system", + "storage": "local", + "sorting_method": "lexicographical", + "filename_pattern": None, + } + + _create_thread( + locked_instance, data, is_task_import=True, temp_dir=temp_dir + ) + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + all_rows = list(reader) + + new_rows = split_rows_by_time(all_rows) + + jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") + + label_data = InstanceLabelData(instance_data.db_project) + + record_index = 0 + for job in jobs: + start_time = 0 + + while record_index < len(new_rows): + record = new_rows[record_index] + + record_duration = calculate_duration(record) + + end_time = start_time + record_duration + + label_name = record.get("label") + label_id = label_data._get_label_id(label_name) + + attributes = [] + + # Process dynamic attribute_i_name and attribute_i_value fields + attribute_index = 1 # Start with the first attribute + while True: + attribute_name_key = f"attribute_{attribute_index}_name" + attribute_value_key = f"attribute_{attribute_index}_value" + + # Check if the keys exist in the record + if ( + attribute_name_key in record + and attribute_value_key in record + ): + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + spec_id = label_data._get_attribute_id( + label_id, attribute_name + ) + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) + + attribute_index += 1 # Move to the next attribute index + else: + break # Exit the loop when no more attributes are found 
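# --- Editor's note: illustrative sketch, not part of the patch ---
# The loop above walks paired attribute_<i>_name / attribute_<i>_value columns
# of a TSV row until the next index is missing. A standalone version of that
# logic, assuming `record` is a dict produced by csv.DictReader and that
# `get_spec_id` stands in for label_data._get_attribute_id(label_id, name)
# (both names here are placeholders for illustration), could look like:
#
# def collect_attributes(record, get_spec_id):
#     """Collect CVAT attribute payloads from attribute_<i>_name/value columns."""
#     attributes = []
#     index = 1
#     while (f"attribute_{index}_name" in record
#            and f"attribute_{index}_value" in record):
#         name = record[f"attribute_{index}_name"]
#         value = record[f"attribute_{index}_value"]
#         if name and value:
#             attributes.append({"spec_id": get_spec_id(name), "value": value})
#         index += 1
#     return attributes
#
# --- end editor's note ---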
+ + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) + if record.get("language") + else -1 + ) + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + "gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get( + language_id, "" + ), + "accent": record.get("accent", ""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + data = {"shapes": shapes_data} + start_time = end_time + + serializer = LabeledDataSerializer(data=data) + pk = int(job.id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + record_index += 1 + total_duration = round(end_time, 2) + if 599.9 <= total_duration <= 600: + break + + else: + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py index 3cb49283d674..6463713e1c1f 100644 --- a/cvat/apps/dataset_manager/formats/tedlium.py +++ b/cvat/apps/dataset_manager/formats/tedlium.py @@ -1,12 +1,62 @@ +import os import os.path as osp import zipfile +import csv +from django.db import transaction from glob import glob +from pydub import AudioSegment from cvat.apps.dataset_manager.bindings import InstanceLabelData from cvat.apps.engine.serializers import LabeledDataSerializer import cvat.apps.dataset_manager as dm from cvat.apps.dataset_manager.task import PatchAction from .registry import importer -from cvat.apps.engine.models import Task, Job +from cvat.apps.engine.models import Job, Task, Data +from cvat.apps.engine.task import _create_thread +from cvat.apps.dataset_manager.bindings import ProjectData + + +def calculate_duration(row): + start_time = float(row["start"]) # Assuming start and end times are in seconds + end_time = float(row["end"]) + return end_time - start_time + + +def split_rows_by_time(all_rows, time_threshold=600): + result = [] + + total_duration = 0 + + for row in all_rows: + start_time = float(row["start"]) + end_time = float(row["end"]) + duration = end_time - start_time + + total_duration += duration + + if total_duration > time_threshold: + # split logic here + total_duration_till_previous_row = total_duration - duration + remaining_time = time_threshold - total_duration_till_previous_row + + first_part = row.copy() + first_part["end"] = str(float(first_part["start"]) + remaining_time) + + second_part = row.copy() + second_part["start"] = first_part["end"] + + result.append(first_part) + result.append(second_part) + + second_part_duration = float(second_part["end"]) - float( + second_part["start"] + ) + + total_duration = second_part_duration + + else: + result.append(row) + + return result def load_anno(file_object, annotations): @@ -39,22 +89,6 @@ def load_anno(file_object, annotations): label_name = record.get("label") label_id = label_data._get_label_id(label_name) - 
language_id_to_locale_mapping = { - 0: "en-US", - 1: "es-ES", - 2: "fr-FR", - 3: "zh-CN", - 4: "hi-IN", - 5: "ar-EG", - 6: "pt-BR", - 7: "ja-JP", - 8: "de-DE", - 9: "ru-RU", - } - - # defaults to -1 if language field not in tsv, locale will be an empty string - language_id = int(record.get("language", -1)) - attributes = [] for i in range(1, len(headers)): @@ -76,6 +110,24 @@ def load_anno(file_object, annotations): } ) + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) if record.get("language") else -1 + ) + shapes_data = [ { "type": "rectangle", @@ -112,9 +164,209 @@ def load_anno(file_object, annotations): def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) + file_name = os.path.basename(src_file.name) + name_without_extension = os.path.splitext(file_name)[0] + if is_zip: zipfile.ZipFile(src_file).extractall(temp_dir) - anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) - for p in anno_paths: - load_anno(p, instance_data) + if isinstance(instance_data, ProjectData): + project = instance_data.db_project + new_task = Task.objects.create( + project=project, + name=name_without_extension, + segment_size=0, + ) + new_task.save() + + with transaction.atomic(): + locked_instance = Task.objects.select_for_update().get(pk=new_task.id) + task_data = locked_instance.data + if not task_data: + task_data = Data.objects.create() + task_data.make_dirs() + locked_instance.data = task_data + locked_instance.save() + + clips_folder = os.path.join(temp_dir, "clips") + tsv_file_path = os.path.join(temp_dir, "data.tsv") + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + tsv_rows = list(reader) + + num_tsv_rows = len(tsv_rows) + num_clips = len(os.listdir(clips_folder)) + + if num_tsv_rows != num_clips: + raise ValueError( + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." 
+ ) + + # Combined audio that will be the final output + combined_audio = AudioSegment.empty() + + # Read TSV file to get the ordered list of audio files + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + + for row in reader: + audio_file_name = row[ + "file" + ] # Assuming 'file' column contains audio file names + file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(file_path): + audio_segment = AudioSegment.from_file(file_path) + combined_audio += ( + audio_segment # Append the audio in the order from TSV + ) + + # Create raw folder to store combined audio + raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") + os.makedirs(raw_folder_path, exist_ok=True) + + combined_audio_path = os.path.join(raw_folder_path, "combined_audio.wav") + combined_audio.export(combined_audio_path, format="wav") + + data = { + "chunk_size": None, + "image_quality": 70, + "start_frame": 0, + "stop_frame": None, + "frame_filter": "", + "client_files": ["combined_audio.wav"], + "server_files": [], + "remote_files": [], + "use_zip_chunks": False, + "server_files_exclude": [], + "use_cache": False, + "copy_data": False, + "storage_method": "file_system", + "storage": "local", + "sorting_method": "lexicographical", + "filename_pattern": None, + } + + _create_thread( + locked_instance, data, is_task_import=True, temp_dir=temp_dir + ) + + with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: + reader = csv.DictReader(tsvfile, delimiter="\t") + all_rows = list(reader) + + new_rows = split_rows_by_time(all_rows) + + jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") + + label_data = InstanceLabelData(instance_data.db_project) + + record_index = 0 + for job in jobs: + start_time = 0 + + while record_index < len(new_rows): + record = new_rows[record_index] + + record_duration = calculate_duration(record) + + end_time = start_time + record_duration + + label_name = record.get("label") + label_id = label_data._get_label_id(label_name) + + attributes = [] + + # Process dynamic attribute_i_name and attribute_i_value fields + attribute_index = 1 # Start with the first attribute + while True: + attribute_name_key = f"attribute_{attribute_index}_name" + attribute_value_key = f"attribute_{attribute_index}_value" + + # Check if the keys exist in the record + if ( + attribute_name_key in record + and attribute_value_key in record + ): + attribute_name = record.get(attribute_name_key) + attribute_value = record.get(attribute_value_key) + + if attribute_name and attribute_value: + spec_id = label_data._get_attribute_id( + label_id, attribute_name + ) + attributes.append( + { + "spec_id": spec_id, + "value": attribute_value, + } + ) + + attribute_index += 1 # Move to the next attribute index + else: + break # Exit the loop when no more attributes are found + + language_id_to_locale_mapping = { + 0: "en-US", + 1: "es-ES", + 2: "fr-FR", + 3: "zh-CN", + 4: "hi-IN", + 5: "ar-EG", + 6: "pt-BR", + 7: "ja-JP", + 8: "de-DE", + 9: "ru-RU", + } + + # defaults to -1 if language field not in tsv, locale will be an empty string + language_id = ( + int(float(record.get("language", -1))) + if record.get("language") + else -1 + ) + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "transcript": record.get("text", ""), + 
"gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": language_id_to_locale_mapping.get( + language_id, "" + ), + "accent": record.get("accent", ""), + "emotion": record.get("emotion", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + data = {"shapes": shapes_data} + start_time = end_time + + serializer = LabeledDataSerializer(data=data) + pk = int(job.id) + action = PatchAction.CREATE + + if serializer.is_valid(raise_exception=True): + data = dm.task.patch_job_data(pk, serializer.data, action) + + record_index += 1 + total_duration = round(end_time, 2) + if 599.9 <= total_duration <= 600: + break + + else: + anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) + for p in anno_paths: + load_anno(p, instance_data) From 916f1ed1ece302afbe6bbb019a2cae61e2cea425 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Sun, 29 Sep 2024 15:56:44 +0530 Subject: [PATCH 09/14] minor_fix --- cvat/apps/dataset_manager/formats/LibriVox.py | 2 +- cvat/apps/dataset_manager/formats/VCTK_Corpus.py | 2 +- cvat/apps/dataset_manager/formats/VoxCeleb.py | 2 +- cvat/apps/dataset_manager/formats/Voxpopuli.py | 2 +- cvat/apps/dataset_manager/formats/common_voice.py | 2 +- cvat/apps/dataset_manager/formats/librispeech.py | 2 +- cvat/apps/dataset_manager/formats/tedlium.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py index a8a5924c58ed..8a43edd9bb27 100644 --- a/cvat/apps/dataset_manager/formats/LibriVox.py +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -249,7 +249,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs } _create_thread( - locked_instance, data, is_task_import=True, temp_dir=temp_dir + locked_instance, data ) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py index 87e68cdec291..ac074b327d6f 100644 --- a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -249,7 +249,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs } _create_thread( - locked_instance, data, is_task_import=True, temp_dir=temp_dir + locked_instance, data ) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py index ab527a5aa3d5..1738a661c33e 100644 --- a/cvat/apps/dataset_manager/formats/VoxCeleb.py +++ b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -249,7 +249,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs } _create_thread( - locked_instance, data, is_task_import=True, temp_dir=temp_dir + locked_instance, data ) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py b/cvat/apps/dataset_manager/formats/Voxpopuli.py index 17c179ceb75b..c5c50370e791 100644 --- a/cvat/apps/dataset_manager/formats/Voxpopuli.py +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -249,7 +249,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs } _create_thread( - locked_instance, data, is_task_import=True, temp_dir=temp_dir + locked_instance, data ) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: diff 
--git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index 9593d3b007ab..0ce38e8894d4 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -230,7 +230,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs } _create_thread( - locked_instance, data, is_task_import=True, temp_dir=temp_dir + locked_instance, data ) diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py index 5d5018b9edf6..6fc7d0ffdd3a 100644 --- a/cvat/apps/dataset_manager/formats/librispeech.py +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -249,7 +249,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs } _create_thread( - locked_instance, data, is_task_import=True, temp_dir=temp_dir + locked_instance, data ) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py index 6463713e1c1f..fd34779b5629 100644 --- a/cvat/apps/dataset_manager/formats/tedlium.py +++ b/cvat/apps/dataset_manager/formats/tedlium.py @@ -249,7 +249,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs } _create_thread( - locked_instance, data, is_task_import=True, temp_dir=temp_dir + locked_instance, data ) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: From 32f25ac9fb55c4504ba3e358924f4cf3abf74d7a Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Sun, 29 Sep 2024 15:58:32 +0530 Subject: [PATCH 10/14] formatting fix --- cvat/apps/dataset_manager/formats/LibriVox.py | 4 +--- cvat/apps/dataset_manager/formats/VCTK_Corpus.py | 4 +--- cvat/apps/dataset_manager/formats/VoxCeleb.py | 4 +--- cvat/apps/dataset_manager/formats/Voxpopuli.py | 4 +--- cvat/apps/dataset_manager/formats/common_voice.py | 5 +---- cvat/apps/dataset_manager/formats/librispeech.py | 4 +--- cvat/apps/dataset_manager/formats/tedlium.py | 4 +--- 7 files changed, 7 insertions(+), 22 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py index 8a43edd9bb27..4967fba1c16f 100644 --- a/cvat/apps/dataset_manager/formats/LibriVox.py +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -248,9 +248,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs "filename_pattern": None, } - _create_thread( - locked_instance, data - ) + _create_thread(locked_instance, data) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: reader = csv.DictReader(tsvfile, delimiter="\t") diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py index ac074b327d6f..344dec795504 100644 --- a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -248,9 +248,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs "filename_pattern": None, } - _create_thread( - locked_instance, data - ) + _create_thread(locked_instance, data) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: reader = csv.DictReader(tsvfile, delimiter="\t") diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py index 1738a661c33e..c557ea472bfa 100644 --- a/cvat/apps/dataset_manager/formats/VoxCeleb.py +++ 
b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -248,9 +248,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs "filename_pattern": None, } - _create_thread( - locked_instance, data - ) + _create_thread(locked_instance, data) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: reader = csv.DictReader(tsvfile, delimiter="\t") diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py b/cvat/apps/dataset_manager/formats/Voxpopuli.py index c5c50370e791..774ba4ed3a79 100644 --- a/cvat/apps/dataset_manager/formats/Voxpopuli.py +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -248,9 +248,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs "filename_pattern": None, } - _create_thread( - locked_instance, data - ) + _create_thread(locked_instance, data) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: reader = csv.DictReader(tsvfile, delimiter="\t") diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index 0ce38e8894d4..0be3c228100c 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -229,10 +229,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs "filename_pattern": None, } - _create_thread( - locked_instance, data - ) - + _create_thread(locked_instance, data) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: reader = csv.DictReader(tsvfile, delimiter="\t") diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py index 6fc7d0ffdd3a..715b0eb1cf1c 100644 --- a/cvat/apps/dataset_manager/formats/librispeech.py +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -248,9 +248,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs "filename_pattern": None, } - _create_thread( - locked_instance, data - ) + _create_thread(locked_instance, data) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: reader = csv.DictReader(tsvfile, delimiter="\t") diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py index fd34779b5629..776434d0ec95 100644 --- a/cvat/apps/dataset_manager/formats/tedlium.py +++ b/cvat/apps/dataset_manager/formats/tedlium.py @@ -248,9 +248,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs "filename_pattern": None, } - _create_thread( - locked_instance, data - ) + _create_thread(locked_instance, data) with open(tsv_file_path, "r", newline="", encoding="utf-8") as tsvfile: reader = csv.DictReader(tsvfile, delimiter="\t") From 60a58735157157100a529d7dd009a3c8d7492cb5 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Tue, 1 Oct 2024 18:42:56 +0530 Subject: [PATCH 11/14] formatting_changes --- .../formats/aud_common_voice.py | 45 +- cvat/apps/dataset_manager/formats/camvid.py | 29 +- .../dataset_manager/formats/cityscapes.py | 43 +- cvat/apps/dataset_manager/formats/coco.py | 53 +- cvat/apps/dataset_manager/formats/cvat.py | 1743 +++++++---------- cvat/apps/dataset_manager/formats/icdar.py | 74 +- cvat/apps/dataset_manager/formats/imagenet.py | 21 +- cvat/apps/dataset_manager/formats/kitti.py | 37 +- cvat/apps/dataset_manager/formats/labelme.py | 24 +- cvat/apps/dataset_manager/formats/lfw.py | 18 +- .../dataset_manager/formats/market1501.py | 37 +- cvat/apps/dataset_manager/formats/mask.py | 34 
+- cvat/apps/dataset_manager/formats/mot.py | 66 +- cvat/apps/dataset_manager/formats/mots.py | 69 +- .../dataset_manager/formats/openimages.py | 57 +- .../dataset_manager/formats/pascal_voc.py | 42 +- .../dataset_manager/formats/pointcloud.py | 42 +- cvat/apps/dataset_manager/formats/registry.py | 91 +- .../formats/transformations.py | 58 +- cvat/apps/dataset_manager/formats/utils.py | 64 +- .../dataset_manager/formats/velodynepoint.py | 37 +- cvat/apps/dataset_manager/formats/vggface2.py | 23 +- .../apps/dataset_manager/formats/widerface.py | 20 +- cvat/apps/dataset_manager/formats/yolo.py | 39 +- 24 files changed, 1056 insertions(+), 1710 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/aud_common_voice.py b/cvat/apps/dataset_manager/formats/aud_common_voice.py index 417d90965788..bd1457d819e0 100644 --- a/cvat/apps/dataset_manager/formats/aud_common_voice.py +++ b/cvat/apps/dataset_manager/formats/aud_common_voice.py @@ -4,17 +4,16 @@ # SPDX-License-Identifier: MIT + + + import os.path as osp from glob import glob from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - import_dm_annotations, - match_dm_item, - find_dataset_root, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, + import_dm_annotations, match_dm_item, find_dataset_root) from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.extractor import DatasetItem from datumaro.components.project import Dataset @@ -22,48 +21,42 @@ from .registry import dm_env, exporter, importer from cvat.apps.engine.log import ServerLogManager - slogger = ServerLogManager(__name__) -@exporter(name="YOLO", ext="ZIP", version="1.1") +@exporter(name='YOLO', ext='ZIP', version='1.1') def _export(dst_file, temp_dir, instance_data, save_images=False): slogger.glob.info("HEYLO YOLO EXPORTER AUDINO") # slogger.glob.debug() - dataset = Dataset.from_extractors( - GetCVATDataExtractor(instance_data, include_images=save_images), env=dm_env - ) + dataset = Dataset.from_extractors(GetCVATDataExtractor( + instance_data, include_images=save_images), env=dm_env) - dataset.export(temp_dir, "yolo", save_images=save_images) + dataset.export(temp_dir, 'yolo', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="YOLO", ext="ZIP", version="1.1") +@importer(name='YOLO', ext='ZIP', version='1.1') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) image_info = {} - frames = [ - YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) - for p in glob(osp.join(temp_dir, "**", "*.txt"), recursive=True) - ] + frames = [YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) + for p in glob(osp.join(temp_dir, '**', '*.txt'), recursive=True)] root_hint = find_dataset_root( - [DatasetItem(id=frame) for frame in frames], instance_data - ) + [DatasetItem(id=frame) for frame in frames], instance_data) for frame in frames: frame_info = None try: - frame_id = match_dm_item( - DatasetItem(id=frame), instance_data, root_hint=root_hint - ) + frame_id = match_dm_item(DatasetItem(id=frame), instance_data, + root_hint=root_hint) frame_info = instance_data.frame_info[frame_id] - except Exception: # nosec + except Exception: # nosec pass if frame_info is not None: - image_info[frame] = (frame_info["height"], frame_info["width"]) + image_info[frame] = (frame_info['height'], frame_info['width']) - dataset = Dataset.import_from(temp_dir, "yolo", env=dm_env, image_info=image_info) + 
dataset = Dataset.import_from(temp_dir, 'yolo', + env=dm_env, image_info=image_info) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/camvid.py b/cvat/apps/dataset_manager/formats/camvid.py index dcbc72c94189..75cea9e98bd4 100644 --- a/cvat/apps/dataset_manager/formats/camvid.py +++ b/cvat/apps/dataset_manager/formats/camvid.py @@ -6,10 +6,8 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -17,35 +15,30 @@ from .utils import make_colormap -@exporter(name="CamVid", ext="ZIP", version="1.0") +@exporter(name='CamVid', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform("polygons_to_masks") - dataset.transform("boxes_to_masks") - dataset.transform("merge_instance_segments") + dataset.transform('polygons_to_masks') + dataset.transform('boxes_to_masks') + dataset.transform('merge_instance_segments') label_map = make_colormap(instance_data) - dataset.export( - temp_dir, - "camvid", - save_images=save_images, - apply_colormap=True, - label_map={label: label_map[label][0] for label in label_map}, - ) + dataset.export(temp_dir, 'camvid', + save_images=save_images, apply_colormap=True, + label_map={label: label_map[label][0] for label in label_map}) make_zip_archive(temp_dir, dst_file) - -@importer(name="CamVid", ext="ZIP", version="1.0") +@importer(name='CamVid', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) # We do not run detect_dataset before import because the Camvid format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - dataset = Dataset.import_from(temp_dir, "camvid", env=dm_env) + dataset = Dataset.import_from(temp_dir, 'camvid', env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/cityscapes.py b/cvat/apps/dataset_manager/formats/cityscapes.py index b82accdca22c..ea39578ea3f3 100644 --- a/cvat/apps/dataset_manager/formats/cityscapes.py +++ b/cvat/apps/dataset_manager/formats/cityscapes.py @@ -9,11 +9,8 @@ from datumaro.plugins.cityscapes_format import write_label_map from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -21,43 +18,33 @@ from .utils import make_colormap -@exporter(name="Cityscapes", ext="ZIP", version="1.0") +@exporter(name='Cityscapes', ext='ZIP', 
version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform("polygons_to_masks") - dataset.transform("boxes_to_masks") - dataset.transform("merge_instance_segments") + dataset.transform('polygons_to_masks') + dataset.transform('boxes_to_masks') + dataset.transform('merge_instance_segments') - dataset.export( - temp_dir, - "cityscapes", - save_images=save_images, - apply_colormap=True, - label_map={ - label: info[0] for label, info in make_colormap(instance_data).items() - }, - ) + dataset.export(temp_dir, 'cityscapes', save_images=save_images, + apply_colormap=True, label_map={label: info[0] + for label, info in make_colormap(instance_data).items()}) make_zip_archive(temp_dir, dst_file) - -@importer(name="Cityscapes", ext="ZIP", version="1.0") +@importer(name='Cityscapes', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - labelmap_file = osp.join(temp_dir, "label_colors.txt") + labelmap_file = osp.join(temp_dir, 'label_colors.txt') if not osp.isfile(labelmap_file): - colormap = { - label: info[0] for label, info in make_colormap(instance_data).items() - } + colormap = {label: info[0] + for label, info in make_colormap(instance_data).items()} write_label_map(labelmap_file, colormap) - detect_dataset( - temp_dir, format_name="cityscapes", importer=dm_env.importers.get("cityscapes") - ) - dataset = Dataset.import_from(temp_dir, "cityscapes", env=dm_env) + detect_dataset(temp_dir, format_name='cityscapes', importer= dm_env.importers.get('cityscapes')) + dataset = Dataset.import_from(temp_dir, 'cityscapes', env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/coco.py b/cvat/apps/dataset_manager/formats/coco.py index 4943b0c1a3e0..556feea104e1 100644 --- a/cvat/apps/dataset_manager/formats/coco.py +++ b/cvat/apps/dataset_manager/formats/coco.py @@ -9,77 +9,64 @@ from datumaro.components.annotation import AnnotationType from datumaro.plugins.coco_format.importer import CocoImporter -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset, \ + import_dm_annotations from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer - -@exporter(name="COCO", ext="ZIP", version="1.0") +@exporter(name='COCO', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export( - temp_dir, "coco_instances", save_images=save_images, merge_images=True - ) + dataset.export(temp_dir, 'coco_instances', save_images=save_images, + merge_images=True) make_zip_archive(temp_dir, dst_file) - -@importer(name="COCO", ext="JSON, ZIP", version="1.0") +@importer(name='COCO', ext='JSON, ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): if zipfile.is_zipfile(src_file): zipfile.ZipFile(src_file).extractall(temp_dir) # We use 
coco importer because it gives better error message - detect_dataset(temp_dir, format_name="coco", importer=CocoImporter) - dataset = Dataset.import_from(temp_dir, "coco_instances", env=dm_env) + detect_dataset(temp_dir, format_name='coco', importer=CocoImporter) + dataset = Dataset.import_from(temp_dir, 'coco_instances', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) else: - dataset = Dataset.import_from(src_file.name, "coco_instances", env=dm_env) + dataset = Dataset.import_from(src_file.name, + 'coco_instances', env=dm_env) import_dm_annotations(dataset, instance_data) - -@exporter(name="COCO Keypoints", ext="ZIP", version="1.0") +@exporter(name='COCO Keypoints', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export( - temp_dir, - "coco_person_keypoints", - save_images=save_images, - merge_images=True, - ) + dataset.export(temp_dir, 'coco_person_keypoints', save_images=save_images, + merge_images=True) make_zip_archive(temp_dir, dst_file) - -@importer(name="COCO Keypoints", ext="JSON, ZIP", version="1.0") +@importer(name='COCO Keypoints', ext='JSON, ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): def remove_extra_annotations(dataset): for item in dataset: - annotations = [ - ann for ann in item.annotations if ann.type != AnnotationType.bbox - ] + annotations = [ann for ann in item.annotations + if ann.type != AnnotationType.bbox] item.annotations = annotations if zipfile.is_zipfile(src_file): zipfile.ZipFile(src_file).extractall(temp_dir) # We use coco importer because it gives better error message - detect_dataset(temp_dir, format_name="coco", importer=CocoImporter) - dataset = Dataset.import_from(temp_dir, "coco_person_keypoints", env=dm_env) + detect_dataset(temp_dir, format_name='coco', importer=CocoImporter) + dataset = Dataset.import_from(temp_dir, 'coco_person_keypoints', env=dm_env) remove_extra_annotations(dataset) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) else: - dataset = Dataset.import_from( - src_file.name, "coco_person_keypoints", env=dm_env - ) + dataset = Dataset.import_from(src_file.name, + 'coco_person_keypoints', env=dm_env) remove_extra_annotations(dataset) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 6f6e8b98c788..99293fe470d4 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -11,31 +11,21 @@ from io import BufferedWriter from typing import Callable -from datumaro.components.annotation import ( - AnnotationType, - Bbox, - Label, - LabelCategories, - Points, - Polygon, - PolyLine, - Skeleton, -) +from datumaro.components.annotation import (AnnotationType, Bbox, Label, + LabelCategories, Points, Polygon, + PolyLine, Skeleton) from datumaro.components.dataset import Dataset, DatasetItem -from datumaro.components.extractor import DEFAULT_SUBSET_NAME, Extractor, Importer +from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, Extractor, + Importer) from datumaro.plugins.cvat_format.extractor import CvatImporter as _CvatImporter from datumaro.util.image import Image from defusedxml import 
ElementTree -from cvat.apps.dataset_manager.bindings import ( - ProjectData, - CommonData, - detect_dataset, - get_defaulted_subset, - import_dm_annotations, - match_dm_item, -) +from cvat.apps.dataset_manager.bindings import (ProjectData, CommonData, detect_dataset, + get_defaulted_subset, + import_dm_annotations, + match_dm_item) from cvat.apps.dataset_manager.util import make_zip_archive from cvat.apps.engine.frame_provider import FrameProvider @@ -43,20 +33,19 @@ class CvatPath: - IMAGES_DIR = "images" + IMAGES_DIR = 'images' - MEDIA_EXTS = (".jpg", ".jpeg", ".png") - - BUILTIN_ATTRS = {"occluded", "outside", "keyframe", "track_id"} + MEDIA_EXTS = ('.jpg', '.jpeg', '.png') + BUILTIN_ATTRS = {'occluded', 'outside', 'keyframe', 'track_id'} class CvatExtractor(Extractor): - _SUPPORTED_SHAPES = ("box", "polygon", "polyline", "points", "skeleton") + _SUPPORTED_SHAPES = ('box', 'polygon', 'polyline', 'points', 'skeleton') def __init__(self, path, subsets=None): assert osp.isfile(path), path rootpath = osp.dirname(path) - images_dir = "" + images_dir = '' if osp.isdir(osp.join(rootpath, CvatPath.IMAGES_DIR)): images_dir = osp.join(rootpath, CvatPath.IMAGES_DIR) self._images_dir = images_dir @@ -82,9 +71,7 @@ def __len__(self): return len(self._items) def get(self, _id, subset=DEFAULT_SUBSET_NAME): - assert subset in self._subsets, "{} not in {}".format( - subset, ", ".join(self._subsets) - ) + assert subset in self._subsets, '{} not in {}'.format(subset, ', '.join(self._subsets)) return super().get(_id, subset) @staticmethod @@ -93,13 +80,13 @@ def _get_subsets_from_anno(path): context = iter(context) for ev, el in context: - if ev == "start": - if el.tag == "subsets": + if ev == 'start': + if el.tag == 'subsets': if el.text is not None: - subsets = el.text.split("\n") + subsets = el.text.split('\n') return subsets - if ev == "end": - if el.tag == "meta": + if ev == 'end': + if el.tag == 'meta': return [DEFAULT_SUBSET_NAME] el.clear() return [DEFAULT_SUBSET_NAME] @@ -112,20 +99,15 @@ def parse_image_dir(image_dir, subset): for file in sorted(glob(image_dir), key=osp.basename): name, ext = osp.splitext(osp.basename(file)) if ext.lower() in CvatPath.MEDIA_EXTS: - items[(subset, name)] = DatasetItem( - id=name, - annotations=[], - image=Image(path=file), - subset=subset or DEFAULT_SUBSET_NAME, + items[(subset, name)] = DatasetItem(id=name, annotations=[], + image=Image(path=file), subset=subset or DEFAULT_SUBSET_NAME, ) - if subsets == [DEFAULT_SUBSET_NAME] and not osp.isdir( - osp.join(image_dir, DEFAULT_SUBSET_NAME) - ): - parse_image_dir(osp.join(image_dir, "*.*"), None) + if subsets == [DEFAULT_SUBSET_NAME] and not osp.isdir(osp.join(image_dir, DEFAULT_SUBSET_NAME)): + parse_image_dir(osp.join(image_dir, '*.*'), None) else: for subset in subsets: - parse_image_dir(osp.join(image_dir, subset, "*.*"), subset) + parse_image_dir(osp.join(image_dir, subset, '*.*'), subset) return items @classmethod @@ -148,36 +130,33 @@ def _parse(cls, path): image = None subset = None for ev, el in context: - if ev == "start": - if el.tag == "track": - frame_size = ( - tasks_info[int(el.attrib.get("task_id"))]["frame_size"] - if el.attrib.get("task_id") - else tuple(tasks_info.values())[0]["frame_size"] - ) + if ev == 'start': + if el.tag == 'track': + frame_size = tasks_info[int(el.attrib.get('task_id'))]['frame_size'] \ + if el.attrib.get('task_id') else tuple(tasks_info.values())[0]['frame_size'] track = { - "id": el.attrib["id"], - "label": el.attrib.get("label"), - "group": int(el.attrib.get("group_id", 
0)), - "height": frame_size[0], - "width": frame_size[1], + 'id': el.attrib['id'], + 'label': el.attrib.get('label'), + 'group': int(el.attrib.get('group_id', 0)), + 'height': frame_size[0], + 'width': frame_size[1], } - subset = el.attrib.get("subset") + subset = el.attrib.get('subset') track_shapes = {} - elif el.tag == "image": + elif el.tag == 'image': image = { - "name": el.attrib.get("name"), - "frame": el.attrib["id"], - "width": el.attrib.get("width"), - "height": el.attrib.get("height"), + 'name': el.attrib.get('name'), + 'frame': el.attrib['id'], + 'width': el.attrib.get('width'), + 'height': el.attrib.get('height'), } - subset = el.attrib.get("subset") + subset = el.attrib.get('subset') elif el.tag in cls._SUPPORTED_SHAPES and (track or image): - if shape and shape["type"] == "skeleton": + if shape and shape['type'] == 'skeleton': element_attributes = {} shape_element = { - "type": "rectangle" if el.tag == "box" else el.tag, - "attributes": element_attributes, + 'type': 'rectangle' if el.tag == 'box' else el.tag, + 'attributes': element_attributes, } if track: shape_element.update(track) @@ -186,191 +165,150 @@ def _parse(cls, path): else: attributes = {} shape = { - "type": "rectangle" if el.tag == "box" else el.tag, - "attributes": attributes, + 'type': 'rectangle' if el.tag == 'box' else el.tag, + 'attributes': attributes, } - shape["elements"] = [] + shape['elements'] = [] if track: shape.update(track) - shape["track_id"] = int(track["id"]) - shape["frame"] = el.attrib["frame"] + shape['track_id'] = int(track['id']) + shape['frame'] = el.attrib['frame'] track_elements = [] if image: shape.update(image) - elif el.tag == "tag" and image: + elif el.tag == 'tag' and image: attributes = {} tag = { - "frame": image["frame"], - "attributes": attributes, - "group": int(el.attrib.get("group_id", 0)), - "label": el.attrib["label"], + 'frame': image['frame'], + 'attributes': attributes, + 'group': int(el.attrib.get('group_id', 0)), + 'label': el.attrib['label'], } - subset = el.attrib.get("subset") - elif ev == "end": - if ( - el.tag == "attribute" - and element_attributes is not None - and shape_element is not None - ): - attr_value = el.text or "" - attr_type = attribute_types.get(el.attrib["name"]) - if el.text in ["true", "false"]: - attr_value = attr_value == "true" - elif attr_type is not None and attr_type != "text": + subset = el.attrib.get('subset') + elif ev == 'end': + if el.tag == 'attribute' and element_attributes is not None and shape_element is not None: + attr_value = el.text or '' + attr_type = attribute_types.get(el.attrib['name']) + if el.text in ['true', 'false']: + attr_value = attr_value == 'true' + elif attr_type is not None and attr_type != 'text': try: attr_value = float(attr_value) except ValueError: pass - element_attributes[el.attrib["name"]] = attr_value - - if ( - el.tag == "attribute" - and attributes is not None - and shape_element is None - ): - attr_value = el.text or "" - attr_type = attribute_types.get(el.attrib["name"]) - if el.text in ["true", "false"]: - attr_value = attr_value == "true" - elif attr_type is not None and attr_type != "text": + element_attributes[el.attrib['name']] = attr_value + + if el.tag == 'attribute' and attributes is not None and shape_element is None: + attr_value = el.text or '' + attr_type = attribute_types.get(el.attrib['name']) + if el.text in ['true', 'false']: + attr_value = attr_value == 'true' + elif attr_type is not None and attr_type != 'text': try: attr_value = float(attr_value) except ValueError: pass - 
attributes[el.attrib["name"]] = attr_value - - elif ( - el.tag in cls._SUPPORTED_SHAPES - and shape["type"] == "skeleton" - and el.tag != "skeleton" - ): - shape_element["label"] = el.attrib.get("label") - shape_element["group"] = int(el.attrib.get("group_id", 0)) - - shape_element["type"] = el.tag - shape_element["z_order"] = int(el.attrib.get("z_order", 0)) - - if el.tag == "box": - shape_element["points"] = list( - map( - float, - [ - el.attrib["xtl"], - el.attrib["ytl"], - el.attrib["xbr"], - el.attrib["ybr"], - ], - ) - ) + attributes[el.attrib['name']] = attr_value + + elif el.tag in cls._SUPPORTED_SHAPES and shape["type"] == "skeleton" and el.tag != "skeleton": + shape_element['label'] = el.attrib.get('label') + shape_element['group'] = int(el.attrib.get('group_id', 0)) + + shape_element['type'] = el.tag + shape_element['z_order'] = int(el.attrib.get('z_order', 0)) + + if el.tag == 'box': + shape_element['points'] = list(map(float, [ + el.attrib['xtl'], el.attrib['ytl'], + el.attrib['xbr'], el.attrib['ybr'], + ])) else: - shape_element["points"] = [] - for pair in el.attrib["points"].split(";"): - shape_element["points"].extend(map(float, pair.split(","))) + shape_element['points'] = [] + for pair in el.attrib['points'].split(';'): + shape_element['points'].extend(map(float, pair.split(','))) - if el.tag == "points" and el.attrib.get("occluded") == "1": - shape_element["visibility"] = [Points.Visibility.hidden] * ( - len(shape_element["points"]) // 2 - ) + if el.tag == 'points' and el.attrib.get('occluded') == '1': + shape_element['visibility'] = [Points.Visibility.hidden] * (len(shape_element['points']) // 2) else: - shape_element["occluded"] = el.attrib.get("occluded") == "1" + shape_element['occluded'] = (el.attrib.get('occluded') == '1') - if el.tag == "points" and el.attrib.get("outside") == "1": - shape_element["visibility"] = [Points.Visibility.absent] * ( - len(shape_element["points"]) // 2 - ) + if el.tag == 'points' and el.attrib.get('outside') == '1': + shape_element['visibility'] = [Points.Visibility.absent] * (len(shape_element['points']) // 2) else: - shape_element["outside"] = el.attrib.get("outside") == "1" + shape_element['outside'] = (el.attrib.get('outside') == '1') if track: - shape_element["keyframe"] = el.attrib.get("keyframe") == "1" - if shape_element["keyframe"]: + shape_element['keyframe'] = (el.attrib.get('keyframe') == '1') + if shape_element['keyframe']: track_elements.append(shape_element) else: - shape["elements"].append(shape_element) + shape['elements'].append(shape_element) shape_element = None elif el.tag in cls._SUPPORTED_SHAPES: if track is not None: - shape["frame"] = el.attrib["frame"] - shape["outside"] = el.attrib.get("outside") == "1" - shape["keyframe"] = el.attrib.get("keyframe") == "1" + shape['frame'] = el.attrib['frame'] + shape['outside'] = (el.attrib.get('outside') == '1') + shape['keyframe'] = (el.attrib.get('keyframe') == '1') if image is not None: - shape["label"] = el.attrib.get("label") - shape["group"] = int(el.attrib.get("group_id", 0)) - - shape["type"] = el.tag - shape["occluded"] = el.attrib.get("occluded") == "1" - shape["z_order"] = int(el.attrib.get("z_order", 0)) - shape["rotation"] = float(el.attrib.get("rotation", 0)) - - if el.tag == "box": - shape["points"] = list( - map( - float, - [ - el.attrib["xtl"], - el.attrib["ytl"], - el.attrib["xbr"], - el.attrib["ybr"], - ], - ) - ) - elif el.tag == "skeleton": - shape["points"] = [] + shape['label'] = el.attrib.get('label') + shape['group'] = int(el.attrib.get('group_id', 
0)) + + shape['type'] = el.tag + shape['occluded'] = (el.attrib.get('occluded') == '1') + shape['z_order'] = int(el.attrib.get('z_order', 0)) + shape['rotation'] = float(el.attrib.get('rotation', 0)) + + if el.tag == 'box': + shape['points'] = list(map(float, [ + el.attrib['xtl'], el.attrib['ytl'], + el.attrib['xbr'], el.attrib['ybr'], + ])) + elif el.tag == 'skeleton': + shape['points'] = [] else: - shape["points"] = [] - for pair in el.attrib["points"].split(";"): - shape["points"].extend(map(float, pair.split(","))) + shape['points'] = [] + for pair in el.attrib['points'].split(';'): + shape['points'].extend(map(float, pair.split(','))) if track: if shape["type"] == "skeleton" and track_elements: shape["keyframe"] = True - track_shapes[shape["frame"]] = shape - track_shapes[shape["frame"]]["elements"] = track_elements + track_shapes[shape['frame']] = shape + track_shapes[shape['frame']]['elements'] = track_elements track_elements = None elif shape["type"] != "skeleton": - track_shapes[shape["frame"]] = shape + track_shapes[shape['frame']] = shape else: - frame_desc = items.get( - (subset, shape["frame"]), {"annotations": []} - ) - frame_desc["annotations"].append( - cls._parse_shape_ann(shape, categories) - ) - items[(subset, shape["frame"])] = frame_desc + frame_desc = items.get((subset, shape['frame']), {'annotations': []}) + frame_desc['annotations'].append( + cls._parse_shape_ann(shape, categories)) + items[(subset, shape['frame'])] = frame_desc shape = None - elif el.tag == "tag": - frame_desc = items.get((subset, tag["frame"]), {"annotations": []}) - frame_desc["annotations"].append( - cls._parse_tag_ann(tag, categories) - ) - items[(subset, tag["frame"])] = frame_desc + elif el.tag == 'tag': + frame_desc = items.get((subset, tag['frame']), {'annotations': []}) + frame_desc['annotations'].append( + cls._parse_tag_ann(tag, categories)) + items[(subset, tag['frame'])] = frame_desc tag = None - elif el.tag == "track": + elif el.tag == 'track': for track_shape in track_shapes.values(): - frame_desc = items.get( - (subset, track_shape["frame"]), {"annotations": []} - ) - frame_desc["annotations"].append( - cls._parse_shape_ann(track_shape, categories) - ) - items[(subset, track_shape["frame"])] = frame_desc + frame_desc = items.get((subset, track_shape['frame']), {'annotations': []}) + frame_desc['annotations'].append( + cls._parse_shape_ann(track_shape, categories)) + items[(subset, track_shape['frame'])] = frame_desc track = None - elif el.tag == "image": - frame_desc = items.get( - (subset, image["frame"]), {"annotations": []} - ) - frame_desc.update( - { - "name": image.get("name"), - "height": image.get("height"), - "width": image.get("width"), - "subset": subset, - } - ) - items[(subset, image["frame"])] = frame_desc + elif el.tag == 'image': + frame_desc = items.get((subset, image['frame']), {'annotations': []}) + frame_desc.update({ + 'name': image.get('name'), + 'height': image.get('height'), + 'width': image.get('width'), + 'subset': subset, + }) + items[(subset, image['frame'])] = frame_desc image = None el.clear() @@ -379,7 +317,7 @@ def _parse(cls, path): @staticmethod def _parse_meta(context): ev, el = next(context) - if not (ev == "start" and el.tag == "annotations"): + if not (ev == 'start' and el.tag == 'annotations'): raise Exception("Unexpected token ") categories = {} @@ -393,8 +331,7 @@ def _parse_meta(context): # Recursive descent parser el = None - states = ["annotations"] - + states = ['annotations'] def accepted(expected_state, tag, next_state=None): state = 
states[-1] if state == expected_state and el is not None and el.tag == tag: @@ -403,7 +340,6 @@ def accepted(expected_state, tag, next_state=None): states.append(next_state) return True return False - def consumed(expected_state, tag): state = states[-1] if state == expected_state and el is not None and el.tag == tag: @@ -412,269 +348,194 @@ def consumed(expected_state, tag): return False for ev, el in context: - if ev == "start": - if accepted("annotations", "meta"): - pass - elif accepted("meta", "task"): - pass - elif accepted("meta", "project"): - pass - elif accepted("project", "tasks"): - pass - elif accepted("tasks", "task"): - pass - elif accepted("task", "id", next_state="task_id"): - pass - elif accepted("task", "segment"): - pass - elif accepted("task", "mode"): - pass - elif accepted("task", "original_size"): - pass - elif accepted("original_size", "height", next_state="frame_height"): - pass - elif accepted("original_size", "width", next_state="frame_width"): - pass - elif accepted("task", "labels"): - pass - elif accepted("project", "labels"): - pass - elif accepted("labels", "label"): - label = {"name": None, "attributes": []} - elif accepted("label", "name", next_state="label_name"): - pass - elif accepted("label", "attributes"): - pass - elif accepted("attributes", "attribute"): - pass - elif accepted("attribute", "name", next_state="attr_name"): - pass - elif accepted("attribute", "input_type", next_state="attr_type"): - pass - elif ( - accepted("annotations", "image") - or accepted("annotations", "track") - or accepted("annotations", "tag") - ): + if ev == 'start': + if accepted('annotations', 'meta'): pass + elif accepted('meta', 'task'): pass + elif accepted('meta', 'project'): pass + elif accepted('project', 'tasks'): pass + elif accepted('tasks', 'task'): pass + elif accepted('task', 'id', next_state='task_id'): pass + elif accepted('task', 'segment'): pass + elif accepted('task', 'mode'): pass + elif accepted('task', 'original_size'): pass + elif accepted('original_size', 'height', next_state='frame_height'): pass + elif accepted('original_size', 'width', next_state='frame_width'): pass + elif accepted('task', 'labels'): pass + elif accepted('project', 'labels'): pass + elif accepted('labels', 'label'): + label = { 'name': None, 'attributes': [] } + elif accepted('label', 'name', next_state='label_name'): pass + elif accepted('label', 'attributes'): pass + elif accepted('attributes', 'attribute'): pass + elif accepted('attribute', 'name', next_state='attr_name'): pass + elif accepted('attribute', 'input_type', next_state='attr_type'): pass + elif accepted('annotations', 'image') or \ + accepted('annotations', 'track') or \ + accepted('annotations', 'tag'): break else: pass - elif ev == "end": - if consumed("meta", "meta"): + elif ev == 'end': + if consumed('meta', 'meta'): break - elif consumed("project", "project"): - pass - elif consumed("tasks", "tasks"): - pass - elif consumed("task", "task"): + elif consumed('project', 'project'): pass + elif consumed('tasks', 'tasks'): pass + elif consumed('task', 'task'): tasks_info[task_id] = { - "frame_size": frame_size, - "mode": mode, + 'frame_size': frame_size, + 'mode': mode, } frame_size = [None, None] mode = None - elif consumed("task_id", "id"): + elif consumed('task_id', 'id'): task_id = int(el.text) - elif consumed("segment", "segment"): - pass - elif consumed("mode", "mode"): + elif consumed('segment', 'segment'): pass + elif consumed('mode', 'mode'): mode = el.text - elif consumed("original_size", 
"original_size"): - pass - elif consumed("frame_height", "height"): + elif consumed('original_size', 'original_size'): pass + elif consumed('frame_height', 'height'): frame_size[0] = int(el.text) - elif consumed("frame_width", "width"): + elif consumed('frame_width', 'width'): frame_size[1] = int(el.text) - elif consumed("label_name", "name"): - label["name"] = el.text - elif consumed("attr_name", "name"): - label["attributes"].append({"name": el.text}) - elif consumed("attr_type", "input_type"): - label["attributes"][-1]["input_type"] = el.text - elif consumed("attribute", "attribute"): - pass - elif consumed("attributes", "attributes"): - pass - elif consumed("label", "label"): - labels[label["name"]] = label["attributes"] + elif consumed('label_name', 'name'): + label['name'] = el.text + elif consumed('attr_name', 'name'): + label['attributes'].append({'name': el.text}) + elif consumed('attr_type', 'input_type'): + label['attributes'][-1]['input_type'] = el.text + elif consumed('attribute', 'attribute'): pass + elif consumed('attributes', 'attributes'): pass + elif consumed('label', 'label'): + labels[label['name']] = label['attributes'] label = None - elif consumed("labels", "labels"): - pass + elif consumed('labels', 'labels'): pass else: pass - assert len(states) == 1 and states[0] == "annotations", ( + assert len(states) == 1 and states[0] == 'annotations', \ "Expected 'meta' section in the annotation file, path: %s" % states - ) - common_attrs = ["occluded"] - if "interpolation" in map(lambda t: t["mode"], tasks_info.values()): - common_attrs.append("keyframe") - common_attrs.append("outside") - common_attrs.append("track_id") + common_attrs = ['occluded'] + if 'interpolation' in map(lambda t: t['mode'], tasks_info.values()): + common_attrs.append('keyframe') + common_attrs.append('outside') + common_attrs.append('track_id') label_cat = LabelCategories(attributes=common_attrs) attribute_types = {} for label, attrs in labels.items(): - attr_names = {v["name"] for v in attrs} + attr_names = {v['name'] for v in attrs} label_cat.add(label, attributes=attr_names) for attr in attrs: - attribute_types[attr["name"]] = attr["input_type"] + attribute_types[attr['name']] = attr['input_type'] categories[AnnotationType.label] = label_cat return categories, tasks_info, attribute_types @classmethod def _parse_shape_ann(cls, ann, categories): - ann_id = ann.get("id", 0) - ann_type = ann["type"] - - attributes = ann.get("attributes") or {} - if "occluded" in categories[AnnotationType.label].attributes: - attributes["occluded"] = ann.get("occluded", False) - if "outside" in ann: - attributes["outside"] = ann["outside"] - if "keyframe" in ann: - attributes["keyframe"] = ann["keyframe"] - if "track_id" in ann: - attributes["track_id"] = ann["track_id"] - if "rotation" in ann: - attributes["rotation"] = ann["rotation"] - - group = ann.get("group") - - label = ann.get("label") + ann_id = ann.get('id', 0) + ann_type = ann['type'] + + attributes = ann.get('attributes') or {} + if 'occluded' in categories[AnnotationType.label].attributes: + attributes['occluded'] = ann.get('occluded', False) + if 'outside' in ann: + attributes['outside'] = ann['outside'] + if 'keyframe' in ann: + attributes['keyframe'] = ann['keyframe'] + if 'track_id' in ann: + attributes['track_id'] = ann['track_id'] + if 'rotation' in ann: + attributes['rotation'] = ann['rotation'] + + group = ann.get('group') + + label = ann.get('label') label_id = categories[AnnotationType.label].find(label)[0] - z_order = ann.get("z_order", 0) - 
points = ann.get("points", []) - - if ann_type == "polyline": - return PolyLine( - points, - label=label_id, - z_order=z_order, - id=ann_id, - attributes=attributes, - group=group, - ) - - elif ann_type == "polygon": - return Polygon( - points, - label=label_id, - z_order=z_order, - id=ann_id, - attributes=attributes, - group=group, - ) - - elif ann_type == "points": - visibility = ann.get("visibility", None) - return Points( - points, - visibility, - label=label_id, - z_order=z_order, - id=ann_id, - attributes=attributes, - group=group, - ) - - elif ann_type == "box": + z_order = ann.get('z_order', 0) + points = ann.get('points', []) + + if ann_type == 'polyline': + return PolyLine(points, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) + + elif ann_type == 'polygon': + return Polygon(points, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) + + elif ann_type == 'points': + visibility = ann.get('visibility', None) + return Points(points, visibility, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) + + elif ann_type == 'box': x, y = points[0], points[1] w, h = points[2] - x, points[3] - y - return Bbox( - x, - y, - w, - h, - label=label_id, - z_order=z_order, - id=ann_id, - attributes=attributes, - group=group, - ) - - elif ann_type == "skeleton": + return Bbox(x, y, w, h, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) + + elif ann_type == 'skeleton': elements = [] - for element in ann.get("elements", []): + for element in ann.get('elements', []): elements.append(cls._parse_shape_ann(element, categories)) - return Skeleton( - elements, - label=label_id, - z_order=z_order, - id=ann_id, - attributes=attributes, - group=group, - ) + return Skeleton(elements, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) else: raise NotImplementedError("Unknown annotation type '%s'" % ann_type) @classmethod def _parse_tag_ann(cls, ann, categories): - label = ann.get("label") + label = ann.get('label') label_id = categories[AnnotationType.label].find(label)[0] - group = ann.get("group") - attributes = ann.get("attributes") + group = ann.get('group') + attributes = ann.get('attributes') return Label(label_id, attributes=attributes, group=group) def _load_items(self, parsed, image_items): for (subset, frame_id), item_desc in parsed.items(): - name = item_desc.get("name", "frame_%06d.PNG" % int(frame_id)) - image = ( - osp.join(self._images_dir, subset, name) - if subset - else osp.join(self._images_dir, name) - ) - image_size = (item_desc.get("height"), item_desc.get("width")) + name = item_desc.get('name', 'frame_%06d.PNG' % int(frame_id)) + image = osp.join(self._images_dir, subset, name) if subset else osp.join(self._images_dir, name) + image_size = (item_desc.get('height'), item_desc.get('width')) if all(image_size): image = Image(path=image, size=tuple(map(int, image_size))) - di = image_items.get( - (subset, osp.splitext(name)[0]), - DatasetItem( - id=name, - annotations=[], - ), - ) + di = image_items.get((subset, osp.splitext(name)[0]), DatasetItem( + id=name, annotations=[], + )) di.subset = subset or DEFAULT_SUBSET_NAME - di.annotations = item_desc.get("annotations") - di.attributes = {"frame": int(frame_id)} + di.annotations = item_desc.get('annotations') + di.attributes = {'frame': int(frame_id)} di.media = image if isinstance(image, Image) else di.media image_items[(subset, osp.splitext(name)[0])] = di return image_items - 
-dm_env.extractors.register("cvat", CvatExtractor) - +dm_env.extractors.register('cvat', CvatExtractor) class CvatImporter(Importer): @classmethod def find_sources(cls, path): - return cls._find_sources_recursive(path, ".xml", "cvat") - + return cls._find_sources_recursive(path, '.xml', 'cvat') -dm_env.importers.register("cvat", CvatImporter) +dm_env.importers.register('cvat', CvatImporter) def pairwise(iterable): a = iter(iterable) return zip(a, a) - def create_xml_dumper(file_object): from xml.sax.saxutils import XMLGenerator - class XmlAnnotationWriter: def __init__(self, file): self.version = "1.1" self.file = file - self.xmlgen = XMLGenerator(self.file, "utf-8") + self.xmlgen = XMLGenerator(self.file, 'utf-8') self._level = 0 - def _indent(self, newline=True): + def _indent(self, newline = True): if newline: self.xmlgen.ignorableWhitespace("\n") self.xmlgen.ignorableWhitespace(" " * self._level) @@ -848,8 +709,8 @@ def close_root(self): def close_document(self): self.xmlgen.endDocument() - return XmlAnnotationWriter(file_object) + return XmlAnnotationWriter(file_object) def dump_as_cvat_annotation(dumper, annotations): dumper.open_root() @@ -857,154 +718,102 @@ def dump_as_cvat_annotation(dumper, annotations): for frame_annotation in annotations.group_by_frame(include_empty=True): frame_id = frame_annotation.frame - image_attrs = OrderedDict( - [ - ("id", str(frame_id)), - ("name", frame_annotation.name), - ] - ) + image_attrs = OrderedDict([ + ("id", str(frame_id)), + ("name", frame_annotation.name), + ]) if isinstance(annotations, ProjectData): - image_attrs.update( - OrderedDict( - [ - ("subset", frame_annotation.subset), - ("task_id", str(frame_annotation.task_id)), - ] - ) - ) - image_attrs.update( - OrderedDict( - [ - ("width", str(frame_annotation.width)), - ("height", str(frame_annotation.height)), - ] - ) - ) + image_attrs.update(OrderedDict([ + ("subset", frame_annotation.subset), + ("task_id", str(frame_annotation.task_id)), + ])) + image_attrs.update(OrderedDict([ + ("width", str(frame_annotation.width)), + ("height", str(frame_annotation.height)) + ])) dumper.open_image(image_attrs) def dump_labeled_shapes(shapes, is_skeleton=False): for shape in shapes: - dump_data = OrderedDict( - [ - ("label", shape.label), - ("source", shape.source), - ] - ) + dump_data = OrderedDict([ + ("label", shape.label), + ("source", shape.source), + ]) if is_skeleton: - dump_data.update( - OrderedDict([("outside", str(int(shape.outside)))]) - ) + dump_data.update(OrderedDict([ + ("outside", str(int(shape.outside))) + ])) - if shape.type != "skeleton": - dump_data.update( - OrderedDict([("occluded", str(int(shape.occluded)))]) - ) + if shape.type != 'skeleton': + dump_data.update(OrderedDict([ + ("occluded", str(int(shape.occluded))) + ])) if shape.type == "rectangle": - dump_data.update( - OrderedDict( - [ - ("xtl", "{:.2f}".format(shape.points[0])), - ("ytl", "{:.2f}".format(shape.points[1])), - ("xbr", "{:.2f}".format(shape.points[2])), - ("ybr", "{:.2f}".format(shape.points[3])), - ] - ) - ) + dump_data.update(OrderedDict([ + ("xtl", "{:.2f}".format(shape.points[0])), + ("ytl", "{:.2f}".format(shape.points[1])), + ("xbr", "{:.2f}".format(shape.points[2])), + ("ybr", "{:.2f}".format(shape.points[3])) + ])) if shape.rotation: - dump_data.update( - OrderedDict([("rotation", "{:.2f}".format(shape.rotation))]) - ) + dump_data.update(OrderedDict([ + ("rotation", "{:.2f}".format(shape.rotation)) + ])) elif shape.type == "ellipse": - dump_data.update( - OrderedDict( - [ - ("cx", 
"{:.2f}".format(shape.points[0])), - ("cy", "{:.2f}".format(shape.points[1])), - ( - "rx", - "{:.2f}".format(shape.points[2] - shape.points[0]), - ), - ( - "ry", - "{:.2f}".format(shape.points[1] - shape.points[3]), - ), - ] - ) - ) + dump_data.update(OrderedDict([ + ("cx", "{:.2f}".format(shape.points[0])), + ("cy", "{:.2f}".format(shape.points[1])), + ("rx", "{:.2f}".format(shape.points[2] - shape.points[0])), + ("ry", "{:.2f}".format(shape.points[1] - shape.points[3])) + ])) if shape.rotation: - dump_data.update( - OrderedDict([("rotation", "{:.2f}".format(shape.rotation))]) - ) + dump_data.update(OrderedDict([ + ("rotation", "{:.2f}".format(shape.rotation)) + ])) elif shape.type == "cuboid": - dump_data.update( - OrderedDict( - [ - ("xtl1", "{:.2f}".format(shape.points[0])), - ("ytl1", "{:.2f}".format(shape.points[1])), - ("xbl1", "{:.2f}".format(shape.points[2])), - ("ybl1", "{:.2f}".format(shape.points[3])), - ("xtr1", "{:.2f}".format(shape.points[4])), - ("ytr1", "{:.2f}".format(shape.points[5])), - ("xbr1", "{:.2f}".format(shape.points[6])), - ("ybr1", "{:.2f}".format(shape.points[7])), - ("xtl2", "{:.2f}".format(shape.points[8])), - ("ytl2", "{:.2f}".format(shape.points[9])), - ("xbl2", "{:.2f}".format(shape.points[10])), - ("ybl2", "{:.2f}".format(shape.points[11])), - ("xtr2", "{:.2f}".format(shape.points[12])), - ("ytr2", "{:.2f}".format(shape.points[13])), - ("xbr2", "{:.2f}".format(shape.points[14])), - ("ybr2", "{:.2f}".format(shape.points[15])), - ] - ) - ) + dump_data.update(OrderedDict([ + ("xtl1", "{:.2f}".format(shape.points[0])), + ("ytl1", "{:.2f}".format(shape.points[1])), + ("xbl1", "{:.2f}".format(shape.points[2])), + ("ybl1", "{:.2f}".format(shape.points[3])), + ("xtr1", "{:.2f}".format(shape.points[4])), + ("ytr1", "{:.2f}".format(shape.points[5])), + ("xbr1", "{:.2f}".format(shape.points[6])), + ("ybr1", "{:.2f}".format(shape.points[7])), + ("xtl2", "{:.2f}".format(shape.points[8])), + ("ytl2", "{:.2f}".format(shape.points[9])), + ("xbl2", "{:.2f}".format(shape.points[10])), + ("ybl2", "{:.2f}".format(shape.points[11])), + ("xtr2", "{:.2f}".format(shape.points[12])), + ("ytr2", "{:.2f}".format(shape.points[13])), + ("xbr2", "{:.2f}".format(shape.points[14])), + ("ybr2", "{:.2f}".format(shape.points[15])) + ])) elif shape.type == "mask": - dump_data.update( - OrderedDict( - [ - ( - "rle", - f"{list(int (v) for v in shape.points[:-4])}"[1:-1], - ), - ("left", f"{int(shape.points[-4])}"), - ("top", f"{int(shape.points[-3])}"), - ( - "width", - f"{int(shape.points[-2] - shape.points[-4]) + 1}", - ), - ( - "height", - f"{int(shape.points[-1] - shape.points[-3]) + 1}", - ), - ] - ) - ) - elif shape.type != "skeleton": - dump_data.update( - OrderedDict( - [ - ( - "points", - ";".join( - ( - ",".join( - ("{:.2f}".format(x), "{:.2f}".format(y)) - ) - for x, y in pairwise(shape.points) - ) - ), - ), - ] - ) - ) + dump_data.update(OrderedDict([ + ("rle", f"{list(int (v) for v in shape.points[:-4])}"[1:-1]), + ("left", f"{int(shape.points[-4])}"), + ("top", f"{int(shape.points[-3])}"), + ("width", f"{int(shape.points[-2] - shape.points[-4]) + 1}"), + ("height", f"{int(shape.points[-1] - shape.points[-3]) + 1}"), + ])) + elif shape.type != 'skeleton': + dump_data.update(OrderedDict([ + ("points", ';'.join(( + ','.join(( + "{:.2f}".format(x), + "{:.2f}".format(y) + )) for x, y in pairwise(shape.points)) + )), + ])) if not is_skeleton: - dump_data["z_order"] = str(shape.z_order) + dump_data['z_order'] = str(shape.z_order) if shape.group: - dump_data["group_id"] = 
str(shape.group) + dump_data['group_id'] = str(shape.group) if shape.type == "rectangle": dumper.open_box(dump_data) @@ -1027,9 +836,10 @@ def dump_labeled_shapes(shapes, is_skeleton=False): raise NotImplementedError("unknown shape type") for attr in shape.attributes: - dumper.add_attribute( - OrderedDict([("name", attr.name), ("value", attr.value)]) - ) + dumper.add_attribute(OrderedDict([ + ("name", attr.name), + ("value", attr.value) + ])) if shape.type == "rectangle": dumper.close_box() @@ -1053,27 +863,25 @@ def dump_labeled_shapes(shapes, is_skeleton=False): dump_labeled_shapes(frame_annotation.labeled_shapes) for tag in frame_annotation.tags: - tag_data = OrderedDict( - [ - ("label", tag.label), - ("source", tag.source), - ] - ) + tag_data = OrderedDict([ + ("label", tag.label), + ("source", tag.source), + ]) if tag.group: tag_data["group_id"] = str(tag.group) dumper.open_tag(tag_data) for attr in tag.attributes: - dumper.add_attribute( - OrderedDict([("name", attr.name), ("value", attr.value)]) - ) + dumper.add_attribute(OrderedDict([ + ("name", attr.name), + ("value", attr.value) + ])) dumper.close_tag() dumper.close_image() dumper.close_root() - def dump_as_cvat_interpolation(dumper, annotations): dumper.open_root() dumper.add_meta(annotations.meta) @@ -1081,122 +889,79 @@ def dump_as_cvat_interpolation(dumper, annotations): def dump_shape(shape, element_shapes=None, label=None): dump_data = OrderedDict() if label is None: - dump_data.update( - OrderedDict( - [ - ("frame", str(shape.frame)), - ] - ) - ) + dump_data.update(OrderedDict([ + ("frame", str(shape.frame)), + ])) else: - dump_data.update( - OrderedDict( - [ - ("label", label), - ] - ) - ) - dump_data.update( - OrderedDict( - [ - ("keyframe", str(int(shape.keyframe))), - ] - ) - ) + dump_data.update(OrderedDict([ + ("label", label), + ])) + dump_data.update(OrderedDict([ + ("keyframe", str(int(shape.keyframe))), + ])) if shape.type != "skeleton": - dump_data.update( - OrderedDict( - [ - ("outside", str(int(shape.outside))), - ("occluded", str(int(shape.occluded))), - ] - ) - ) + dump_data.update(OrderedDict([ + ("outside", str(int(shape.outside))), + ("occluded", str(int(shape.occluded))), + ])) if shape.type == "rectangle": - dump_data.update( - OrderedDict( - [ - ("xtl", "{:.2f}".format(shape.points[0])), - ("ytl", "{:.2f}".format(shape.points[1])), - ("xbr", "{:.2f}".format(shape.points[2])), - ("ybr", "{:.2f}".format(shape.points[3])), - ] - ) - ) + dump_data.update(OrderedDict([ + ("xtl", "{:.2f}".format(shape.points[0])), + ("ytl", "{:.2f}".format(shape.points[1])), + ("xbr", "{:.2f}".format(shape.points[2])), + ("ybr", "{:.2f}".format(shape.points[3])), + ])) if shape.rotation: - dump_data.update( - OrderedDict([("rotation", "{:.2f}".format(shape.rotation))]) - ) + dump_data.update(OrderedDict([ + ("rotation", "{:.2f}".format(shape.rotation)) + ])) elif shape.type == "ellipse": - dump_data.update( - OrderedDict( - [ - ("cx", "{:.2f}".format(shape.points[0])), - ("cy", "{:.2f}".format(shape.points[1])), - ("rx", "{:.2f}".format(shape.points[2] - shape.points[0])), - ("ry", "{:.2f}".format(shape.points[1] - shape.points[3])), - ] - ) - ) + dump_data.update(OrderedDict([ + ("cx", "{:.2f}".format(shape.points[0])), + ("cy", "{:.2f}".format(shape.points[1])), + ("rx", "{:.2f}".format(shape.points[2] - shape.points[0])), + ("ry", "{:.2f}".format(shape.points[1] - shape.points[3])) + ])) if shape.rotation: - dump_data.update( - OrderedDict([("rotation", "{:.2f}".format(shape.rotation))]) - ) + 
dump_data.update(OrderedDict([ + ("rotation", "{:.2f}".format(shape.rotation)) + ])) elif shape.type == "mask": - dump_data.update( - OrderedDict( - [ - ("rle", f"{list(int (v) for v in shape.points[:-4])}"[1:-1]), - ("left", f"{int(shape.points[-4])}"), - ("top", f"{int(shape.points[-3])}"), - ("width", f"{int(shape.points[-2] - shape.points[-4]) + 1}"), - ("height", f"{int(shape.points[-1] - shape.points[-3]) + 1}"), - ] - ) - ) + dump_data.update(OrderedDict([ + ("rle", f"{list(int (v) for v in shape.points[:-4])}"[1:-1]), + ("left", f"{int(shape.points[-4])}"), + ("top", f"{int(shape.points[-3])}"), + ("width", f"{int(shape.points[-2] - shape.points[-4]) + 1}"), + ("height", f"{int(shape.points[-1] - shape.points[-3]) + 1}"), + ])) elif shape.type == "cuboid": - dump_data.update( - OrderedDict( - [ - ("xtl1", "{:.2f}".format(shape.points[0])), - ("ytl1", "{:.2f}".format(shape.points[1])), - ("xbl1", "{:.2f}".format(shape.points[2])), - ("ybl1", "{:.2f}".format(shape.points[3])), - ("xtr1", "{:.2f}".format(shape.points[4])), - ("ytr1", "{:.2f}".format(shape.points[5])), - ("xbr1", "{:.2f}".format(shape.points[6])), - ("ybr1", "{:.2f}".format(shape.points[7])), - ("xtl2", "{:.2f}".format(shape.points[8])), - ("ytl2", "{:.2f}".format(shape.points[9])), - ("xbl2", "{:.2f}".format(shape.points[10])), - ("ybl2", "{:.2f}".format(shape.points[11])), - ("xtr2", "{:.2f}".format(shape.points[12])), - ("ytr2", "{:.2f}".format(shape.points[13])), - ("xbr2", "{:.2f}".format(shape.points[14])), - ("ybr2", "{:.2f}".format(shape.points[15])), - ] - ) - ) + dump_data.update(OrderedDict([ + ("xtl1", "{:.2f}".format(shape.points[0])), + ("ytl1", "{:.2f}".format(shape.points[1])), + ("xbl1", "{:.2f}".format(shape.points[2])), + ("ybl1", "{:.2f}".format(shape.points[3])), + ("xtr1", "{:.2f}".format(shape.points[4])), + ("ytr1", "{:.2f}".format(shape.points[5])), + ("xbr1", "{:.2f}".format(shape.points[6])), + ("ybr1", "{:.2f}".format(shape.points[7])), + ("xtl2", "{:.2f}".format(shape.points[8])), + ("ytl2", "{:.2f}".format(shape.points[9])), + ("xbl2", "{:.2f}".format(shape.points[10])), + ("ybl2", "{:.2f}".format(shape.points[11])), + ("xtr2", "{:.2f}".format(shape.points[12])), + ("ytr2", "{:.2f}".format(shape.points[13])), + ("xbr2", "{:.2f}".format(shape.points[14])), + ("ybr2", "{:.2f}".format(shape.points[15])) + ])) elif shape.type != "skeleton": - dump_data.update( - OrderedDict( - [ - ( - "points", - ";".join( - [ - "{:.2f},{:.2f}".format(x, y) - for x, y in pairwise(shape.points) - ] - ), - ) - ] - ) - ) + dump_data.update(OrderedDict([ + ("points", ';'.join(['{:.2f},{:.2f}'.format(x, y) + for x,y in pairwise(shape.points)])) + ])) if label is None: dump_data["z_order"] = str(shape.z_order) @@ -1211,11 +976,11 @@ def dump_shape(shape, element_shapes=None, label=None): dumper.open_polyline(dump_data) elif shape.type == "points": dumper.open_points(dump_data) - elif shape.type == "mask": + elif shape.type == 'mask': dumper.open_mask(dump_data) elif shape.type == "cuboid": dumper.open_cuboid(dump_data) - elif shape.type == "skeleton": + elif shape.type == 'skeleton': if element_shapes and element_shapes.get(shape.frame): dumper.open_skeleton(dump_data) for element_shape, label in element_shapes.get(shape.frame, []): @@ -1223,16 +988,13 @@ def dump_shape(shape, element_shapes=None, label=None): else: raise NotImplementedError("unknown shape type") - if ( - shape.type == "skeleton" - and element_shapes - and element_shapes.get(shape.frame) - or shape.type != "skeleton" - ): + if shape.type == 
"skeleton" and element_shapes \ + and element_shapes.get(shape.frame) or shape.type != "skeleton": for attr in shape.attributes: - dumper.add_attribute( - OrderedDict([("name", attr.name), ("value", attr.value)]) - ) + dumper.add_attribute(OrderedDict([ + ("name", attr.name), + ("value", attr.value) + ])) if shape.type == "rectangle": dumper.close_box() @@ -1244,7 +1006,7 @@ def dump_shape(shape, element_shapes=None, label=None): dumper.close_polyline() elif shape.type == "points": dumper.close_points() - elif shape.type == "mask": + elif shape.type == 'mask': dumper.close_mask() elif shape.type == "cuboid": dumper.close_cuboid() @@ -1256,30 +1018,21 @@ def dump_shape(shape, element_shapes=None, label=None): def dump_track(idx, track): track_id = idx - dump_data = OrderedDict( - [ - ("id", str(track_id)), - ("label", track.label), - ("source", track.source), - ] - ) - - if hasattr(track, "task_id"): - (task,) = filter(lambda task: task.id == track.task_id, annotations.tasks) - dump_data.update( - OrderedDict( - [ - ("task_id", str(track.task_id)), - ( - "subset", - get_defaulted_subset(task.subset, annotations.subsets), - ), - ] - ) - ) + dump_data = OrderedDict([ + ("id", str(track_id)), + ("label", track.label), + ("source", track.source), + ]) + + if hasattr(track, 'task_id'): + task, = filter(lambda task: task.id == track.task_id, annotations.tasks) + dump_data.update(OrderedDict([ + ('task_id', str(track.task_id)), + ('subset', get_defaulted_subset(task.subset, annotations.subsets)), + ])) if track.group: - dump_data["group_id"] = str(track.group) + dump_data['group_id'] = str(track.group) dumper.open_track(dump_data) element_shapes = {} @@ -1287,9 +1040,7 @@ def dump_track(idx, track): for element_shape in element_track.shapes: if element_shape.frame not in element_shapes: element_shapes[element_shape.frame] = [] - element_shapes[element_shape.frame].append( - (element_shape, element_track.label) - ) + element_shapes[element_shape.frame].append((element_shape, element_track.label)) for shape in track.shapes: dump_shape(shape, element_shapes) @@ -1302,366 +1053,297 @@ def dump_track(idx, track): counter += 1 for shape in annotations.shapes: - frame_step = ( - annotations.frame_step - if not isinstance(annotations, ProjectData) + frame_step = annotations.frame_step if not isinstance(annotations, ProjectData) \ else annotations.frame_step[shape.task_id] - ) if not isinstance(annotations, ProjectData): - stop_frame = int(annotations.meta[annotations.META_FIELD]["stop_frame"]) + stop_frame = int(annotations.meta[annotations.META_FIELD]['stop_frame']) else: - task_meta = list( - filter( - lambda task: int(task[1]["id"]) == shape.task_id, - annotations.meta[annotations.META_FIELD]["tasks"], - ) - )[0][1] - stop_frame = int(task_meta["stop_frame"]) + task_meta = list(filter(lambda task: int(task[1]['id']) == shape.task_id, + annotations.meta[annotations.META_FIELD]['tasks']))[0][1] + stop_frame = int(task_meta['stop_frame']) track = { - "label": shape.label, - "group": shape.group, - "source": shape.source, - "shapes": [ - annotations.TrackedShape( - type=shape.type, - points=shape.points, - rotation=shape.rotation, - occluded=shape.occluded, - outside=False, + 'label': shape.label, + 'group': shape.group, + 'source': shape.source, + 'shapes': [annotations.TrackedShape( + type=shape.type, + points=shape.points, + rotation=shape.rotation, + occluded=shape.occluded, + outside=False, + keyframe=True, + z_order=shape.z_order, + frame=shape.frame, + attributes=shape.attributes, + )] + + ( # add a 
finishing frame if it does not hop over the last frame + [annotations.TrackedShape( + type=shape.type, + points=shape.points, + rotation=shape.rotation, + occluded=shape.occluded, + outside=True, + keyframe=True, + z_order=shape.z_order, + frame=shape.frame + frame_step, + attributes=shape.attributes, + )] if shape.frame + frame_step < \ + stop_frame \ + else [] + ), + 'elements': [annotations.Track( + label=element.label, + group=element.group, + source=element.source, + shapes=[annotations.TrackedShape( + type=element.type, + points=element.points, + rotation=element.rotation, + occluded=element.occluded, + outside=element.outside, keyframe=True, - z_order=shape.z_order, - frame=shape.frame, - attributes=shape.attributes, - ) - ] - + ( # add a finishing frame if it does not hop over the last frame - [ - annotations.TrackedShape( - type=shape.type, - points=shape.points, - rotation=shape.rotation, - occluded=shape.occluded, - outside=True, - keyframe=True, - z_order=shape.z_order, - frame=shape.frame + frame_step, - attributes=shape.attributes, - ) - ] - if shape.frame + frame_step < stop_frame + z_order=element.z_order, + frame=element.frame, + attributes=element.attributes, + )] + + ( # add a finishing frame if it does not hop over the last frame + [annotations.TrackedShape( + type=element.type, + points=element.points, + rotation=element.rotation, + occluded=element.occluded, + outside=True, + keyframe=True, + z_order=element.z_order, + frame=element.frame + frame_step, + attributes=element.attributes, + )] if element.frame + frame_step < \ + stop_frame \ else [] - ), - "elements": [ - annotations.Track( - label=element.label, - group=element.group, - source=element.source, - shapes=[ - annotations.TrackedShape( - type=element.type, - points=element.points, - rotation=element.rotation, - occluded=element.occluded, - outside=element.outside, - keyframe=True, - z_order=element.z_order, - frame=element.frame, - attributes=element.attributes, - ) - ] - + ( # add a finishing frame if it does not hop over the last frame - [ - annotations.TrackedShape( - type=element.type, - points=element.points, - rotation=element.rotation, - occluded=element.occluded, - outside=True, - keyframe=True, - z_order=element.z_order, - frame=element.frame + frame_step, - attributes=element.attributes, - ) - ] - if element.frame + frame_step < stop_frame - else [] - ), - elements=[], - ) - for element in shape.elements - ], + ), + elements=[], + ) for element in shape.elements] } if isinstance(annotations, ProjectData): - track["task_id"] = shape.task_id - for element in track["elements"]: + track['task_id'] = shape.task_id + for element in track['elements']: element.task_id = shape.task_id dump_track(counter, annotations.Track(**track)) counter += 1 dumper.close_root() - def load_anno(file_object, annotations): - supported_shapes = ( - "box", - "ellipse", - "polygon", - "polyline", - "points", - "cuboid", - "skeleton", - "mask", - ) + supported_shapes = ('box', 'ellipse', 'polygon', 'polyline', 'points', 'cuboid', 'skeleton', 'mask') context = ElementTree.iterparse(file_object, events=("start", "end")) context = iter(context) next(context) track = None shape = None - shape_element = None + shape_element=None tag = None image_is_opened = False attributes = None elem_attributes = None track_elements = None for ev, el in context: - if ev == "start": - if el.tag == "track": + if ev == 'start': + if el.tag == 'track': track = annotations.Track( - label=el.attrib["label"], - group=int(el.attrib.get("group_id", 0)), - 
source="file", + label=el.attrib['label'], + group=int(el.attrib.get('group_id', 0)), + source='file', shapes=[], elements=[], ) - elif el.tag == "image": + elif el.tag == 'image': image_is_opened = True - frame_id = annotations.abs_frame_id( - match_dm_item( - DatasetItem( - id=osp.splitext(el.attrib["name"])[0], - attributes={"frame": el.attrib["id"]}, - image=el.attrib["name"], - ), - instance_data=annotations, - ) - ) + frame_id = annotations.abs_frame_id(match_dm_item( + DatasetItem(id=osp.splitext(el.attrib['name'])[0], + attributes={'frame': el.attrib['id']}, + image=el.attrib['name'] + ), + instance_data=annotations + )) elif el.tag in supported_shapes and (track is not None or image_is_opened): - if shape and shape["type"] == "skeleton": + if shape and shape['type'] == 'skeleton': elem_attributes = [] shape_element = { - "attributes": elem_attributes, - "points": [], - "type": "rectangle" if el.tag == "box" else el.tag, + 'attributes': elem_attributes, + 'points': [], + 'type': 'rectangle' if el.tag == 'box' else el.tag } - if track is not None and el.attrib["label"] not in track_elements: - track_elements[el.attrib["label"]] = annotations.Track( - label=el.attrib["label"], + if track is not None and el.attrib['label'] not in track_elements: + track_elements[el.attrib['label']] = annotations.Track( + label=el.attrib['label'], group=0, - source="file", + source='file', shapes=[], elements=[], ) else: attributes = [] shape = { - "attributes": attributes, - "points": [], - "type": "rectangle" if el.tag == "box" else el.tag, + 'attributes': attributes, + 'points': [], + 'type': 'rectangle' if el.tag == 'box' else el.tag } if track is None: - shape["elements"] = [] - elif shape["type"] == "skeleton": - shape["frame"] = el.attrib["frame"] + shape['elements'] = [] + elif shape['type'] == 'skeleton': + shape['frame'] = el.attrib['frame'] if track_elements is None: track_elements = {} - elif el.tag == "tag" and image_is_opened: + elif el.tag == 'tag' and image_is_opened: attributes = [] tag = { - "frame": frame_id, - "label": el.attrib["label"], - "group": int(el.attrib.get("group_id", 0)), - "attributes": attributes, - "source": "file", + 'frame': frame_id, + 'label': el.attrib['label'], + 'group': int(el.attrib.get('group_id', 0)), + 'attributes': attributes, + 'source': 'file', } - elif ev == "end": - if ( - el.tag == "attribute" - and elem_attributes is not None - and shape_element is not None - ): - elem_attributes.append( - annotations.Attribute( - name=el.attrib["name"], - value=el.text or "", - ) - ) - if ( - el.tag == "attribute" - and attributes is not None - and shape_element is None - ): - attributes.append( - annotations.Attribute( - name=el.attrib["name"], - value=el.text or "", - ) - ) - if ( - el.tag in supported_shapes - and shape["type"] == "skeleton" - and el.tag != "skeleton" - ): - shape_element["label"] = el.attrib["label"] - - shape_element["occluded"] = el.attrib["occluded"] == "1" - shape_element["outside"] = el.attrib["outside"] == "1" - shape_element["elements"] = [] - - if el.tag == "box": - shape_element["points"].append(el.attrib["xtl"]) - shape_element["points"].append(el.attrib["ytl"]) - shape_element["points"].append(el.attrib["xbr"]) - shape_element["points"].append(el.attrib["ybr"]) - elif el.tag == "ellipse": - shape_element["points"].append(el.attrib["cx"]) - shape_element["points"].append(el.attrib["cy"]) - shape_element["points"].append( - "{:.2f}".format(float(el.attrib["cx"]) + float(el.attrib["rx"])) - ) - shape_element["points"].append( - 
"{:.2f}".format(float(el.attrib["cy"]) - float(el.attrib["ry"])) - ) - elif el.tag == "cuboid": - shape_element["points"].append(el.attrib["xtl1"]) - shape_element["points"].append(el.attrib["ytl1"]) - shape_element["points"].append(el.attrib["xbl1"]) - shape_element["points"].append(el.attrib["ybl1"]) - shape_element["points"].append(el.attrib["xtr1"]) - shape_element["points"].append(el.attrib["ytr1"]) - shape_element["points"].append(el.attrib["xbr1"]) - shape_element["points"].append(el.attrib["ybr1"]) - - shape_element["points"].append(el.attrib["xtl2"]) - shape_element["points"].append(el.attrib["ytl2"]) - shape_element["points"].append(el.attrib["xbl2"]) - shape_element["points"].append(el.attrib["ybl2"]) - shape_element["points"].append(el.attrib["xtr2"]) - shape_element["points"].append(el.attrib["ytr2"]) - shape_element["points"].append(el.attrib["xbr2"]) - shape_element["points"].append(el.attrib["ybr2"]) + elif ev == 'end': + if el.tag == 'attribute' and elem_attributes is not None and shape_element is not None: + elem_attributes.append(annotations.Attribute( + name=el.attrib['name'], + value=el.text or "", + )) + if el.tag == 'attribute' and attributes is not None and shape_element is None: + attributes.append(annotations.Attribute( + name=el.attrib['name'], + value=el.text or "", + )) + if el.tag in supported_shapes and shape['type'] == 'skeleton' and el.tag != 'skeleton': + shape_element['label'] = el.attrib['label'] + + shape_element['occluded'] = el.attrib['occluded'] == '1' + shape_element['outside'] = el.attrib['outside'] == '1' + shape_element['elements'] = [] + + if el.tag == 'box': + shape_element['points'].append(el.attrib['xtl']) + shape_element['points'].append(el.attrib['ytl']) + shape_element['points'].append(el.attrib['xbr']) + shape_element['points'].append(el.attrib['ybr']) + elif el.tag == 'ellipse': + shape_element['points'].append(el.attrib['cx']) + shape_element['points'].append(el.attrib['cy']) + shape_element['points'].append("{:.2f}".format(float(el.attrib['cx']) + float(el.attrib['rx']))) + shape_element['points'].append("{:.2f}".format(float(el.attrib['cy']) - float(el.attrib['ry']))) + elif el.tag == 'cuboid': + shape_element['points'].append(el.attrib['xtl1']) + shape_element['points'].append(el.attrib['ytl1']) + shape_element['points'].append(el.attrib['xbl1']) + shape_element['points'].append(el.attrib['ybl1']) + shape_element['points'].append(el.attrib['xtr1']) + shape_element['points'].append(el.attrib['ytr1']) + shape_element['points'].append(el.attrib['xbr1']) + shape_element['points'].append(el.attrib['ybr1']) + + shape_element['points'].append(el.attrib['xtl2']) + shape_element['points'].append(el.attrib['ytl2']) + shape_element['points'].append(el.attrib['xbl2']) + shape_element['points'].append(el.attrib['ybl2']) + shape_element['points'].append(el.attrib['xtr2']) + shape_element['points'].append(el.attrib['ytr2']) + shape_element['points'].append(el.attrib['xbr2']) + shape_element['points'].append(el.attrib['ybr2']) else: - for pair in el.attrib["points"].split(";"): - shape_element["points"].extend(map(float, pair.split(","))) + for pair in el.attrib['points'].split(';'): + shape_element['points'].extend(map(float, pair.split(','))) if track is None: - shape_element["frame"] = frame_id - shape_element["source"] = "file" - shape["elements"].append(annotations.LabeledShape(**shape_element)) + shape_element['frame'] = frame_id + shape_element['source'] = 'file' + shape['elements'].append(annotations.LabeledShape(**shape_element)) else: - 
shape_element["frame"] = shape["frame"] - shape_element["keyframe"] = el.attrib["keyframe"] == "1" - if shape_element["keyframe"]: - track_elements[el.attrib["label"]].shapes.append( - annotations.TrackedShape(**shape_element) - ) + shape_element["frame"] = shape['frame'] + shape_element['keyframe'] = el.attrib['keyframe'] == "1" + if shape_element['keyframe']: + track_elements[el.attrib['label']].shapes.append(annotations.TrackedShape(**shape_element)) shape_element = None elif el.tag in supported_shapes: if track is not None: - shape["frame"] = el.attrib["frame"] - shape["outside"] = el.attrib.get("outside", "0") == "1" - shape["keyframe"] = el.attrib["keyframe"] == "1" + shape['frame'] = el.attrib['frame'] + shape['outside'] = el.attrib.get('outside', "0") == "1" + shape['keyframe'] = el.attrib['keyframe'] == "1" else: - shape["frame"] = frame_id - shape["label"] = el.attrib["label"] - shape["group"] = int(el.attrib.get("group_id", 0)) - shape["source"] = "file" - shape["outside"] = False - - shape["occluded"] = el.attrib.get("occluded", "0") == "1" - shape["z_order"] = int(el.attrib.get("z_order", 0)) - shape["rotation"] = float(el.attrib.get("rotation", 0)) - - if el.tag == "box": - shape["points"].append(el.attrib["xtl"]) - shape["points"].append(el.attrib["ytl"]) - shape["points"].append(el.attrib["xbr"]) - shape["points"].append(el.attrib["ybr"]) - elif el.tag == "ellipse": - shape["points"].append(el.attrib["cx"]) - shape["points"].append(el.attrib["cy"]) - shape["points"].append( - "{:.2f}".format(float(el.attrib["cx"]) + float(el.attrib["rx"])) - ) - shape["points"].append( - "{:.2f}".format(float(el.attrib["cy"]) - float(el.attrib["ry"])) - ) - elif el.tag == "mask": - shape["points"] = el.attrib["rle"].split(",") - shape["points"].append(el.attrib["left"]) - shape["points"].append(el.attrib["top"]) - shape["points"].append( - "{}".format( - int(el.attrib["left"]) + int(el.attrib["width"]) - 1 - ) - ) - shape["points"].append( - "{}".format( - int(el.attrib["top"]) + int(el.attrib["height"]) - 1 - ) - ) - elif el.tag == "cuboid": - shape["points"].append(el.attrib["xtl1"]) - shape["points"].append(el.attrib["ytl1"]) - shape["points"].append(el.attrib["xbl1"]) - shape["points"].append(el.attrib["ybl1"]) - shape["points"].append(el.attrib["xtr1"]) - shape["points"].append(el.attrib["ytr1"]) - shape["points"].append(el.attrib["xbr1"]) - shape["points"].append(el.attrib["ybr1"]) - - shape["points"].append(el.attrib["xtl2"]) - shape["points"].append(el.attrib["ytl2"]) - shape["points"].append(el.attrib["xbl2"]) - shape["points"].append(el.attrib["ybl2"]) - shape["points"].append(el.attrib["xtr2"]) - shape["points"].append(el.attrib["ytr2"]) - shape["points"].append(el.attrib["xbr2"]) - shape["points"].append(el.attrib["ybr2"]) - elif el.tag == "skeleton": + shape['frame'] = frame_id + shape['label'] = el.attrib['label'] + shape['group'] = int(el.attrib.get('group_id', 0)) + shape['source'] = 'file' + shape['outside'] = False + + shape['occluded'] = el.attrib.get('occluded', "0") == '1' + shape['z_order'] = int(el.attrib.get('z_order', 0)) + shape['rotation'] = float(el.attrib.get('rotation', 0)) + + if el.tag == 'box': + shape['points'].append(el.attrib['xtl']) + shape['points'].append(el.attrib['ytl']) + shape['points'].append(el.attrib['xbr']) + shape['points'].append(el.attrib['ybr']) + elif el.tag == 'ellipse': + shape['points'].append(el.attrib['cx']) + shape['points'].append(el.attrib['cy']) + shape['points'].append("{:.2f}".format(float(el.attrib['cx']) + 
float(el.attrib['rx']))) + shape['points'].append("{:.2f}".format(float(el.attrib['cy']) - float(el.attrib['ry']))) + elif el.tag == 'mask': + shape['points'] = el.attrib['rle'].split(',') + shape['points'].append(el.attrib['left']) + shape['points'].append(el.attrib['top']) + shape['points'].append("{}".format(int(el.attrib['left']) + int(el.attrib['width']) - 1)) + shape['points'].append("{}".format(int(el.attrib['top']) + int(el.attrib['height']) - 1)) + elif el.tag == 'cuboid': + shape['points'].append(el.attrib['xtl1']) + shape['points'].append(el.attrib['ytl1']) + shape['points'].append(el.attrib['xbl1']) + shape['points'].append(el.attrib['ybl1']) + shape['points'].append(el.attrib['xtr1']) + shape['points'].append(el.attrib['ytr1']) + shape['points'].append(el.attrib['xbr1']) + shape['points'].append(el.attrib['ybr1']) + + shape['points'].append(el.attrib['xtl2']) + shape['points'].append(el.attrib['ytl2']) + shape['points'].append(el.attrib['xbl2']) + shape['points'].append(el.attrib['ybl2']) + shape['points'].append(el.attrib['xtr2']) + shape['points'].append(el.attrib['ytr2']) + shape['points'].append(el.attrib['xbr2']) + shape['points'].append(el.attrib['ybr2']) + elif el.tag == 'skeleton': pass else: - for pair in el.attrib["points"].split(";"): - shape["points"].extend(map(float, pair.split(","))) + for pair in el.attrib['points'].split(';'): + shape['points'].extend(map(float, pair.split(','))) if track is not None: - if shape["keyframe"]: + if shape['keyframe']: track.shapes.append(annotations.TrackedShape(**shape)) else: annotations.add_shape(annotations.LabeledShape(**shape)) shape = None - elif el.tag == "track": - if track.shapes[0].type == "mask": + elif el.tag == 'track': + if track.shapes[0].type == 'mask': # convert mask tracks to shapes # because mask track are not supported - annotations.add_shape( - annotations.LabeledShape( - **{ - "attributes": track.shapes[0].attributes, - "points": track.shapes[0].points, - "type": track.shapes[0].type, - "occluded": track.shapes[0].occluded, - "frame": track.shapes[0].frame, - "source": track.shapes[0].source, - "rotation": track.shapes[0].rotation, - "z_order": track.shapes[0].z_order, - "group": track.shapes[0].group, - "label": track.label, - } - ) - ) + annotations.add_shape(annotations.LabeledShape(**{ + 'attributes': track.shapes[0].attributes, + 'points': track.shapes[0].points, + 'type': track.shapes[0].type, + 'occluded': track.shapes[0].occluded, + 'frame': track.shapes[0].frame, + 'source': track.shapes[0].source, + 'rotation': track.shapes[0].rotation, + 'z_order': track.shapes[0].z_order, + 'group': track.shapes[0].group, + 'label': track.label, + })) else: if track_elements is not None: for element in track_elements.values(): @@ -1669,138 +1351,89 @@ def load_anno(file_object, annotations): track_elements = None annotations.add_track(track) track = None - elif el.tag == "image": + elif el.tag == 'image': image_is_opened = False - elif el.tag == "tag": + elif el.tag == 'tag': annotations.add_tag(annotations.Tag(**tag)) tag = None el.clear() - def dump_task_or_job_anno(dst_file, instance_data, callback): dumper = create_xml_dumper(dst_file) dumper.open_document() callback(dumper, instance_data) dumper.close_document() - -def dump_project_anno( - dst_file: BufferedWriter, project_data: ProjectData, callback: Callable -): +def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callback: Callable): dumper = create_xml_dumper(dst_file) dumper.open_document() callback(dumper, project_data) 
dumper.close_document() - -def dump_media_files( - instance_data: CommonData, img_dir: str, project_data: ProjectData = None -): - ext = "" - if instance_data.meta[instance_data.META_FIELD]["mode"] == "interpolation": +def dump_media_files(instance_data: CommonData, img_dir: str, project_data: ProjectData = None): + ext = '' + if instance_data.meta[instance_data.META_FIELD]['mode'] == 'interpolation': ext = FrameProvider.VIDEO_FRAME_EXT frame_provider = FrameProvider(instance_data.db_data) frames = frame_provider.get_frames( - instance_data.start, - instance_data.stop, + instance_data.start, instance_data.stop, frame_provider.Quality.ORIGINAL, - frame_provider.Type.BUFFER, - ) + frame_provider.Type.BUFFER) for frame_id, (frame_data, _) in zip(instance_data.rel_range, frames): - if ( - project_data is not None - and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames - ) or frame_id in instance_data.deleted_frames: + if (project_data is not None and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames) \ + or frame_id in instance_data.deleted_frames: continue - frame_name = ( - instance_data.frame_info[frame_id]["path"] - if project_data is None - else project_data.frame_info[(instance_data.db_instance.id, frame_id)][ - "path" - ] - ) + frame_name = instance_data.frame_info[frame_id]['path'] if project_data is None \ + else project_data.frame_info[(instance_data.db_instance.id, frame_id)]['path'] img_path = osp.join(img_dir, frame_name + ext) os.makedirs(osp.dirname(img_path), exist_ok=True) - with open(img_path, "wb") as f: + with open(img_path, 'wb') as f: f.write(frame_data.getvalue()) - -def _export_task_or_job( - dst_file, temp_dir, instance_data, anno_callback, save_images=False -): - with open(osp.join(temp_dir, "annotations.xml"), "wb") as f: +def _export_task_or_job(dst_file, temp_dir, instance_data, anno_callback, save_images=False): + with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: dump_task_or_job_anno(f, instance_data, anno_callback) if save_images: - dump_media_files(instance_data, osp.join(temp_dir, "images")) + dump_media_files(instance_data, osp.join(temp_dir, 'images')) make_zip_archive(temp_dir, dst_file) - -def _export_project( - dst_file: str, - temp_dir: str, - project_data: ProjectData, - anno_callback: Callable, - save_images: bool = False, +def _export_project(dst_file: str, temp_dir: str, project_data: ProjectData, + anno_callback: Callable, save_images: bool=False ): - with open(osp.join(temp_dir, "annotations.xml"), "wb") as f: + with open(osp.join(temp_dir, 'annotations.xml'), 'wb') as f: dump_project_anno(f, project_data, anno_callback) if save_images: for task_data in project_data.task_data: - subset = get_defaulted_subset( - task_data.db_instance.subset, project_data.subsets - ) - subset_dir = osp.join(temp_dir, "images", subset) + subset = get_defaulted_subset(task_data.db_instance.subset, project_data.subsets) + subset_dir = osp.join(temp_dir, 'images', subset) os.makedirs(subset_dir, exist_ok=True) dump_media_files(task_data, subset_dir, project_data) make_zip_archive(temp_dir, dst_file) - -@exporter(name="CVAT for video", ext="ZIP", version="1.1") +@exporter(name='CVAT for video', ext='ZIP', version='1.1') def _export_video(dst_file, temp_dir, instance_data, save_images=False): if isinstance(instance_data, ProjectData): - _export_project( - dst_file, - temp_dir, - instance_data, - anno_callback=dump_as_cvat_interpolation, - save_images=save_images, - ) + _export_project(dst_file, temp_dir, instance_data, 
+ anno_callback=dump_as_cvat_interpolation, save_images=save_images) else: - _export_task_or_job( - dst_file, - temp_dir, - instance_data, - anno_callback=dump_as_cvat_interpolation, - save_images=save_images, - ) + _export_task_or_job(dst_file, temp_dir, instance_data, + anno_callback=dump_as_cvat_interpolation, save_images=save_images) - -@exporter(name="CVAT for images", ext="ZIP", version="1.1") +@exporter(name='CVAT for images', ext='ZIP', version='1.1') def _export_images(dst_file, temp_dir, instance_data, save_images=False): if isinstance(instance_data, ProjectData): - _export_project( - dst_file, - temp_dir, - instance_data, - anno_callback=dump_as_cvat_annotation, - save_images=save_images, - ) + _export_project(dst_file, temp_dir, instance_data, + anno_callback=dump_as_cvat_annotation, save_images=save_images) else: - _export_task_or_job( - dst_file, - temp_dir, - instance_data, - anno_callback=dump_as_cvat_annotation, - save_images=save_images, - ) - + _export_task_or_job(dst_file, temp_dir, instance_data, + anno_callback=dump_as_cvat_annotation, save_images=save_images) -@importer(name="CVAT", ext="XML, ZIP", version="1.1") +@importer(name='CVAT', ext='XML, ZIP', version='1.1') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) @@ -1808,13 +1441,13 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs zipfile.ZipFile(src_file).extractall(temp_dir) if isinstance(instance_data, ProjectData): - detect_dataset(temp_dir, format_name="cvat", importer=_CvatImporter) - dataset = Dataset.import_from(temp_dir, "cvat", env=dm_env) + detect_dataset(temp_dir, format_name='cvat', importer=_CvatImporter) + dataset = Dataset.import_from(temp_dir, 'cvat', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) else: - anno_paths = glob(osp.join(temp_dir, "**", "*.xml"), recursive=True) + anno_paths = glob(osp.join(temp_dir, '**', '*.xml'), recursive=True) for p in anno_paths: load_anno(p, instance_data) else: diff --git a/cvat/apps/dataset_manager/formats/icdar.py b/cvat/apps/dataset_manager/formats/icdar.py index bf0968d4ae22..5d031eef82b0 100644 --- a/cvat/apps/dataset_manager/formats/icdar.py +++ b/cvat/apps/dataset_manager/formats/icdar.py @@ -5,19 +5,13 @@ import zipfile -from datumaro.components.annotation import ( - AnnotationType, - Caption, - Label, - LabelCategories, -) +from datumaro.components.annotation import (AnnotationType, Caption, Label, + LabelCategories) from datumaro.components.dataset import Dataset from datumaro.components.extractor import ItemTransform -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -42,15 +36,11 @@ def categories(self): def transform_item(self, item): annotations = item.annotations for ann in annotations: - if ann.type in [ - AnnotationType.polygon, - AnnotationType.bbox, - AnnotationType.mask, - ]: + if ann.type in [AnnotationType.polygon, + AnnotationType.bbox, AnnotationType.mask]: ann.label = self._label return item.wrap(annotations=annotations) - class CaptionToLabel(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -68,92 +58,90 @@ 
def categories(self): def transform_item(self, item): annotations = item.annotations - captions = [ann for ann in annotations if ann.type == AnnotationType.caption] + captions = [ann for ann in annotations + if ann.type == AnnotationType.caption] for ann in captions: - annotations.append(Label(self._label, attributes={"text": ann.caption})) + annotations.append(Label(self._label, + attributes={'text': ann.caption})) annotations.remove(ann) return item.wrap(annotations=annotations) - class LabelToCaption(ItemTransform): def transform_item(self, item): annotations = item.annotations - anns = [p for p in annotations if "text" in p.attributes] + anns = [p for p in annotations + if 'text' in p.attributes] for ann in anns: - annotations.append(Caption(ann.attributes["text"])) + annotations.append(Caption(ann.attributes['text'])) annotations.remove(ann) return item.wrap(annotations=annotations) - -@exporter(name="ICDAR Recognition", ext="ZIP", version="1.0") +@exporter(name='ICDAR Recognition', ext='ZIP', version='1.0') def _export_recognition(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(LabelToCaption) - dataset.export(temp_dir, "icdar_word_recognition", save_images=save_images) + dataset.export(temp_dir, 'icdar_word_recognition', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="ICDAR Recognition", ext="ZIP", version="1.0") +@importer(name='ICDAR Recognition', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) # We do not run detect_dataset before import because the ICDAR format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - dataset = Dataset.import_from(temp_dir, "icdar_word_recognition", env=dm_env) - dataset.transform(CaptionToLabel, label="icdar") + dataset = Dataset.import_from(temp_dir, 'icdar_word_recognition', env=dm_env) + dataset.transform(CaptionToLabel, label='icdar') if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) -@exporter(name="ICDAR Localization", ext="ZIP", version="1.0") +@exporter(name='ICDAR Localization', ext='ZIP', version='1.0') def _export_localization(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, "icdar_text_localization", save_images=save_images) + dataset.export(temp_dir, 'icdar_text_localization', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="ICDAR Localization", ext="ZIP", version="1.0") +@importer(name='ICDAR Localization', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) # We do not run detect_dataset before import because the ICDAR format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - dataset = Dataset.import_from(temp_dir, "icdar_text_localization", env=dm_env) - dataset.transform(AddLabelToAnns, label="icdar") + dataset = Dataset.import_from(temp_dir, 'icdar_text_localization', env=dm_env) + 
dataset.transform(AddLabelToAnns, label='icdar') if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) -@exporter(name="ICDAR Segmentation", ext="ZIP", version="1.0") +@exporter(name='ICDAR Segmentation', ext='ZIP', version='1.0') def _export_segmentation(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform("polygons_to_masks") - dataset.transform("boxes_to_masks") - dataset.transform("merge_instance_segments") - dataset.export(temp_dir, "icdar_text_segmentation", save_images=save_images) + dataset.transform('polygons_to_masks') + dataset.transform('boxes_to_masks') + dataset.transform('merge_instance_segments') + dataset.export(temp_dir, 'icdar_text_segmentation', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="ICDAR Segmentation", ext="ZIP", version="1.0") +@importer(name='ICDAR Segmentation', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) # We do not run detect_dataset before import because the ICDAR format # has problem with the dataset detection in case of empty annotation file(s) # Details in: https://github.com/cvat-ai/datumaro/issues/43 - dataset = Dataset.import_from(temp_dir, "icdar_text_segmentation", env=dm_env) - dataset.transform(AddLabelToAnns, label="icdar") + dataset = Dataset.import_from(temp_dir, 'icdar_text_segmentation', env=dm_env) + dataset.transform(AddLabelToAnns, label='icdar') dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/imagenet.py b/cvat/apps/dataset_manager/formats/imagenet.py index 45da41715547..fd5e9a99a176 100644 --- a/cvat/apps/dataset_manager/formats/imagenet.py +++ b/cvat/apps/dataset_manager/formats/imagenet.py @@ -9,38 +9,35 @@ from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, \ + import_dm_annotations from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name="ImageNet", ext="ZIP", version="1.0") +@exporter(name='ImageNet', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) if save_images: - dataset.export(temp_dir, "imagenet", save_images=save_images) + dataset.export(temp_dir, 'imagenet', save_images=save_images) else: - dataset.export(temp_dir, "imagenet_txt", save_images=save_images) + dataset.export(temp_dir, 'imagenet_txt', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="ImageNet", ext="ZIP", version="1.0") +@importer(name='ImageNet', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) # We do not run detect_dataset before import because the Imagenet format # has problem with the dataset detection in case of empty annotation file(s) # Details 
in: https://github.com/cvat-ai/datumaro/issues/43 - if glob(osp.join(temp_dir, "*.txt")): - dataset = Dataset.import_from(temp_dir, "imagenet_txt", env=dm_env) + if glob(osp.join(temp_dir, '*.txt')): + dataset = Dataset.import_from(temp_dir, 'imagenet_txt', env=dm_env) else: - dataset = Dataset.import_from(temp_dir, "imagenet", env=dm_env) + dataset = Dataset.import_from(temp_dir, 'imagenet', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/kitti.py b/cvat/apps/dataset_manager/formats/kitti.py index 1784cdbbf670..01e1cd3fc4bc 100644 --- a/cvat/apps/dataset_manager/formats/kitti.py +++ b/cvat/apps/dataset_manager/formats/kitti.py @@ -10,11 +10,7 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -22,26 +18,22 @@ from .utils import make_colormap -@exporter(name="KITTI", ext="ZIP", version="1.0") +@exporter(name='KITTI', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform("polygons_to_masks") - dataset.transform("merge_instance_segments") - dataset.export( - temp_dir, - format="kitti", + dataset.transform('polygons_to_masks') + dataset.transform('merge_instance_segments') + dataset.export(temp_dir, format='kitti', label_map={k: v[0] for k, v in make_colormap(instance_data).items()}, - apply_colormap=True, - save_images=save_images, + apply_colormap=True, save_images=save_images ) make_zip_archive(temp_dir, dst_file) - -@importer(name="KITTI", ext="ZIP", version="1.0") +@importer(name='KITTI', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) @@ -50,15 +42,12 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs if not osp.isfile(color_map_path): write_label_map(color_map_path, color_map) - detect_dataset( - temp_dir, format_name="kitti", importer=dm_env.importers.get("kitti") - ) - dataset = Dataset.import_from(temp_dir, format="kitti", env=dm_env) - labels_meta = instance_data.meta[instance_data.META_FIELD]["labels"] - if "background" not in [label["name"] for _, label in labels_meta]: - dataset.filter( - '/item/annotation[label != "background"]', filter_annotations=True - ) + detect_dataset(temp_dir, format_name='kitti', importer=dm_env.importers.get('kitti')) + dataset = Dataset.import_from(temp_dir, format='kitti', env=dm_env) + labels_meta = instance_data.meta[instance_data.META_FIELD]['labels'] + if 'background' not in [label['name'] for _, label in labels_meta]: + dataset.filter('/item/annotation[label != "background"]', + filter_annotations=True) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: diff --git a/cvat/apps/dataset_manager/formats/labelme.py b/cvat/apps/dataset_manager/formats/labelme.py index a797f0cd492f..be9679f268e8 100644 --- 
a/cvat/apps/dataset_manager/formats/labelme.py +++ b/cvat/apps/dataset_manager/formats/labelme.py @@ -6,36 +6,28 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) -from cvat.apps.dataset_manager.formats.transformations import ( - MaskToPolygonTransformation, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + import_dm_annotations) +from cvat.apps.dataset_manager.formats.transformations import MaskToPolygonTransformation from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name="LabelMe", ext="ZIP", version="3.0") +@exporter(name='LabelMe', ext='ZIP', version='3.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, "label_me", save_images=save_images) + dataset.export(temp_dir, 'label_me', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="LabelMe", ext="ZIP", version="3.0") +@importer(name='LabelMe', ext='ZIP', version='3.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset( - temp_dir, format_name="label_me", importer=dm_env.importers.get("label_me") - ) - dataset = Dataset.import_from(temp_dir, "label_me", env=dm_env) + detect_dataset(temp_dir, format_name='label_me', importer=dm_env.importers.get('label_me')) + dataset = Dataset.import_from(temp_dir, 'label_me', env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/lfw.py b/cvat/apps/dataset_manager/formats/lfw.py index b6aa35a4ef13..0af356332bb5 100644 --- a/cvat/apps/dataset_manager/formats/lfw.py +++ b/cvat/apps/dataset_manager/formats/lfw.py @@ -6,31 +6,27 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@importer(name="LFW", ext="ZIP", version="1.0") +@importer(name='LFW', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name="lfw", importer=dm_env.importers.get("lfw")) - dataset = Dataset.import_from(temp_dir, "lfw") + detect_dataset(temp_dir, format_name='lfw', importer=dm_env.importers.get('lfw')) + dataset = Dataset.import_from(temp_dir, 'lfw') if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) - -@exporter(name="LFW", ext="ZIP", version="1.0") +@exporter(name='LFW', ext='ZIP', version='1.0') def _exporter(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, format="lfw", 
save_images=save_images) + dataset.export(temp_dir, format='lfw', save_images=save_images) make_zip_archive(temp_dir, dst_file) diff --git a/cvat/apps/dataset_manager/formats/market1501.py b/cvat/apps/dataset_manager/formats/market1501.py index a7e255a92747..6be8b2fcf75f 100644 --- a/cvat/apps/dataset_manager/formats/market1501.py +++ b/cvat/apps/dataset_manager/formats/market1501.py @@ -5,20 +5,17 @@ import zipfile -from datumaro.components.annotation import AnnotationType, Label, LabelCategories +from datumaro.components.annotation import (AnnotationType, Label, + LabelCategories) from datumaro.components.dataset import Dataset from datumaro.components.extractor import ItemTransform -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer - class AttrToLabelAttr(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -42,7 +39,6 @@ def transform_item(self, item): attributes = {} return item.wrap(annotations=annotations, attributes=attributes) - class LabelAttrToAttr(ItemTransform): def __init__(self, extractor, label): super().__init__(extractor) @@ -55,37 +51,32 @@ def transform_item(self, item): annotations = list(item.annotations) attributes = dict(item.attributes) if self._label is not None: - labels = [ - ann - for ann in annotations - if ann.type == AnnotationType.label and ann.label == self._label - ] + labels = [ann for ann in annotations + if ann.type == AnnotationType.label \ + and ann.label == self._label] if len(labels) == 1: attributes.update(labels[0].attributes) annotations.remove(labels[0]) return item.wrap(annotations=annotations, attributes=attributes) -@exporter(name="Market-1501", ext="ZIP", version="1.0") +@exporter(name='Market-1501', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.transform(LabelAttrToAttr, label="market-1501") - dataset.export(temp_dir, "market1501", save_images=save_images) + dataset.transform(LabelAttrToAttr, label='market-1501') + dataset.export(temp_dir, 'market1501', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="Market-1501", ext="ZIP", version="1.0") +@importer(name='Market-1501', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset( - temp_dir, format_name="market1501", importer=dm_env.importers.get("market1501") - ) - dataset = Dataset.import_from(temp_dir, "market1501", env=dm_env) - dataset.transform(AttrToLabelAttr, label="market-1501") + detect_dataset(temp_dir, format_name='market1501', importer=dm_env.importers.get('market1501')) + dataset = Dataset.import_from(temp_dir, 'market1501', env=dm_env) + dataset.transform(AttrToLabelAttr, label='market-1501') if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/mask.py b/cvat/apps/dataset_manager/formats/mask.py index 400ed8399c06..f003f68383e7 100644 --- a/cvat/apps/dataset_manager/formats/mask.py +++ 
b/cvat/apps/dataset_manager/formats/mask.py @@ -6,44 +6,34 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons from .registry import dm_env, exporter, importer from .utils import make_colormap - -@exporter(name="Segmentation mask", ext="ZIP", version="1.1") +@exporter(name='Segmentation mask', ext='ZIP', version='1.1') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform("polygons_to_masks") - dataset.transform("boxes_to_masks") - dataset.transform("merge_instance_segments") - - dataset.export( - temp_dir, - "voc_segmentation", - save_images=save_images, - apply_colormap=True, - label_map=make_colormap(instance_data), - ) + dataset.transform('polygons_to_masks') + dataset.transform('boxes_to_masks') + dataset.transform('merge_instance_segments') - make_zip_archive(temp_dir, dst_file) + dataset.export(temp_dir, 'voc_segmentation', save_images=save_images, + apply_colormap=True, label_map=make_colormap(instance_data)) + make_zip_archive(temp_dir, dst_file) -@importer(name="Segmentation mask", ext="ZIP", version="1.1") +@importer(name='Segmentation mask', ext='ZIP', version='1.1') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name="voc", importer=dm_env.importers.get("voc")) - dataset = Dataset.import_from(temp_dir, "voc", env=dm_env) + detect_dataset(temp_dir, format_name='voc', importer=dm_env.importers.get('voc')) + dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/mot.py b/cvat/apps/dataset_manager/formats/mot.py index f3ede7150299..4030d865c742 100644 --- a/cvat/apps/dataset_manager/formats/mot.py +++ b/cvat/apps/dataset_manager/formats/mot.py @@ -26,31 +26,29 @@ def _import_to_task(dataset, instance_data): if ann.type != dm.AnnotationType.bbox: continue - occluded = ann.attributes.pop("occluded", False) is True - track_id = ann.attributes.pop("track_id", None) + occluded = ann.attributes.pop('occluded', False) is True + track_id = ann.attributes.pop('track_id', None) attributes = [ instance_data.Attribute(name=n, value=str(v)) for n, v in ann.attributes.items() ] if track_id is None: # Extension. 
Import regular boxes: - instance_data.add_shape( - instance_data.LabeledShape( - type="rectangle", - label=label_cat.items[ann.label].name, - points=ann.points, - occluded=occluded, - z_order=ann.z_order, - group=0, - frame=frame_number, - attributes=attributes, - source="manual", - ) - ) + instance_data.add_shape(instance_data.LabeledShape( + type='rectangle', + label=label_cat.items[ann.label].name, + points=ann.points, + occluded=occluded, + z_order=ann.z_order, + group=0, + frame=frame_number, + attributes=attributes, + source='manual', + )) continue shape = instance_data.TrackedShape( - type="rectangle", + type='rectangle', points=ann.points, occluded=occluded, outside=False, @@ -58,14 +56,13 @@ def _import_to_task(dataset, instance_data): z_order=ann.z_order, frame=frame_number, attributes=attributes, - source="manual", + source='manual', ) # build trajectories as lists of shapes in track dict if track_id not in tracks: tracks[track_id] = instance_data.Track( - label_cat.items[ann.label].name, 0, "manual", [] - ) + label_cat.items[ann.label].name, 0, 'manual', []) tracks[track_id].shapes.append(shape) for track in tracks.values(): @@ -78,9 +75,8 @@ def _import_to_task(dataset, instance_data): for shape in track.shapes[1:]: has_skip = instance_data.frame_step < shape.frame - prev_shape.frame if has_skip and not prev_shape.outside: - prev_shape = prev_shape._replace( - outside=True, frame=prev_shape.frame + instance_data.frame_step - ) + prev_shape = prev_shape._replace(outside=True, + frame=prev_shape.frame + instance_data.frame_step) prev_shape_idx += 1 track.shapes.insert(prev_shape_idx, prev_shape) prev_shape = shape @@ -88,40 +84,34 @@ def _import_to_task(dataset, instance_data): # Append a shape with outside=True to finish the track last_shape = track.shapes[-1] - if last_shape.frame + instance_data.frame_step <= int( - instance_data.meta[instance_data.META_FIELD]["stop_frame"] - ): - track.shapes.append( - last_shape._replace( - outside=True, frame=last_shape.frame + instance_data.frame_step - ) + if last_shape.frame + instance_data.frame_step <= \ + int(instance_data.meta[instance_data.META_FIELD]['stop_frame']): + track.shapes.append(last_shape._replace(outside=True, + frame=last_shape.frame + instance_data.frame_step) ) instance_data.add_track(track) -@exporter(name="MOT", ext="ZIP", version="1.1") +@exporter(name='MOT', ext='ZIP', version='1.1') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = dm.Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, "mot_seq_gt", save_images=save_images) + dataset.export(temp_dir, 'mot_seq_gt', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="MOT", ext="ZIP", version="1.1") +@importer(name='MOT', ext='ZIP', version='1.1') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset( - temp_dir, format_name="mot_seq", importer=dm_env.importers.get("mot_seq") - ) - dataset = dm.Dataset.import_from(temp_dir, "mot_seq", env=dm_env) + detect_dataset(temp_dir, format_name='mot_seq', importer=dm_env.importers.get('mot_seq')) + dataset = dm.Dataset.import_from(temp_dir, 'mot_seq', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) # Dirty way to determine instance type to avoid circular dependency - if hasattr(instance_data, "_db_project"): + if hasattr(instance_data, 
'_db_project'): for sub_dataset, task_data in instance_data.split_dataset(dataset): _import_to_task(sub_dataset, task_data) else: diff --git a/cvat/apps/dataset_manager/formats/mots.py b/cvat/apps/dataset_manager/formats/mots.py index 0ed88842012b..9ed156e6cd4e 100644 --- a/cvat/apps/dataset_manager/formats/mots.py +++ b/cvat/apps/dataset_manager/formats/mots.py @@ -8,12 +8,8 @@ from datumaro.components.extractor import ItemTransform from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - find_dataset_root, - match_dm_item, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + find_dataset_root, match_dm_item) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -22,10 +18,8 @@ class KeepTracks(ItemTransform): def transform_item(self, item): - return item.wrap( - annotations=[a for a in item.annotations if "track_id" in a.attributes] - ) - + return item.wrap(annotations=[a for a in item.annotations + if 'track_id' in a.attributes]) def _import_to_task(dataset, instance_data): tracks = {} @@ -36,8 +30,7 @@ def _import_to_task(dataset, instance_data): shift = 0 for item in dataset: frame_number = instance_data.abs_frame_id( - match_dm_item(item, instance_data, root_hint=root_hint) - ) + match_dm_item(item, instance_data, root_hint=root_hint)) track_ids = set() @@ -45,7 +38,7 @@ def _import_to_task(dataset, instance_data): if ann.type != AnnotationType.polygon: continue - track_id = ann.attributes["track_id"] + track_id = ann.attributes['track_id'] group_id = track_id if track_id in track_ids: @@ -56,23 +49,22 @@ def _import_to_task(dataset, instance_data): track_ids.add(track_id) shape = instance_data.TrackedShape( - type="polygon", + type='polygon', points=ann.points, - occluded=ann.attributes.get("occluded") is True, + occluded=ann.attributes.get('occluded') is True, outside=False, keyframe=True, z_order=ann.z_order, frame=frame_number, attributes=[], - source="manual", - group=group_id, + source='manual', + group=group_id ) # build trajectories as lists of shapes in track dict if track_id not in tracks: tracks[track_id] = instance_data.Track( - label_cat.items[ann.label].name, 0, "manual", [] - ) + label_cat.items[ann.label].name, 0, 'manual', []) tracks[track_id].shapes.append(shape) for track in tracks.values(): @@ -84,9 +76,8 @@ def _import_to_task(dataset, instance_data): for shape in track.shapes[1:]: has_skip = instance_data.frame_step < shape.frame - prev_shape.frame if has_skip and not prev_shape.outside: - prev_shape = prev_shape._replace( - outside=True, frame=prev_shape.frame + instance_data.frame_step - ) + prev_shape = prev_shape._replace(outside=True, + frame=prev_shape.frame + instance_data.frame_step) prev_shape_idx += 1 track.shapes.insert(prev_shape_idx, prev_shape) prev_shape = shape @@ -94,45 +85,41 @@ def _import_to_task(dataset, instance_data): # Append a shape with outside=True to finish the track last_shape = track.shapes[-1] - if last_shape.frame + instance_data.frame_step <= int( - instance_data.meta[instance_data.META_FIELD]["stop_frame"] - ): - track.shapes.append( - last_shape._replace( - outside=True, frame=last_shape.frame + instance_data.frame_step - ) + if last_shape.frame + instance_data.frame_step <= \ + int(instance_data.meta[instance_data.META_FIELD]['stop_frame']): + track.shapes.append(last_shape._replace(outside=True, + frame=last_shape.frame + 
instance_data.frame_step) ) instance_data.add_track(track) - -@exporter(name="MOTS PNG", ext="ZIP", version="1.0") +@exporter(name='MOTS PNG', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.transform(KeepTracks) # can only export tracks + dataset.transform(KeepTracks) # can only export tracks dataset.transform(RotatedBoxesToPolygons) - dataset.transform("polygons_to_masks") - dataset.transform("boxes_to_masks") - dataset.transform("merge_instance_segments") + dataset.transform('polygons_to_masks') + dataset.transform('boxes_to_masks') + dataset.transform('merge_instance_segments') - dataset.export(temp_dir, "mots_png", save_images=save_images) + dataset.export(temp_dir, 'mots_png', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="MOTS PNG", ext="ZIP", version="1.0") +@importer(name='MOTS PNG', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset(temp_dir, format_name="mots", importer=dm_env.importers.get("mots")) - dataset = Dataset.import_from(temp_dir, "mots", env=dm_env) + detect_dataset(temp_dir, format_name='mots', importer=dm_env.importers.get('mots')) + dataset = Dataset.import_from(temp_dir, 'mots', env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) # Dirty way to determine instance type to avoid circular dependency - if hasattr(instance_data, "_db_project"): + if hasattr(instance_data, '_db_project'): for sub_dataset, task_data in instance_data.split_dataset(dataset): _import_to_task(sub_dataset, task_data) else: _import_to_task(dataset, instance_data) + diff --git a/cvat/apps/dataset_manager/formats/openimages.py b/cvat/apps/dataset_manager/formats/openimages.py index ae299386a74a..51fcee29a2fb 100644 --- a/cvat/apps/dataset_manager/formats/openimages.py +++ b/cvat/apps/dataset_manager/formats/openimages.py @@ -11,13 +11,8 @@ from datumaro.util.image import DEFAULT_IMAGE_META_FILE_NAME from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - find_dataset_root, - import_dm_annotations, - match_dm_item, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + find_dataset_root, import_dm_annotations, match_dm_item) from cvat.apps.dataset_manager.util import make_zip_archive from .transformations import MaskToPolygonTransformation, RotatedBoxesToPolygons @@ -27,7 +22,7 @@ def find_item_ids(path): image_desc_patterns = ( OpenImagesPath.FULL_IMAGE_DESCRIPTION_FILE_NAME, - *OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_FILE_PATTERNS, + *OpenImagesPath.SUBSET_IMAGE_DESCRIPTION_FILE_PATTERNS ) image_desc_patterns = ( @@ -37,32 +32,29 @@ def find_item_ids(path): for pattern in image_desc_patterns: for path in glob.glob(pattern): - with open(path, "r") as desc: + with open(path, 'r') as desc: next(desc) for row in desc: - yield row.split(",")[0] + yield row.split(',')[0] - -@exporter(name="Open Images V6", ext="ZIP", version="1.0") +@exporter(name='Open Images V6', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, task_data, save_images=False): with GetCVATDataExtractor(task_data, include_images=save_images) as extractor: dataset = 
Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RotatedBoxesToPolygons) - dataset.transform("polygons_to_masks") - dataset.transform("merge_instance_segments") + dataset.transform('polygons_to_masks') + dataset.transform('merge_instance_segments') - dataset.export(temp_dir, "open_images", save_images=save_images) + dataset.export(temp_dir, 'open_images', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="Open Images V6", ext="ZIP", version="1.0") +@importer(name='Open Images V6', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - image_meta_path = osp.join( - temp_dir, OpenImagesPath.ANNOTATIONS_DIR, DEFAULT_IMAGE_META_FILE_NAME - ) + image_meta_path = osp.join(temp_dir, OpenImagesPath.ANNOTATIONS_DIR, + DEFAULT_IMAGE_META_FILE_NAME) image_meta = None if not osp.isfile(image_meta_path): @@ -70,30 +62,25 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs item_ids = list(find_item_ids(temp_dir)) root_hint = find_dataset_root( - [DatasetItem(id=item_id) for item_id in item_ids], instance_data - ) + [DatasetItem(id=item_id) for item_id in item_ids], instance_data) for item_id in item_ids: frame_info = None try: - frame_id = match_dm_item( - DatasetItem(id=item_id), instance_data, root_hint - ) + frame_id = match_dm_item(DatasetItem(id=item_id), + instance_data, root_hint) frame_info = instance_data.frame_info[frame_id] - except Exception: # nosec + except Exception: # nosec pass if frame_info is not None: - image_meta[item_id] = (frame_info["height"], frame_info["width"]) + image_meta[item_id] = (frame_info['height'], frame_info['width']) - detect_dataset( - temp_dir, - format_name="open_images", - importer=dm_env.importers.get("open_images"), - ) - dataset = Dataset.import_from( - temp_dir, "open_images", image_meta=image_meta, env=dm_env - ) + detect_dataset(temp_dir, format_name='open_images', importer=dm_env.importers.get('open_images')) + dataset = Dataset.import_from(temp_dir, 'open_images', + image_meta=image_meta, env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) + + diff --git a/cvat/apps/dataset_manager/formats/pascal_voc.py b/cvat/apps/dataset_manager/formats/pascal_voc.py index 1a16d62350d8..a0d84b745d73 100644 --- a/cvat/apps/dataset_manager/formats/pascal_voc.py +++ b/cvat/apps/dataset_manager/formats/pascal_voc.py @@ -11,57 +11,51 @@ from datumaro.components.dataset import Dataset from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) -from cvat.apps.dataset_manager.formats.transformations import ( - MaskToPolygonTransformation, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, import_dm_annotations) +from cvat.apps.dataset_manager.formats.transformations import MaskToPolygonTransformation from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name="PASCAL VOC", ext="ZIP", version="1.1") +@exporter(name='PASCAL VOC', ext='ZIP', version='1.1') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - 
dataset.export(temp_dir, "voc", save_images=save_images, label_map="source") + dataset.export(temp_dir, 'voc', save_images=save_images, + label_map='source') make_zip_archive(temp_dir, dst_file) - -@importer(name="PASCAL VOC", ext="ZIP", version="1.1") +@importer(name='PASCAL VOC', ext='ZIP', version='1.1') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) # put label map from the task if not present - labelmap_file = osp.join(temp_dir, "labelmap.txt") + labelmap_file = osp.join(temp_dir, 'labelmap.txt') if not osp.isfile(labelmap_file): - labels_meta = instance_data.meta[instance_data.META_FIELD]["labels"] - labels = (label["name"] + ":::" for _, label in labels_meta) - with open(labelmap_file, "w") as f: - f.write("\n".join(labels)) + labels_meta = instance_data.meta[instance_data.META_FIELD]['labels'] + labels = (label['name'] + ':::' for _, label in labels_meta) + with open(labelmap_file, 'w') as f: + f.write('\n'.join(labels)) # support flat archive layout - anno_dir = osp.join(temp_dir, "Annotations") + anno_dir = osp.join(temp_dir, 'Annotations') if not osp.isdir(anno_dir): - anno_files = glob(osp.join(temp_dir, "**", "*.xml"), recursive=True) - subsets_dir = osp.join(temp_dir, "ImageSets", "Main") + anno_files = glob(osp.join(temp_dir, '**', '*.xml'), recursive=True) + subsets_dir = osp.join(temp_dir, 'ImageSets', 'Main') os.makedirs(subsets_dir, exist_ok=True) - with open(osp.join(subsets_dir, "train.txt"), "w") as subset_file: + with open(osp.join(subsets_dir, 'train.txt'), 'w') as subset_file: for f in anno_files: - subset_file.write(osp.splitext(osp.basename(f))[0] + "\n") + subset_file.write(osp.splitext(osp.basename(f))[0] + '\n') os.makedirs(anno_dir, exist_ok=True) for f in anno_files: shutil.move(f, anno_dir) - detect_dataset(temp_dir, format_name="voc", importer=dm_env.importers.get("voc")) - dataset = Dataset.import_from(temp_dir, "voc", env=dm_env) + detect_dataset(temp_dir, format_name='voc', importer=dm_env.importers.get('voc')) + dataset = Dataset.import_from(temp_dir, 'voc', env=dm_env) dataset = MaskToPolygonTransformation.convert_dataset(dataset, **kwargs) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/pointcloud.py b/cvat/apps/dataset_manager/formats/pointcloud.py index 106611993469..6ddfbb495427 100644 --- a/cvat/apps/dataset_manager/formats/pointcloud.py +++ b/cvat/apps/dataset_manager/formats/pointcloud.py @@ -7,59 +7,35 @@ from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from cvat.apps.engine.models import DimensionType from .registry import dm_env, exporter, importer -@exporter( - name="Sly Point Cloud Format", - ext="ZIP", - version="1.0", - dimension=DimensionType.DIM_3D, -) +@exporter(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) def _export_images(dst_file, temp_dir, task_data, save_images=False): with GetCVATDataExtractor( - task_data, - include_images=save_images, - format_type="sly_pointcloud", + task_data, include_images=save_images, format_type='sly_pointcloud', dimension=DimensionType.DIM_3D, ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - 
dataset.export( - temp_dir, - "sly_pointcloud", - save_images=save_images, - allow_undeclared_attrs=True, - ) + dataset.export(temp_dir, 'sly_pointcloud', save_images=save_images, allow_undeclared_attrs=True) make_zip_archive(temp_dir, dst_file) -@importer( - name="Sly Point Cloud Format", - ext="ZIP", - version="1.0", - dimension=DimensionType.DIM_3D, -) +@importer(name='Sly Point Cloud Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): if zipfile.is_zipfile(src_file): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset( - temp_dir, - format_name="sly_pointcloud", - importer=dm_env.importers.get("sly_pointcloud"), - ) - dataset = Dataset.import_from(temp_dir, "sly_pointcloud", env=dm_env) + detect_dataset(temp_dir, format_name='sly_pointcloud', importer=dm_env.importers.get('sly_pointcloud')) + dataset = Dataset.import_from(temp_dir, 'sly_pointcloud', env=dm_env) else: - dataset = Dataset.import_from(src_file.name, "sly_pointcloud", env=dm_env) + dataset = Dataset.import_from(src_file.name, 'sly_pointcloud', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/registry.py b/cvat/apps/dataset_manager/formats/registry.py index 5c0a7550f736..99121290de46 100644 --- a/cvat/apps/dataset_manager/formats/registry.py +++ b/cvat/apps/dataset_manager/formats/registry.py @@ -8,45 +8,28 @@ dm_env = Environment() - class _Format: - NAME = "" - EXT = "" - VERSION = "" - DISPLAY_NAME = "{NAME} {VERSION}" + NAME = '' + EXT = '' + VERSION = '' + DISPLAY_NAME = '{NAME} {VERSION}' ENABLED = True - class Exporter(_Format): def __call__(self, dst_file, temp_dir, instance_data, **options): raise NotImplementedError() - class Importer(_Format): - def __call__( - self, src_file, temp_dir, instance_data, load_data_callback=None, **options - ): + def __call__(self, src_file, temp_dir, instance_data, load_data_callback=None, **options): raise NotImplementedError() - -def _wrap_format( - f_or_cls, - klass, - name, - version, - ext, - display_name, - enabled, - dimension=DimensionType.DIM_2D, -): +def _wrap_format(f_or_cls, klass, name, version, ext, display_name, enabled, dimension=DimensionType.DIM_2D): import inspect - assert inspect.isclass(f_or_cls) or inspect.isfunction(f_or_cls) if inspect.isclass(f_or_cls): - assert hasattr(f_or_cls, "__call__") + assert hasattr(f_or_cls, '__call__') target = f_or_cls elif inspect.isfunction(f_or_cls): - class wrapper(klass): # pylint: disable=arguments-differ def __call__(self, *args, **kwargs): @@ -60,8 +43,7 @@ def __call__(self, *args, **kwargs): target.VERSION = version or klass.VERSION target.EXT = ext or klass.EXT target.DISPLAY_NAME = (display_name or klass.DISPLAY_NAME).format( - NAME=name, VERSION=version, EXT=ext - ) + NAME=name, VERSION=version, EXT=ext) assert all([target.NAME, target.VERSION, target.EXT, target.DISPLAY_NAME]) target.DIMENSION = dimension target.ENABLED = enabled @@ -82,59 +64,33 @@ def format_for(export_format, mode): return format_name -def exporter( - name, version, ext, display_name=None, enabled=True, dimension=DimensionType.DIM_2D -): +def exporter(name, version, ext, display_name=None, enabled=True, dimension=DimensionType.DIM_2D): assert name not in EXPORT_FORMATS, "Export format '%s' already registered" % name - def wrap_with_params(f_or_cls): - t = _wrap_format( - f_or_cls, - Exporter, - name=name, - 
ext=ext, - version=version, - display_name=display_name, - enabled=enabled, - dimension=dimension, - ) + t = _wrap_format(f_or_cls, Exporter, + name=name, ext=ext, version=version, display_name=display_name, + enabled=enabled, dimension=dimension) key = t.DISPLAY_NAME assert key not in EXPORT_FORMATS, "Export format '%s' already registered" % name EXPORT_FORMATS[key] = t return t - return wrap_with_params - IMPORT_FORMATS = {} - - -def importer( - name, version, ext, display_name=None, enabled=True, dimension=DimensionType.DIM_2D -): +def importer(name, version, ext, display_name=None, enabled=True, dimension=DimensionType.DIM_2D): def wrap_with_params(f_or_cls): - t = _wrap_format( - f_or_cls, - Importer, - name=name, - ext=ext, - version=version, - display_name=display_name, - enabled=enabled, - dimension=dimension, - ) + t = _wrap_format(f_or_cls, Importer, + name=name, ext=ext, version=version, display_name=display_name, + enabled=enabled, dimension=dimension) key = t.DISPLAY_NAME assert key not in IMPORT_FORMATS, "Import format '%s' already registered" % name IMPORT_FORMATS[key] = t return t - return wrap_with_params - def make_importer(name): return IMPORT_FORMATS[name]() - def make_exporter(name): return EXPORT_FORMATS[name]() @@ -148,7 +104,6 @@ def make_exporter(name): import cvat.apps.dataset_manager.formats.mot import cvat.apps.dataset_manager.formats.mots import cvat.apps.dataset_manager.formats.pascal_voc - # import cvat.apps.dataset_manager.formats.yolo import cvat.apps.dataset_manager.formats.imagenet import cvat.apps.dataset_manager.formats.camvid @@ -162,11 +117,9 @@ def make_exporter(name): import cvat.apps.dataset_manager.formats.lfw import cvat.apps.dataset_manager.formats.cityscapes import cvat.apps.dataset_manager.formats.openimages - -# Audino Export Formats +#Audino Export Formats import cvat.apps.dataset_manager.formats.aud_common_voice - -# Audino Import Formats +#Audino Import Formats import cvat.apps.dataset_manager.formats.librispeech import cvat.apps.dataset_manager.formats.common_voice import cvat.apps.dataset_manager.formats.tedlium @@ -174,3 +127,11 @@ def make_exporter(name): import cvat.apps.dataset_manager.formats.VoxCeleb import cvat.apps.dataset_manager.formats.VCTK_Corpus import cvat.apps.dataset_manager.formats.LibriVox + + + + + + + + diff --git a/cvat/apps/dataset_manager/formats/transformations.py b/cvat/apps/dataset_manager/formats/transformations.py index 0b37bcdf7b4e..99d754252378 100644 --- a/cvat/apps/dataset_manager/formats/transformations.py +++ b/cvat/apps/dataset_manager/formats/transformations.py @@ -21,38 +21,22 @@ def _rotate_point(self, p, angle, cx, cy): def transform_item(self, item): annotations = item.annotations[:] - anns = [ - p - for p in annotations - if p.type == dm.AnnotationType.bbox and p.attributes["rotation"] - ] + anns = [p for p in annotations if p.type == dm.AnnotationType.bbox and p.attributes['rotation']] for ann in anns: - rotation = math.radians(ann.attributes["rotation"]) + rotation = math.radians(ann.attributes['rotation']) x0, y0, x1, y1 = ann.points [cx, cy] = [(x0 + (x1 - x0) / 2), (y0 + (y1 - y0) / 2)] - anno_points = list( - chain.from_iterable( - map( - lambda p: self._rotate_point(p, rotation, cx, cy), - [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], - ) - ) - ) + anno_points = list(chain.from_iterable( + map(lambda p: self._rotate_point(p, rotation, cx, cy), [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]) + )) annotations.remove(ann) - annotations.append( - dm.Polygon( - anno_points, - label=ann.label, - 
attributes=ann.attributes, - group=ann.group, - z_order=ann.z_order, - ) - ) + annotations.append(dm.Polygon(anno_points, + label=ann.label, attributes=ann.attributes, group=ann.group, + z_order=ann.z_order)) return item.wrap(annotations=annotations) - class MaskConverter: @staticmethod def cvat_rle_to_dm_rle(shape, img_h: int, img_w: int) -> dm.RleMask: @@ -77,13 +61,8 @@ def cvat_rle_to_dm_rle(shape, img_h: int, img_w: int) -> dm.RleMask: # obtain RLE coco_rle = mask_utils.encode(np.asfortranarray(full_mask)) - return dm.RleMask( - rle=coco_rle, - label=shape.label, - z_order=shape.z_order, - attributes=shape.attributes, - group=shape.group, - ) + return dm.RleMask(rle=coco_rle, label=shape.label, z_order=shape.z_order, + attributes=shape.attributes, group=shape.group) @classmethod def dm_mask_to_cvat_rle(cls, dm_mask: dm.Mask) -> list[int]: @@ -121,7 +100,6 @@ def rle(cls, arr: np.ndarray) -> list[int]: return cvat_rle - class EllipsesToMasks: @staticmethod def convert_ellipse(ellipse, img_h, img_w): @@ -134,14 +112,8 @@ def convert_ellipse(ellipse, img_h, img_w): mat = np.zeros((img_h, img_w), dtype=np.uint8) cv2.ellipse(mat, center, axis, angle, 0, 360, 255, thickness=-1) rle = mask_utils.encode(np.asfortranarray(mat)) - return dm.RleMask( - rle=rle, - label=ellipse.label, - z_order=ellipse.z_order, - attributes=ellipse.attributes, - group=ellipse.group, - ) - + return dm.RleMask(rle=rle, label=ellipse.label, z_order=ellipse.z_order, + attributes=ellipse.attributes, group=ellipse.group) class MaskToPolygonTransformation: """ @@ -151,10 +123,10 @@ class MaskToPolygonTransformation: @classmethod def declare_arg_names(cls): - return ["conv_mask_to_poly"] + return ['conv_mask_to_poly'] @classmethod def convert_dataset(cls, dataset, **kwargs): - if kwargs.get("conv_mask_to_poly", True): - dataset.transform("masks_to_polygons") + if kwargs.get('conv_mask_to_poly', True): + dataset.transform('masks_to_polygons') return dataset diff --git a/cvat/apps/dataset_manager/formats/utils.py b/cvat/apps/dataset_manager/formats/utils.py index 6392a90a13b2..7811fbbfc902 100644 --- a/cvat/apps/dataset_manager/formats/utils.py +++ b/cvat/apps/dataset_manager/formats/utils.py @@ -9,7 +9,6 @@ from datumaro.util.os_util import make_file_name - def get_color_from_index(index): def get_bit(number, index): return (number >> index) & 1 @@ -23,10 +22,7 @@ def get_bit(number, index): return tuple(color) - -DEFAULT_COLORMAP_PATH = osp.join(osp.dirname(__file__), "predefined_colors.txt") - - +DEFAULT_COLORMAP_PATH = osp.join(osp.dirname(__file__), 'predefined_colors.txt') def parse_default_colors(file_path=None): if file_path is None: file_path = DEFAULT_COLORMAP_PATH @@ -35,72 +31,58 @@ def parse_default_colors(file_path=None): with open(file_path) as f: for line in f: line = line.strip() - if not line or line[0] == "#": + if not line or line[0] == '#': continue - _, label, color = line.split(":") - colors[label] = tuple(map(int, color.split(","))) + _, label, color = line.split(':') + colors[label] = tuple(map(int, color.split(','))) return colors - def normalize_label(label): - label = make_file_name(label) # basically, convert to ASCII lowercase - label = label.replace("-", "_") + label = make_file_name(label) # basically, convert to ASCII lowercase + label = label.replace('-', '_') return label - def rgb2hex(color): - return "#{0:02x}{1:02x}{2:02x}".format(*color) - + return '#{0:02x}{1:02x}{2:02x}'.format(*color) def hex2rgb(color): - return tuple(int(color.lstrip("#")[i : i + 2], 16) for i in (0, 2, 4)) - + 
return tuple(int(color.lstrip('#')[i:i+2], 16) for i in (0, 2, 4)) def make_colormap(instance_data): - labels = [ - label for _, label in instance_data.meta[instance_data.META_FIELD]["labels"] - ] - label_names = [label["name"] for label in labels] - - if "background" not in label_names: - labels.insert( - 0, - { - "name": "background", - "color": "#000000", - }, + labels = [label for _, label in instance_data.meta[instance_data.META_FIELD]['labels']] + label_names = [label['name'] for label in labels] + + if 'background' not in label_names: + labels.insert(0, { + 'name': 'background', + 'color': '#000000', + } ) - return {label["name"]: [hex2rgb(label["color"]), [], []] for label in labels} - + return {label['name']: [hex2rgb(label['color']), [], []] for label in labels} def generate_color(color, used_colors): def tint_shade_color(): for added_color in (255, 0): for factor in range(1, 10): - yield tuple( - map(lambda c: int(c + (added_color - c) * factor / 10), color) - ) + yield tuple(map(lambda c: int(c + (added_color - c) * factor / 10), color)) def get_unused_color(): def get_avg_color(index): sorted_colors = sorted(used_colors, key=operator.itemgetter(index)) - max_dist_pair = max( - zip(sorted_colors, sorted_colors[1:]), - key=lambda c_pair: c_pair[1][index] - c_pair[0][index], - ) + max_dist_pair = max(zip(sorted_colors, sorted_colors[1:]), + key=lambda c_pair: c_pair[1][index] - c_pair[0][index]) return (max_dist_pair[0][index] + max_dist_pair[1][index]) // 2 return tuple(get_avg_color(i) for i in range(3)) - # try to tint and shade color firstly + #try to tint and shade color firstly for new_color in tint_shade_color(): if new_color not in used_colors: return new_color return get_unused_color() - def get_label_color(label_name, label_colors): predefined = parse_default_colors() label_colors = tuple(hex2rgb(c) for c in label_colors if c) @@ -109,9 +91,7 @@ def get_label_color(label_name, label_colors): color = predefined.get(normalized_name, None) if color is None: - name_hash = int.from_bytes( - blake2s(normalized_name.encode(), digest_size=3).digest(), byteorder="big" - ) + name_hash = int.from_bytes(blake2s(normalized_name.encode(), digest_size=3).digest(), byteorder="big") color = get_color_from_index(name_hash) if color in label_colors: diff --git a/cvat/apps/dataset_manager/formats/velodynepoint.py b/cvat/apps/dataset_manager/formats/velodynepoint.py index e373b5de8697..9912d0b1d67b 100644 --- a/cvat/apps/dataset_manager/formats/velodynepoint.py +++ b/cvat/apps/dataset_manager/formats/velodynepoint.py @@ -8,11 +8,8 @@ from datumaro.components.dataset import Dataset from datumaro.components.extractor import ItemTransform -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset, \ + import_dm_annotations from .registry import dm_env from cvat.apps.dataset_manager.util import make_zip_archive @@ -20,47 +17,35 @@ from .registry import exporter, importer - class RemoveTrackingInformation(ItemTransform): def transform_item(self, item): annotations = list(item.annotations) for anno in annotations: - if hasattr(anno, "attributes") and "track_id" in anno.attributes: - del anno.attributes["track_id"] + if hasattr(anno, 'attributes') and 'track_id' in anno.attributes: + del anno.attributes['track_id'] return item.wrap(annotations=annotations) - -@exporter( - name="Kitti Raw Format", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D 
-) +@exporter(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) def _export_images(dst_file, temp_dir, task_data, save_images=False): with GetCVATDataExtractor( - task_data, - include_images=save_images, - format_type="kitti_raw", + task_data, include_images=save_images, format_type="kitti_raw", dimension=DimensionType.DIM_3D, ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) dataset.transform(RemoveTrackingInformation) - dataset.export(temp_dir, "kitti_raw", save_images=save_images, reindex=True) + dataset.export(temp_dir, 'kitti_raw', save_images=save_images, reindex=True) make_zip_archive(temp_dir, dst_file) -@importer( - name="Kitti Raw Format", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D -) +@importer(name='Kitti Raw Format', ext='ZIP', version='1.0', dimension=DimensionType.DIM_3D) def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): if zipfile.is_zipfile(src_file): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset( - temp_dir, - format_name="kitti_raw", - importer=dm_env.importers.get("kitti_raw"), - ) - dataset = Dataset.import_from(temp_dir, "kitti_raw", env=dm_env) + detect_dataset(temp_dir, format_name='kitti_raw', importer=dm_env.importers.get('kitti_raw')) + dataset = Dataset.import_from(temp_dir, 'kitti_raw', env=dm_env) else: - dataset = Dataset.import_from(src_file.name, "kitti_raw", env=dm_env) + dataset = Dataset.import_from(src_file.name, 'kitti_raw', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/vggface2.py b/cvat/apps/dataset_manager/formats/vggface2.py index 623076cbb10e..642171f0f8d9 100644 --- a/cvat/apps/dataset_manager/formats/vggface2.py +++ b/cvat/apps/dataset_manager/formats/vggface2.py @@ -7,36 +7,29 @@ from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - TaskData, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, TaskData, detect_dataset, \ + import_dm_annotations from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name="VGGFace2", ext="ZIP", version="1.0") +@exporter(name='VGGFace2', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, "vgg_face2", save_images=save_images) + dataset.export(temp_dir, 'vgg_face2', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="VGGFace2", ext="ZIP", version="1.0") +@importer(name='VGGFace2', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset( - temp_dir, format_name="vgg_face2", importer=dm_env.importers.get("vgg_face2") - ) - dataset = Dataset.import_from(temp_dir, "vgg_face2", env=dm_env) + detect_dataset(temp_dir, format_name='vgg_face2', importer=dm_env.importers.get('vgg_face2')) + dataset = Dataset.import_from(temp_dir, 'vgg_face2', env=dm_env) if isinstance(instance_data, TaskData): - dataset.transform("rename", regex=r"|([^/]+/)?(.+)|\2|") + dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|") if 
load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/widerface.py b/cvat/apps/dataset_manager/formats/widerface.py index 1aeb25353145..12a9bf0d21e5 100644 --- a/cvat/apps/dataset_manager/formats/widerface.py +++ b/cvat/apps/dataset_manager/formats/widerface.py @@ -7,33 +7,27 @@ from datumaro.components.dataset import Dataset -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, detect_dataset, \ + import_dm_annotations from cvat.apps.dataset_manager.util import make_zip_archive from .registry import dm_env, exporter, importer -@exporter(name="WiderFace", ext="ZIP", version="1.0") +@exporter(name='WiderFace', ext='ZIP', version='1.0') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, "wider_face", save_images=save_images) + dataset.export(temp_dir, 'wider_face', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="WiderFace", ext="ZIP", version="1.0") +@importer(name='WiderFace', ext='ZIP', version='1.0') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): zipfile.ZipFile(src_file).extractall(temp_dir) - detect_dataset( - temp_dir, format_name="wider_face", importer=dm_env.importers.get("wider_face") - ) - dataset = Dataset.import_from(temp_dir, "wider_face", env=dm_env) + detect_dataset(temp_dir, format_name='wider_face', importer=dm_env.importers.get('wider_face')) + dataset = Dataset.import_from(temp_dir, 'wider_face', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/formats/yolo.py b/cvat/apps/dataset_manager/formats/yolo.py index 14a197d5a1d0..9f0e46558117 100644 --- a/cvat/apps/dataset_manager/formats/yolo.py +++ b/cvat/apps/dataset_manager/formats/yolo.py @@ -8,13 +8,8 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, - match_dm_item, - find_dataset_root, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + import_dm_annotations, match_dm_item, find_dataset_root) from cvat.apps.dataset_manager.util import make_zip_archive from datumaro.components.extractor import DatasetItem from datumaro.components.project import Dataset @@ -23,41 +18,37 @@ from .registry import dm_env, exporter, importer -@exporter(name="YOLO", ext="ZIP", version="1.1") +@exporter(name='YOLO', ext='ZIP', version='1.1') def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor(instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) - dataset.export(temp_dir, "yolo", save_images=save_images) + dataset.export(temp_dir, 'yolo', save_images=save_images) make_zip_archive(temp_dir, dst_file) - -@importer(name="YOLO", ext="ZIP", version="1.1") +@importer(name='YOLO', ext='ZIP', version='1.1') def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) image_info = {} - frames = [ - 
YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) - for p in glob(osp.join(temp_dir, "**", "*.txt"), recursive=True) - ] + frames = [YoloExtractor.name_from_path(osp.relpath(p, temp_dir)) + for p in glob(osp.join(temp_dir, '**', '*.txt'), recursive=True)] root_hint = find_dataset_root( - [DatasetItem(id=frame) for frame in frames], instance_data - ) + [DatasetItem(id=frame) for frame in frames], instance_data) for frame in frames: frame_info = None try: - frame_id = match_dm_item( - DatasetItem(id=frame), instance_data, root_hint=root_hint - ) + frame_id = match_dm_item(DatasetItem(id=frame), instance_data, + root_hint=root_hint) frame_info = instance_data.frame_info[frame_id] - except Exception: # nosec + except Exception: # nosec pass if frame_info is not None: - image_info[frame] = (frame_info["height"], frame_info["width"]) + image_info[frame] = (frame_info['height'], frame_info['width']) - detect_dataset(temp_dir, format_name="yolo", importer=dm_env.importers.get("yolo")) - dataset = Dataset.import_from(temp_dir, "yolo", env=dm_env, image_info=image_info) + detect_dataset(temp_dir, format_name='yolo', importer=dm_env.importers.get('yolo')) + dataset = Dataset.import_from(temp_dir, 'yolo', + env=dm_env, image_info=image_info) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) From cc02cd397ed79e7b68b09aa4d797a6951cf84181 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Tue, 1 Oct 2024 18:48:32 +0530 Subject: [PATCH 12/14] formatting changes --- cvat/apps/dataset_manager/formats/datumaro.py | 36 +- cvat/apps/dataset_manager/task.py | 612 +++++++----------- 2 files changed, 228 insertions(+), 420 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/datumaro.py b/cvat/apps/dataset_manager/formats/datumaro.py index b3cede391d36..090397b7a471 100644 --- a/cvat/apps/dataset_manager/formats/datumaro.py +++ b/cvat/apps/dataset_manager/formats/datumaro.py @@ -9,76 +9,62 @@ from pyunpack import Archive -from cvat.apps.dataset_manager.bindings import ( - GetCVATDataExtractor, - detect_dataset, - import_dm_annotations, -) +from cvat.apps.dataset_manager.bindings import (GetCVATDataExtractor, detect_dataset, + import_dm_annotations) from cvat.apps.dataset_manager.util import make_zip_archive from cvat.apps.engine.models import DimensionType from .registry import dm_env, exporter, importer - class DeleteImagePath(ItemTransform): def transform_item(self, item): image = None if item.has_image and item.image.has_data: image = Image(data=item.image.data, size=item.image.size) - return item.wrap(image=image, point_cloud="", related_images=[]) + return item.wrap(image=image, point_cloud='', related_images=[]) @exporter(name="Datumaro", ext="ZIP", version="1.0") def _export(dst_file, temp_dir, instance_data, save_images=False): - with GetCVATDataExtractor( - instance_data=instance_data, include_images=save_images - ) as extractor: + with GetCVATDataExtractor(instance_data=instance_data, include_images=save_images) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) if not save_images: dataset.transform(DeleteImagePath) - dataset.export(temp_dir, "datumaro", save_images=save_images) + dataset.export(temp_dir, 'datumaro', save_images=save_images) make_zip_archive(temp_dir, dst_file) - @importer(name="Datumaro", ext="ZIP", version="1.0") def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset( - temp_dir, 
format_name="datumaro", importer=dm_env.importers.get("datumaro") - ) - dataset = Dataset.import_from(temp_dir, "datumaro", env=dm_env) + detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro')) + dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) import_dm_annotations(dataset, instance_data) - @exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) def _export(dst_file, temp_dir, instance_data, save_images=False): with GetCVATDataExtractor( - instance_data=instance_data, - include_images=save_images, + instance_data=instance_data, include_images=save_images, dimension=DimensionType.DIM_3D, ) as extractor: dataset = Dataset.from_extractors(extractor, env=dm_env) if not save_images: dataset.transform(DeleteImagePath) - dataset.export(temp_dir, "datumaro", save_images=save_images) + dataset.export(temp_dir, 'datumaro', save_images=save_images) make_zip_archive(temp_dir, dst_file) - @importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D) def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): Archive(src_file.name).extractall(temp_dir) - detect_dataset( - temp_dir, format_name="datumaro", importer=dm_env.importers.get("datumaro") - ) - dataset = Dataset.import_from(temp_dir, "datumaro", env=dm_env) + detect_dataset(temp_dir, format_name='datumaro', importer=dm_env.importers.get('datumaro')) + dataset = Dataset.import_from(temp_dir, 'datumaro', env=dm_env) if load_data_callback is not None: load_data_callback(dataset, instance_data) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index d27223f3334b..eb7e90bf3b14 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -18,11 +18,7 @@ from copy import deepcopy from enum import Enum from tempfile import TemporaryDirectory -from datumaro.components.errors import ( - DatasetError, - DatasetImportError, - DatasetNotFoundError, -) +from datumaro.components.errors import DatasetError, DatasetImportError, DatasetNotFoundError from django.db import transaction from django.db.models.query import Prefetch @@ -39,29 +35,21 @@ from cvat.apps.profiler import silk_profile from cvat.apps.dataset_manager.annotation import AnnotationIR, AnnotationManager -from cvat.apps.dataset_manager.bindings import ( - TaskData, - JobData, - CvatImportError, - CvatDatasetNotFoundError, -) +from cvat.apps.dataset_manager.bindings import TaskData, JobData, CvatImportError, CvatDatasetNotFoundError from cvat.apps.dataset_manager.formats.registry import make_exporter, make_importer from cvat.apps.dataset_manager.util import add_prefetch_fields, bulk_create, get_cached dlogger = DatasetLogManager() slogger = ServerLogManager(__name__) - class dotdict(OrderedDict): """dot.notation access to dictionary attributes""" - __getattr__ = OrderedDict.get __setattr__ = OrderedDict.__setitem__ __delattr__ = OrderedDict.__delitem__ __eq__ = lambda self, other: self.id == other.id __hash__ = lambda self: self.id - class PatchAction(str, Enum): CREATE = "create" UPDATE = "update" @@ -74,7 +62,6 @@ def values(cls): def __str__(self): return self.value - def merge_table_rows(rows, keys_for_merge, field_id): # It is necessary to keep a stable order of original rows # (e.g. for tracked boxes). 
Otherwise prev_box.frame can be bigger @@ -91,7 +78,7 @@ def merge_table_rows(rows, keys_for_merge, field_id): merged_rows[row_id][key] = [] for key in keys_for_merge: - item = dotdict({v.split("__", 1)[-1]: row[v] for v in keys_for_merge[key]}) + item = dotdict({v.split('__', 1)[-1]:row[v] for v in keys_for_merge[key]}) if item.id is not None: merged_rows[row_id][key].append(item) @@ -103,39 +90,35 @@ def merge_table_rows(rows, keys_for_merge, field_id): return list(merged_rows.values()) - class JobAnnotation: @classmethod def add_prefetch_info(cls, queryset): assert issubclass(queryset.model, models.Job) - label_qs = add_prefetch_fields( - models.Label.objects.all(), - [ - "skeleton", - "parent", - "attributespec_set", - ], - ) + label_qs = add_prefetch_fields(models.Label.objects.all(), [ + 'skeleton', + 'parent', + 'attributespec_set', + ]) label_qs = JobData.add_prefetch_info(label_qs) return queryset.select_related( - "segment", - "segment__task", + 'segment', + 'segment__task', ).prefetch_related( - "segment__task__project", - "segment__task__owner", - "segment__task__assignee", - "segment__task__project__owner", - "segment__task__project__assignee", - Prefetch( - "segment__task__data", - queryset=models.Data.objects.select_related("video").prefetch_related( - Prefetch("images", queryset=models.Image.objects.order_by("frame")) - ), - ), - Prefetch("segment__task__label_set", queryset=label_qs), - Prefetch("segment__task__project__label_set", queryset=label_qs), + 'segment__task__project', + 'segment__task__owner', + 'segment__task__assignee', + 'segment__task__project__owner', + 'segment__task__project__assignee', + + Prefetch('segment__task__data', + queryset=models.Data.objects.select_related('video').prefetch_related( + Prefetch('images', queryset=models.Image.objects.order_by('frame')) + )), + + Prefetch('segment__task__label_set', queryset=label_qs), + Prefetch('segment__task__project__label_set', queryset=label_qs), ) def __init__(self, pk, *, is_prefetched=False, queryset=None): @@ -143,9 +126,9 @@ def __init__(self, pk, *, is_prefetched=False, queryset=None): queryset = self.add_prefetch_info(models.Job.objects) if is_prefetched: - self.db_job: models.Job = ( - queryset.select_related("segment__task").select_for_update().get(id=pk) - ) + self.db_job: models.Job = queryset.select_related( + 'segment__task' + ).select_for_update().get(id=pk) else: self.db_job: models.Job = get_cached(queryset, pk=int(pk)) @@ -154,14 +137,9 @@ def __init__(self, pk, *, is_prefetched=False, queryset=None): self.stop_frame = db_segment.stop_frame self.ir_data = AnnotationIR(db_segment.task.dimension) - self.db_labels = { - db_label.id: db_label - for db_label in ( - db_segment.task.project.label_set.all() - if db_segment.task.project_id - else db_segment.task.label_set.all() - ) - } + self.db_labels = {db_label.id:db_label + for db_label in (db_segment.task.project.label_set.all() + if db_segment.task.project_id else db_segment.task.label_set.all())} self.db_attributes = {} for db_label in self.db_labels.values(): @@ -171,20 +149,14 @@ def __init__(self, pk, *, is_prefetched=False, queryset=None): "all": OrderedDict(), } for db_attr in db_label.attributespec_set.all(): - default_value = dotdict( - [ - ("spec_id", db_attr.id), - ("value", db_attr.default_value), - ] - ) + default_value = dotdict([ + ('spec_id', db_attr.id), + ('value', db_attr.default_value), + ]) if db_attr.mutable: - self.db_attributes[db_label.id]["mutable"][ - db_attr.id - ] = default_value + 
self.db_attributes[db_label.id]["mutable"][db_attr.id] = default_value else: - self.db_attributes[db_label.id]["immutable"][ - db_attr.id - ] = default_value + self.db_attributes[db_label.id]["immutable"][db_attr.id] = default_value self.db_attributes[db_label.id]["all"][db_attr.id] = default_value @@ -267,20 +239,14 @@ def create_tracks(tracks, parent_track=None): track_attributes = track.pop("attributes", []) shapes = track.pop("shapes") elements = track.pop("elements", []) - db_track = models.LabeledTrack( - job=self.db_job, parent=parent_track, **track - ) + db_track = models.LabeledTrack(job=self.db_job, parent=parent_track, **track) self._validate_label_for_existence(db_track.label_id) for attr in track_attributes: - db_attr_val = models.LabeledTrackAttributeVal( - **attr, track_id=len(db_tracks) - ) + db_attr_val = models.LabeledTrackAttributeVal(**attr, track_id=len(db_tracks)) - self._validate_attribute_for_existence( - db_attr_val, db_track.label_id, "immutable" - ) + self._validate_attribute_for_existence(db_attr_val, db_track.label_id, "immutable") db_track_attr_vals.append(db_attr_val) @@ -289,13 +255,9 @@ def create_tracks(tracks, parent_track=None): db_shape = models.TrackedShape(**shape, track_id=len(db_tracks)) for attr in shape_attributes: - db_attr_val = models.TrackedShapeAttributeVal( - **attr, shape_id=len(db_shapes) - ) + db_attr_val = models.TrackedShapeAttributeVal(**attr, shape_id=len(db_shapes)) - self._validate_attribute_for_existence( - db_attr_val, db_track.label_id, "mutable" - ) + self._validate_attribute_for_existence(db_attr_val, db_track.label_id, "mutable") db_shape_attr_vals.append(db_attr_val) @@ -312,7 +274,7 @@ def create_tracks(tracks, parent_track=None): db_tracks = bulk_create( db_model=models.LabeledTrack, objects=db_tracks, - flt_param={"job_id": self.db_job.id}, + flt_param={"job_id": self.db_job.id} ) for db_attr_val in db_track_attr_vals: @@ -321,7 +283,7 @@ def create_tracks(tracks, parent_track=None): bulk_create( db_model=models.LabeledTrackAttributeVal, objects=db_track_attr_vals, - flt_param={}, + flt_param={} ) for db_shape in db_shapes: @@ -330,7 +292,7 @@ def create_tracks(tracks, parent_track=None): db_shapes = bulk_create( db_model=models.TrackedShape, objects=db_shapes, - flt_param={"track__job_id": self.db_job.id}, + flt_param={"track__job_id": self.db_job.id} ) for db_attr_val in db_shape_attr_vals: @@ -339,7 +301,7 @@ def create_tracks(tracks, parent_track=None): bulk_create( db_model=models.TrackedShapeAttributeVal, objects=db_shape_attr_vals, - flt_param={}, + flt_param={} ) shape_idx = 0 @@ -364,20 +326,14 @@ def create_shapes(shapes, parent_shape=None): shape_elements = shape.pop("elements", []) # FIXME: need to clamp points (be sure that all of them inside the image) # Should we check here or implement a validator? 
- db_shape = models.LabeledShape( - job=self.db_job, parent=parent_shape, **shape - ) + db_shape = models.LabeledShape(job=self.db_job, parent=parent_shape, **shape) self._validate_label_for_existence(db_shape.label_id) for attr in attributes: - db_attr_val = models.LabeledShapeAttributeVal( - **attr, shape_id=len(db_shapes) - ) + db_attr_val = models.LabeledShapeAttributeVal(**attr, shape_id=len(db_shapes)) - self._validate_attribute_for_existence( - db_attr_val, db_shape.label_id, "all" - ) + self._validate_attribute_for_existence(db_attr_val, db_shape.label_id, "all") db_attr_vals.append(db_attr_val) @@ -389,7 +345,7 @@ def create_shapes(shapes, parent_shape=None): db_shapes = bulk_create( db_model=models.LabeledShape, objects=db_shapes, - flt_param={"job_id": self.db_job.id}, + flt_param={"job_id": self.db_job.id} ) for db_attr_val in db_attr_vals: @@ -398,7 +354,7 @@ def create_shapes(shapes, parent_shape=None): bulk_create( db_model=models.LabeledShapeAttributeVal, objects=db_attr_vals, - flt_param={}, + flt_param={} ) for shape, db_shape in zip(shapes, db_shapes): @@ -422,9 +378,7 @@ def _save_tags_to_db(self, tags): for attr in attributes: db_attr_val = models.LabeledImageAttributeVal(**attr) - self._validate_attribute_for_existence( - db_attr_val, db_tag.label_id, "all" - ) + self._validate_attribute_for_existence(db_attr_val, db_tag.label_id, "all") db_attr_val.tag_id = len(db_tags) db_attr_vals.append(db_attr_val) @@ -435,14 +389,16 @@ def _save_tags_to_db(self, tags): db_tags = bulk_create( db_model=models.LabeledImage, objects=db_tags, - flt_param={"job_id": self.db_job.id}, + flt_param={"job_id": self.db_job.id} ) for db_attr_val in db_attr_vals: db_attr_val.image_id = db_tags[db_attr_val.tag_id].id bulk_create( - db_model=models.LabeledImageAttributeVal, objects=db_attr_vals, flt_param={} + db_model=models.LabeledImageAttributeVal, + objects=db_attr_vals, + flt_param={} ) for tag, db_tag in zip(tags, db_tags): @@ -517,9 +473,9 @@ def _delete(self, data=None): # It is not important for us that data had some "invalid" objects # which were skipped (not actually deleted). The main idea is to # say that all requested objects are absent in DB after the method. 
- self.ir_data.tags = data["tags"] - self.ir_data.shapes = data["shapes"] - self.ir_data.tracks = data["tracks"] + self.ir_data.tags = data['tags'] + self.ir_data.shapes = data['shapes'] + self.ir_data.tracks = data['tracks'] labeledimage_set.delete() labeledshape_set.delete() @@ -545,50 +501,40 @@ def _extend_attributes(attributeval_set, default_attribute_values): shape_attribute_specs_set = set(attr.spec_id for attr in attributeval_set) for db_attr in default_attribute_values: if db_attr.spec_id not in shape_attribute_specs_set: - attributeval_set.append( - dotdict( - [ - ("spec_id", db_attr.spec_id), - ("value", db_attr.value), - ] - ) - ) + attributeval_set.append(dotdict([ + ('spec_id', db_attr.spec_id), + ('value', db_attr.value), + ])) def _init_tags_from_db(self): # NOTE: do not use .prefetch_related() with .values() since it's useless: # https://github.com/cvat-ai/cvat/pull/7748#issuecomment-2063695007 - db_tags = ( - self.db_job.labeledimage_set.values( - "id", - "frame", - "label_id", - "group", - "source", - "labeledimageattributeval__spec_id", - "labeledimageattributeval__value", - "labeledimageattributeval__id", - ) - .order_by("frame") - .iterator(chunk_size=2000) - ) + db_tags = self.db_job.labeledimage_set.values( + 'id', + 'frame', + 'label_id', + 'group', + 'source', + 'labeledimageattributeval__spec_id', + 'labeledimageattributeval__value', + 'labeledimageattributeval__id', + ).order_by('frame').iterator(chunk_size=2000) db_tags = merge_table_rows( rows=db_tags, keys_for_merge={ "labeledimageattributeval_set": [ - "labeledimageattributeval__spec_id", - "labeledimageattributeval__value", - "labeledimageattributeval__id", + 'labeledimageattributeval__spec_id', + 'labeledimageattributeval__value', + 'labeledimageattributeval__id', ], }, - field_id="id", + field_id='id', ) for db_tag in db_tags: - self._extend_attributes( - db_tag.labeledimageattributeval_set, - self.db_attributes[db_tag.label_id]["all"].values(), - ) + self._extend_attributes(db_tag.labeledimageattributeval_set, + self.db_attributes[db_tag.label_id]["all"].values()) serializer = serializers.LabeledImageSerializerFromDB(db_tags, many=True) self.ir_data.tags = serializer.data @@ -596,53 +542,47 @@ def _init_tags_from_db(self): def _init_shapes_from_db(self): # NOTE: do not use .prefetch_related() with .values() since it's useless: # https://github.com/cvat-ai/cvat/pull/7748#issuecomment-2063695007 - db_shapes = ( - self.db_job.labeledshape_set.values( - "id", - "label_id", - "type", - "frame", - "group", - "source", - "occluded", - "outside", - "z_order", - "rotation", - "points", - "parent", - "transcript", - "gender", - "age", - "locale", - "accent", - "emotion", - "labeledshapeattributeval__spec_id", - "labeledshapeattributeval__value", - "labeledshapeattributeval__id", - ) - .order_by("frame") - .iterator(chunk_size=2000) - ) + db_shapes = self.db_job.labeledshape_set.values( + 'id', + 'label_id', + 'type', + 'frame', + 'group', + 'source', + 'occluded', + 'outside', + 'z_order', + 'rotation', + 'points', + 'parent', + 'transcript', + 'gender', + 'age', + 'locale', + 'accent', + 'emotion', + 'labeledshapeattributeval__spec_id', + 'labeledshapeattributeval__value', + 'labeledshapeattributeval__id', + ).order_by('frame').iterator(chunk_size=2000) db_shapes = merge_table_rows( rows=db_shapes, keys_for_merge={ - "labeledshapeattributeval_set": [ - "labeledshapeattributeval__spec_id", - "labeledshapeattributeval__value", - "labeledshapeattributeval__id", + 'labeledshapeattributeval_set': [ + 
'labeledshapeattributeval__spec_id', + 'labeledshapeattributeval__value', + 'labeledshapeattributeval__id', ], }, - field_id="id", + field_id='id', ) shapes = {} elements = {} for db_shape in db_shapes: - self._extend_attributes( - db_shape.labeledshapeattributeval_set, - self.db_attributes[db_shape.label_id]["all"].values(), - ) + self._extend_attributes(db_shape.labeledshapeattributeval_set, + self.db_attributes[db_shape.label_id]["all"].values()) if db_shape.parent is None: db_shape.elements = [] @@ -655,40 +595,34 @@ def _init_shapes_from_db(self): for shape_id, shape_elements in elements.items(): shapes[shape_id].elements = shape_elements - serializer = serializers.LabeledShapeSerializerFromDB( - list(shapes.values()), many=True - ) + serializer = serializers.LabeledShapeSerializerFromDB(list(shapes.values()), many=True) self.ir_data.shapes = serializer.data def _init_tracks_from_db(self): # NOTE: do not use .prefetch_related() with .values() since it's useless: # https://github.com/cvat-ai/cvat/pull/7748#issuecomment-2063695007 - db_tracks = ( - self.db_job.labeledtrack_set.values( - "id", - "frame", - "label_id", - "group", - "source", - "parent", - "labeledtrackattributeval__spec_id", - "labeledtrackattributeval__value", - "labeledtrackattributeval__id", - "trackedshape__type", - "trackedshape__occluded", - "trackedshape__z_order", - "trackedshape__rotation", - "trackedshape__points", - "trackedshape__id", - "trackedshape__frame", - "trackedshape__outside", - "trackedshape__trackedshapeattributeval__spec_id", - "trackedshape__trackedshapeattributeval__value", - "trackedshape__trackedshapeattributeval__id", - ) - .order_by("id", "trackedshape__frame") - .iterator(chunk_size=2000) - ) + db_tracks = self.db_job.labeledtrack_set.values( + "id", + "frame", + "label_id", + "group", + "source", + "parent", + "labeledtrackattributeval__spec_id", + "labeledtrackattributeval__value", + "labeledtrackattributeval__id", + "trackedshape__type", + "trackedshape__occluded", + "trackedshape__z_order", + "trackedshape__rotation", + "trackedshape__points", + "trackedshape__id", + "trackedshape__frame", + "trackedshape__outside", + "trackedshape__trackedshapeattributeval__spec_id", + "trackedshape__trackedshapeattributeval__value", + "trackedshape__trackedshapeattributeval__id", + ).order_by('id', 'trackedshape__frame').iterator(chunk_size=2000) db_tracks = merge_table_rows( rows=db_tracks, @@ -698,7 +632,7 @@ def _init_tracks_from_db(self): "labeledtrackattributeval__value", "labeledtrackattributeval__id", ], - "trackedshape_set": [ + "trackedshape_set":[ "trackedshape__type", "trackedshape__occluded", "trackedshape__z_order", @@ -718,40 +652,28 @@ def _init_tracks_from_db(self): tracks = {} elements = {} for db_track in db_tracks: - db_track["trackedshape_set"] = merge_table_rows( - db_track["trackedshape_set"], - { - "trackedshapeattributeval_set": [ - "trackedshapeattributeval__value", - "trackedshapeattributeval__spec_id", - "trackedshapeattributeval__id", - ] - }, - "id", - ) + db_track["trackedshape_set"] = merge_table_rows(db_track["trackedshape_set"], { + 'trackedshapeattributeval_set': [ + 'trackedshapeattributeval__value', + 'trackedshapeattributeval__spec_id', + 'trackedshapeattributeval__id', + ] + }, 'id') # A result table can consist many equal rows for track/shape attributes # We need filter unique attributes manually - db_track["labeledtrackattributeval_set"] = list( - set(db_track["labeledtrackattributeval_set"]) - ) - self._extend_attributes( - db_track.labeledtrackattributeval_set, 
- self.db_attributes[db_track.label_id]["immutable"].values(), - ) + db_track["labeledtrackattributeval_set"] = list(set(db_track["labeledtrackattributeval_set"])) + self._extend_attributes(db_track.labeledtrackattributeval_set, + self.db_attributes[db_track.label_id]["immutable"].values()) - default_attribute_values = self.db_attributes[db_track.label_id][ - "mutable" - ].values() + default_attribute_values = self.db_attributes[db_track.label_id]["mutable"].values() for db_shape in db_track["trackedshape_set"]: db_shape["trackedshapeattributeval_set"] = list( set(db_shape["trackedshapeattributeval_set"]) ) # in case of trackedshapes need to interpolate attriute values and extend it # by previous shape attribute values (not default values) - self._extend_attributes( - db_shape["trackedshapeattributeval_set"], default_attribute_values - ) + self._extend_attributes(db_shape["trackedshapeattributeval_set"], default_attribute_values) default_attribute_values = db_shape["trackedshapeattributeval_set"] if db_track.parent is None: @@ -765,13 +687,11 @@ def _init_tracks_from_db(self): for track_id, track_elements in elements.items(): tracks[track_id].elements = track_elements - serializer = serializers.LabeledTrackSerializerFromDB( - list(tracks.values()), many=True - ) + serializer = serializers.LabeledTrackSerializerFromDB(list(tracks.values()), many=True) self.ir_data.tracks = serializer.data def _init_version_from_db(self): - self.ir_data.version = 0 # FIXME: should be removed in the future + self.ir_data.version = 0 # FIXME: should be removed in the future def init_from_db(self): self._init_tags_from_db() @@ -783,7 +703,7 @@ def init_from_db(self): def data(self): return self.ir_data.data - def export(self, dst_file, exporter, host="", **options): + def export(self, dst_file, exporter, host='', **options): job_data = JobData( annotation_ir=self.ir_data, db_job=self.db_job, @@ -822,40 +742,30 @@ def import_annotations(self, src_file, importer, **options): self.create(job_data.data.slice(self.start_frame, self.stop_frame).serialize()) - class TaskAnnotation: def __init__(self, pk): self.db_task = models.Task.objects.prefetch_related( - Prefetch("data__images", queryset=models.Image.objects.order_by("frame")) + Prefetch('data__images', queryset=models.Image.objects.order_by('frame')) ).get(id=pk) # Postgres doesn't guarantee an order by default without explicit order_by - self.db_jobs = ( - models.Job.objects.select_related("segment") - .filter( - segment__task_id=pk, - type=models.JobType.ANNOTATION.value, - ) - .order_by("id") - ) + self.db_jobs = models.Job.objects.select_related("segment").filter( + segment__task_id=pk, type=models.JobType.ANNOTATION.value, + ).order_by('id') self.ir_data = AnnotationIR(self.db_task.dimension) def reset(self): self.ir_data.reset() def _patch_data(self, data, action): - _data = ( - data - if isinstance(data, AnnotationIR) - else AnnotationIR(self.db_task.dimension, data) - ) + _data = data if isinstance(data, AnnotationIR) else AnnotationIR(self.db_task.dimension, data) splitted_data = {} jobs = {} for db_job in self.db_jobs: jid = db_job.id start = db_job.segment.start_frame stop = db_job.segment.stop_frame - jobs[jid] = {"start": start, "stop": stop} + jobs[jid] = { "start": start, "stop": stop } splitted_data[jid] = _data.slice(start, stop) for jid, job_data in splitted_data.items(): @@ -866,9 +776,7 @@ def _patch_data(self, data, action): _data.data = patch_job_data(jid, job_data, action) if _data.version > self.ir_data.version: self.ir_data.version = 
_data.version - self._merge_data( - _data, jobs[jid]["start"], self.db_task.overlap, self.db_task.dimension - ) + self._merge_data(_data, jobs[jid]["start"], self.db_task.overlap, self.db_task.dimension) def _merge_data(self, data, start_frame, overlap, dimension): annotation_manager = AnnotationManager(self.ir_data) @@ -907,7 +815,7 @@ def init_from_db(self): dimension = self.db_task.dimension self._merge_data(annotation.ir_data, start_frame, overlap, dimension) - def export(self, dst_file, exporter, host="", **options): + def export(self, dst_file, exporter, host='', **options): task_data = TaskData( annotation_ir=self.ir_data, db_task=self.db_task, @@ -959,7 +867,6 @@ def get_job_data(pk): return annotation.data - @silk_profile(name="POST job data") @transaction.atomic def put_job_data(pk, data): @@ -968,7 +875,6 @@ def put_job_data(pk, data): return annotation.data - @silk_profile(name="UPDATE job data") @plugin_decorator @transaction.atomic @@ -983,14 +889,12 @@ def patch_job_data(pk, data, action): return annotation.data - @silk_profile(name="DELETE job data") @transaction.atomic def delete_job_data(pk): annotation = JobAnnotation(pk) annotation.delete() - def export_job(job_id, dst_file, format_name, server_url=None, save_images=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. @@ -1002,24 +906,18 @@ def export_job(job_id, dst_file, format_name, server_url=None, save_images=False job.init_from_db() exporter = make_exporter(format_name) - with open(dst_file, "wb") as f: + with open(dst_file, 'wb') as f: job.export(f, exporter, host=server_url, save_images=save_images) - -def jobChunkPathGetter( - db_data, start, stop, task_dimension, data_quality, data_num, job -): +def jobChunkPathGetter(db_data, start, stop, task_dimension, data_quality, data_num, job): # db_data = Task Data frame_provider = FrameProvider(db_data, task_dimension) # self.type = data_type number = int(data_num) if data_num is not None else None - quality = ( - FrameProvider.Quality.COMPRESSED - if data_quality == "compressed" - else FrameProvider.Quality.ORIGINAL - ) + quality = FrameProvider.Quality.COMPRESSED \ + if data_quality == 'compressed' else FrameProvider.Quality.ORIGINAL path = os.path.realpath(frame_provider.get_chunk(number, quality)) # pylint: disable=superfluous-parens @@ -1028,13 +926,10 @@ def jobChunkPathGetter( return path - def chunk_annotation_audio(concat_array, output_folder, annotations): # Convert NumPy array to AudioSegment - sr = 44100 # sampling rate - audio_segment = AudioSegment( - concat_array.tobytes(), frame_rate=sr, channels=1, sample_width=4 - ) + sr = 44100 # sampling rate + audio_segment = AudioSegment(concat_array.tobytes(), frame_rate=sr, channels=1, sample_width=4) try: y = audio_segment.get_array_of_samples() @@ -1045,8 +940,8 @@ def chunk_annotation_audio(concat_array, output_folder, annotations): for _, shape in enumerate(annotations, 1): - start_time = min(shape["points"][:2]) - end_time = max(shape["points"][2:]) + start_time = min(shape['points'][:2]) + end_time = max(shape['points'][2:]) # Convert time points to sample indices start_sample = int(start_time * sr) @@ -1063,12 +958,9 @@ def chunk_annotation_audio(concat_array, output_folder, annotations): return data - -def create_annotation_clips_zip( - annotation_audio_chunk_file_paths, meta_data_file_path, output_folder, dst_file -): - data_folder = os.path.join(output_folder, "data") - clips_folder = os.path.join(data_folder, 
"clips") +def create_annotation_clips_zip(annotation_audio_chunk_file_paths, meta_data_file_path, output_folder, dst_file): + data_folder = os.path.join(output_folder, 'data') + clips_folder = os.path.join(data_folder, 'clips') os.makedirs(clips_folder, exist_ok=True) # Copy audio files to clips folder @@ -1080,8 +972,8 @@ def create_annotation_clips_zip( shutil.copy(meta_data_file_path, os.path.join(data_folder, "data.tsv")) # Create zip file - zip_filename = os.path.join(output_folder, "common_voice.zip") - with zipfile.ZipFile(zip_filename, "w") as zipf: + zip_filename = os.path.join(output_folder, 'common_voice.zip') + with zipfile.ZipFile(zip_filename, 'w') as zipf: for root, _, files in os.walk(data_folder): for file in files: file_path = os.path.join(root, file) @@ -1093,7 +985,6 @@ def create_annotation_clips_zip( # Move the zip to the dst_file location shutil.move(zip_filename, dst_file) - def get_np_audio_array_from_job(job_id): with transaction.atomic(): @@ -1103,25 +994,17 @@ def get_np_audio_array_from_job(job_id): job_data_chunk_size = job.db_job.segment.task.data.chunk_size task_dimension = job.db_job.segment.task.dimension - start = job.start_frame / job_data_chunk_size - stop = job.stop_frame / job_data_chunk_size + start = job.start_frame/job_data_chunk_size + stop = job.stop_frame/job_data_chunk_size audio_array_buffer = [] - for i in range(math.trunc(start), math.trunc(stop) + 1): + for i in range(math.trunc(start), math.trunc(stop)+1): db_job = job.db_job # data_type = "chunk" data_num = i - data_quality = "compressed" - - chunk_path = jobChunkPathGetter( - job.db_job.segment.task.data, - job.start_frame, - job.stop_frame, - task_dimension, - data_quality, - data_num, - db_job, - ) + data_quality = 'compressed' + + chunk_path = jobChunkPathGetter(job.db_job.segment.task.data, job.start_frame, job.stop_frame, task_dimension, data_quality, data_num, db_job) _, audio_data = wavfile.read(chunk_path) @@ -1134,7 +1017,6 @@ def get_np_audio_array_from_job(job_id): return concat_array - def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): concat_array = get_np_audio_array_from_job(job_id) @@ -1165,6 +1047,7 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): slogger.glob.debug("JOB LABELS ATTRIBUTES") slogger.glob.debug(json.dumps(attributes_list)) + slogger.glob.debug("JOB LABELS") slogger.glob.debug(json.dumps(labels_list)) @@ -1175,9 +1058,7 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): # wave_file.setframerate(44100) # wave_file.writeframes(concat_array) - annotation_audio_chunk_file_paths = chunk_annotation_audio( - concat_array, temp_dir, annotations - ) + annotation_audio_chunk_file_paths = chunk_annotation_audio(concat_array, temp_dir, annotations) for i, annotation in enumerate(annotations): entry = { @@ -1191,15 +1072,13 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): "emotion": annotation.get("emotion", ""), "label": labels_mapping[annotation["label_id"]]["name"], "start": annotation["points"][0], - "end": annotation["points"][3], + "end": annotation["points"][3] } attributes = annotation.get("attributes", []) for idx, attr in enumerate(attributes): annotation_attribute_id = attr.get("spec_id", "") - label_attributes = labels_mapping[annotation["label_id"]].get( - "attributes", {} - ) + label_attributes = labels_mapping[annotation["label_id"]].get("attributes", {}) annotation_attribute = label_attributes.get(annotation_attribute_id, {}) 
attribute_name = annotation_attribute.get("name", f"attribute_{idx}_name") attribute_val = attr.get("value", "") @@ -1209,13 +1088,13 @@ def get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir): final_data.append(entry) + slogger.glob.debug("JOB ANNOTATION DATA") slogger.glob.debug(json.dumps(final_data)) slogger.glob.debug("All ANNOTATIONs DATA") slogger.glob.debug(json.dumps(annotations)) return final_data, annotation_audio_chunk_file_paths - def convert_annotation_data_format(data, format_name): if format_name == "Common Voice": return data @@ -1231,28 +1110,15 @@ def convert_annotation_data_format(data, format_name): "text": entry["sentence"], "label": entry["label"], "start": entry["start"], - "end": entry["end"], + "end": entry["end"] } - attribute_keys = [ - key for key in entry.keys() if key.startswith("attribute_") - ] + attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) return formatted_data elif format_name == "VoxPopuli": - language_id_mapping = { - "en-US": 0, - "es-ES": 1, - "fr-FR": 2, - "zh-CN": 3, - "hi-IN": 4, - "ar-EG": 5, - "pt-BR": 6, - "ja-JP": 7, - "de-DE": 8, - "ru-RU": 9, - } + language_id_mapping = {"en-US": 0,"es-ES":1,"fr-FR":2,"zh-CN":3,"hi-IN":4,"ar-EG":5,"pt-BR":6,"ja-JP":7,"de-DE":8,"ru-RU":9} formatted_data = [] for entry in data: formatted_entry = { @@ -1268,11 +1134,10 @@ def convert_annotation_data_format(data, format_name): "accent": entry["accents"], "label": entry["label"], "start": entry["start"], - "end": entry["end"], + "end": entry["end"] } - attribute_keys = [ - key for key in entry.keys() if key.startswith("attribute_") - ] + x = entry["locale"] + attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) @@ -1289,15 +1154,14 @@ def convert_annotation_data_format(data, format_name): "speaker_id": "", "label": entry["label"], "start": entry["start"], - "end": entry["end"], + "end": entry["end"] } - attribute_keys = [ - key for key in entry.keys() if key.startswith("attribute_") - ] + attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) return formatted_data + elif format_name == "VoxCeleb": formatted_data = [] for entry in data: @@ -1306,21 +1170,20 @@ def convert_annotation_data_format(data, format_name): "file": entry["path"], "text": entry["sentence"], "gender": entry["gender"], - "nationality": "", + "nationality" : "", "age": entry["age"], "id": str(uuid.uuid4()), "speaker_id": "", "label": entry["label"], "start": entry["start"], - "end": entry["end"], + "end": entry["end"] } - attribute_keys = [ - key for key in entry.keys() if key.startswith("attribute_") - ] + attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) return formatted_data + elif format_name == "VCTK_Corpus": formatted_data = [] for entry in data: @@ -1335,29 +1198,18 @@ def convert_annotation_data_format(data, format_name): "speaker_id": "", "label": entry["label"], "start": entry["start"], - "end": entry["end"], + "end": entry["end"] } - attribute_keys = [ - key for key in entry.keys() if key.startswith("attribute_") - ] + attribute_keys = [key for key in 
entry.keys() if key.startswith("attribute_")] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) return formatted_data + elif format_name == "LibriVox": - language_id_mapping = { - "en-US": 0, - "es-ES": 1, - "fr-FR": 2, - "zh-CN": 3, - "hi-IN": 4, - "ar-EG": 5, - "pt-BR": 6, - "ja-JP": 7, - "de-DE": 8, - "ru-RU": 9, - } + formatted_data = [] + language_id_mapping = {"en-US": 0,"es-ES":1,"fr-FR":2,"zh-CN":3,"hi-IN":4,"ar-EG":5,"pt-BR":6,"ja-JP":7,"de-DE":8,"ru-RU":9 } for entry in data: formatted_entry = { "job_id": entry["job_id"], @@ -1372,11 +1224,9 @@ def convert_annotation_data_format(data, format_name): "speaker_id": "", "label": entry["label"], "start": entry["start"], - "end": entry["end"], + "end": entry["end"] } - attribute_keys = [ - key for key in entry.keys() if key.startswith("attribute_") - ] + attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] for key in attribute_keys: formatted_entry[key] = entry[key] formatted_data.append(formatted_entry) @@ -1384,10 +1234,7 @@ def convert_annotation_data_format(data, format_name): return data - -def export_audino_job( - job_id, dst_file, format_name, server_url=None, save_images=False -): +def export_audino_job(job_id, dst_file, format_name, server_url=None, save_images=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. @@ -1405,33 +1252,25 @@ def export_audino_job( with TemporaryDirectory(dir=temp_dir_base) as temp_dir: - final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data( - job_id, dst_file, job, temp_dir_base, temp_dir - ) + final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data(job_id, dst_file, job, temp_dir_base, temp_dir) # Convert the data into a format final_data = convert_annotation_data_format(final_data, format_name) df = pd.DataFrame(final_data) - # sorting by start column in ascending order - df = df.sort_values(by="start") + # sorting by start_time column in ascending order + df = df.sort_values(by='start') + + # Saving the metadata file meta_data_file_path = os.path.join(temp_dir_base, str(job_id) + ".tsv") - df.to_csv(meta_data_file_path, sep="\t", index=False) - - create_annotation_clips_zip( - annotation_audio_chunk_file_paths, - meta_data_file_path, - temp_dir_base, - dst_file, - ) + df.to_csv(meta_data_file_path, sep='\t', index=False) + create_annotation_clips_zip(annotation_audio_chunk_file_paths, meta_data_file_path, temp_dir_base, dst_file) -def export_audino_task( - task_id, dst_file, format_name, server_url=None, save_images=False -): +def export_audino_task(task_id, dst_file, format_name, server_url=None, save_images=False): with transaction.atomic(): task = TaskAnnotation(task_id) @@ -1453,9 +1292,7 @@ def export_audino_task( job = JobAnnotation(job.id) job.init_from_db() - final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data( - job.db_job.id, dst_file, job, temp_dir_base, temp_dir - ) + final_data, annotation_audio_chunk_file_paths = get_audio_job_export_data(job.db_job.id, dst_file, job, temp_dir_base, temp_dir) # Convert the data into a format final_data = convert_annotation_data_format(final_data, format_name) @@ -1466,27 +1303,18 @@ def export_audino_task( # Saving the metadata file meta_data_file_path = os.path.join(temp_dir_base, str(task_id) + ".tsv") - final_task_data_flatten = [ - item for sublist in final_task_data for item in sublist - ] - 
final_annotation_chunk_paths_flatten = [ - item for sublist in final_annotation_chunk_paths for item in sublist - ] + final_task_data_flatten = [item for sublist in final_task_data for item in sublist] + final_annotation_chunk_paths_flatten = [item for sublist in final_annotation_chunk_paths for item in sublist] df = pd.DataFrame(final_task_data_flatten) - # sorting by start column in pandas dataframe - df = df.sort_values(by="start") + # sorting by start_time column in ascending order + df = df.sort_values(by='start') - df.to_csv(meta_data_file_path, sep="\t", index=False) - create_annotation_clips_zip( - final_annotation_chunk_paths_flatten, - meta_data_file_path, - temp_dir_base, - dst_file, - ) + df.to_csv(meta_data_file_path, sep='\t', index=False) + create_annotation_clips_zip(final_annotation_chunk_paths_flatten, meta_data_file_path, temp_dir_base, dst_file) @silk_profile(name="GET task data") @transaction.atomic @@ -1496,7 +1324,6 @@ def get_task_data(pk): return annotation.data - @silk_profile(name="POST task data") @transaction.atomic def put_task_data(pk, data): @@ -1505,7 +1332,6 @@ def put_task_data(pk, data): return annotation.data - @silk_profile(name="UPDATE task data") @transaction.atomic def patch_task_data(pk, data, action): @@ -1519,14 +1345,12 @@ def patch_task_data(pk, data, action): return annotation.data - @silk_profile(name="DELETE task data") @transaction.atomic def delete_task_data(pk): annotation = TaskAnnotation(pk) annotation.delete() - def export_task(task_id, dst_file, format_name, server_url=None, save_images=False): # For big tasks dump function may run for a long time and # we dont need to acquire lock after the task has been initialized from DB. @@ -1538,30 +1362,28 @@ def export_task(task_id, dst_file, format_name, server_url=None, save_images=Fal task.init_from_db() exporter = make_exporter(format_name) - with open(dst_file, "wb") as f: + with open(dst_file, 'wb') as f: task.export(f, exporter, host=server_url, save_images=save_images) - @transaction.atomic def import_task_annotations(src_file, task_id, format_name, conv_mask_to_poly): task = TaskAnnotation(task_id) task.init_from_db() importer = make_importer(format_name) - with open(src_file, "rb") as f: + with open(src_file, 'rb') as f: try: task.import_annotations(f, importer, conv_mask_to_poly=conv_mask_to_poly) except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex: raise CvatImportError(str(ex)) - @transaction.atomic def import_job_annotations(src_file, job_id, format_name, conv_mask_to_poly): job = JobAnnotation(job_id) job.init_from_db() importer = make_importer(format_name) - with open(src_file, "rb") as f: + with open(src_file, 'rb') as f: try: job.import_annotations(f, importer, conv_mask_to_poly=conv_mask_to_poly) except (DatasetError, DatasetImportError, DatasetNotFoundError) as ex: From 19e8dab40c9895736a1e0b0a597759ba632e6a4d Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Tue, 1 Oct 2024 18:52:14 +0530 Subject: [PATCH 13/14] formatting changes --- cvat/apps/dataset_manager/task.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cvat/apps/dataset_manager/task.py b/cvat/apps/dataset_manager/task.py index eb7e90bf3b14..a1af03aa1407 100644 --- a/cvat/apps/dataset_manager/task.py +++ b/cvat/apps/dataset_manager/task.py @@ -1136,7 +1136,6 @@ def convert_annotation_data_format(data, format_name): "start": entry["start"], "end": entry["end"] } - x = entry["locale"] attribute_keys = [key for key in entry.keys() if key.startswith("attribute_")] for key in attribute_keys: 
formatted_entry[key] = entry[key] From c005331a22afc73c6fb16bf47e870f288d40b2d7 Mon Sep 17 00:00:00 2001 From: siesto1elemento Date: Wed, 30 Oct 2024 11:42:24 +0530 Subject: [PATCH 14/14] added common voice versions and some fixes in other formats --- cvat/apps/dataset_manager/formats/LibriVox.py | 26 +- .../dataset_manager/formats/VCTK_Corpus.py | 26 +- cvat/apps/dataset_manager/formats/VoxCeleb.py | 26 +- .../apps/dataset_manager/formats/Voxpopuli.py | 26 +- .../dataset_manager/formats/common_voice.py | 406 ++++++++++++++++-- .../dataset_manager/formats/librispeech.py | 26 +- cvat/apps/dataset_manager/formats/tedlium.py | 26 +- 7 files changed, 503 insertions(+), 59 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/LibriVox.py b/cvat/apps/dataset_manager/formats/LibriVox.py index 4967fba1c16f..5d32f2852fe6 100644 --- a/cvat/apps/dataset_manager/formats/LibriVox.py +++ b/cvat/apps/dataset_manager/formats/LibriVox.py @@ -2,6 +2,7 @@ import os.path as osp import zipfile import csv +import math from django.db import transaction from glob import glob from pydub import AudioSegment @@ -21,11 +22,24 @@ def calculate_duration(row): return end_time - start_time -def split_rows_by_time(all_rows, time_threshold=600): +def split_rows_by_time(all_rows, clips_folder, time_threshold=600): result = [] total_duration = 0 + for row in all_rows: + if not row.get("start") or not row.get("end"): + audio_file_name = row["file"] + audio_file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(audio_file_path): + audio_segment = AudioSegment.from_file(audio_file_path) + audio_duration = audio_segment.duration_seconds + + # Set start to 0 if missing, and end to the audio duration + row["start"] = row.get("start", "0") + row["end"] = row.get("end", str(audio_duration)) + for row in all_rows: start_time = float(row["start"]) end_time = float(row["end"]) @@ -198,9 +212,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs num_tsv_rows = len(tsv_rows) num_clips = len(os.listdir(clips_folder)) - if num_tsv_rows != num_clips: + if num_tsv_rows > num_clips: raise ValueError( - f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. Clips must be equal or more." 
) # Combined audio that will be the final output @@ -221,6 +235,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs combined_audio += ( audio_segment # Append the audio in the order from TSV ) + else: + raise FileNotFoundError(f"File not found: {file_path}") # Create raw folder to store combined audio raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") @@ -254,7 +270,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs reader = csv.DictReader(tsvfile, delimiter="\t") all_rows = list(reader) - new_rows = split_rows_by_time(all_rows) + new_rows = split_rows_by_time(all_rows, clips_folder) jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") @@ -361,7 +377,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs record_index += 1 total_duration = round(end_time, 2) - if 599.9 <= total_duration <= 600: + if math.isclose(total_duration, 600, abs_tol=1e-6): break else: diff --git a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py index 344dec795504..61996401be27 100644 --- a/cvat/apps/dataset_manager/formats/VCTK_Corpus.py +++ b/cvat/apps/dataset_manager/formats/VCTK_Corpus.py @@ -2,6 +2,7 @@ import os.path as osp import zipfile import csv +import math from django.db import transaction from glob import glob from pydub import AudioSegment @@ -21,11 +22,24 @@ def calculate_duration(row): return end_time - start_time -def split_rows_by_time(all_rows, time_threshold=600): +def split_rows_by_time(all_rows, clips_folder, time_threshold=600): result = [] total_duration = 0 + for row in all_rows: + if not row.get("start") or not row.get("end"): + audio_file_name = row["file"] + audio_file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(audio_file_path): + audio_segment = AudioSegment.from_file(audio_file_path) + audio_duration = audio_segment.duration_seconds + + # Set start to 0 if missing, and end to the audio duration + row["start"] = row.get("start", "0") + row["end"] = row.get("end", str(audio_duration)) + for row in all_rows: start_time = float(row["start"]) end_time = float(row["end"]) @@ -198,9 +212,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs num_tsv_rows = len(tsv_rows) num_clips = len(os.listdir(clips_folder)) - if num_tsv_rows != num_clips: + if num_tsv_rows > num_clips: raise ValueError( - f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. Clips must be equal or more." 
) # Combined audio that will be the final output @@ -221,6 +235,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs combined_audio += ( audio_segment # Append the audio in the order from TSV ) + else: + raise FileNotFoundError(f"File not found: {file_path}") # Create raw folder to store combined audio raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") @@ -254,7 +270,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs reader = csv.DictReader(tsvfile, delimiter="\t") all_rows = list(reader) - new_rows = split_rows_by_time(all_rows) + new_rows = split_rows_by_time(all_rows, clips_folder) jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") @@ -361,7 +377,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs record_index += 1 total_duration = round(end_time, 2) - if 599.9 <= total_duration <= 600: + if math.isclose(total_duration, 600, abs_tol=1e-6): break else: diff --git a/cvat/apps/dataset_manager/formats/VoxCeleb.py b/cvat/apps/dataset_manager/formats/VoxCeleb.py index c557ea472bfa..2e336867a290 100644 --- a/cvat/apps/dataset_manager/formats/VoxCeleb.py +++ b/cvat/apps/dataset_manager/formats/VoxCeleb.py @@ -2,6 +2,7 @@ import os.path as osp import zipfile import csv +import math from django.db import transaction from glob import glob from pydub import AudioSegment @@ -21,11 +22,24 @@ def calculate_duration(row): return end_time - start_time -def split_rows_by_time(all_rows, time_threshold=600): +def split_rows_by_time(all_rows, clips_folder, time_threshold=600): result = [] total_duration = 0 + for row in all_rows: + if not row.get("start") or not row.get("end"): + audio_file_name = row["file"] + audio_file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(audio_file_path): + audio_segment = AudioSegment.from_file(audio_file_path) + audio_duration = audio_segment.duration_seconds + + # Set start to 0 if missing, and end to the audio duration + row["start"] = row.get("start", "0") + row["end"] = row.get("end", str(audio_duration)) + for row in all_rows: start_time = float(row["start"]) end_time = float(row["end"]) @@ -198,9 +212,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs num_tsv_rows = len(tsv_rows) num_clips = len(os.listdir(clips_folder)) - if num_tsv_rows != num_clips: + if num_tsv_rows > num_clips: raise ValueError( - f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. Clips must be equal or more." 
) # Combined audio that will be the final output @@ -221,6 +235,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs combined_audio += ( audio_segment # Append the audio in the order from TSV ) + else: + raise FileNotFoundError(f"File not found: {file_path}") # Create raw folder to store combined audio raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") @@ -254,7 +270,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs reader = csv.DictReader(tsvfile, delimiter="\t") all_rows = list(reader) - new_rows = split_rows_by_time(all_rows) + new_rows = split_rows_by_time(all_rows, clips_folder) jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") @@ -361,7 +377,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs record_index += 1 total_duration = round(end_time, 2) - if 599.9 <= total_duration <= 600: + if math.isclose(total_duration, 600, abs_tol=1e-6): break else: diff --git a/cvat/apps/dataset_manager/formats/Voxpopuli.py b/cvat/apps/dataset_manager/formats/Voxpopuli.py index 774ba4ed3a79..62c9400251c6 100644 --- a/cvat/apps/dataset_manager/formats/Voxpopuli.py +++ b/cvat/apps/dataset_manager/formats/Voxpopuli.py @@ -2,6 +2,7 @@ import os.path as osp import zipfile import csv +import math from django.db import transaction from glob import glob from pydub import AudioSegment @@ -21,11 +22,24 @@ def calculate_duration(row): return end_time - start_time -def split_rows_by_time(all_rows, time_threshold=600): +def split_rows_by_time(all_rows, clips_folder, time_threshold=600): result = [] total_duration = 0 + for row in all_rows: + if not row.get("start") or not row.get("end"): + audio_file_name = row["audio_path"] + audio_file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(audio_file_path): + audio_segment = AudioSegment.from_file(audio_file_path) + audio_duration = audio_segment.duration_seconds + + # Set start to 0 if missing, and end to the audio duration + row["start"] = row.get("start", "0") + row["end"] = row.get("end", str(audio_duration)) + for row in all_rows: start_time = float(row["start"]) end_time = float(row["end"]) @@ -198,9 +212,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs num_tsv_rows = len(tsv_rows) num_clips = len(os.listdir(clips_folder)) - if num_tsv_rows != num_clips: + if num_tsv_rows > num_clips: raise ValueError( - f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. Clips must be equal or more." 
) # Combined audio that will be the final output @@ -221,6 +235,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs combined_audio += ( audio_segment # Append the audio in the order from TSV ) + else: + raise FileNotFoundError(f"File not found: {file_path}") # Create raw folder to store combined audio raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") @@ -254,7 +270,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs reader = csv.DictReader(tsvfile, delimiter="\t") all_rows = list(reader) - new_rows = split_rows_by_time(all_rows) + new_rows = split_rows_by_time(all_rows, clips_folder) jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") @@ -361,7 +377,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs record_index += 1 total_duration = round(end_time, 2) - if 599.9 <= total_duration <= 600: + if math.isclose(total_duration, 600, abs_tol=1e-6): break else: diff --git a/cvat/apps/dataset_manager/formats/common_voice.py b/cvat/apps/dataset_manager/formats/common_voice.py index 0be3c228100c..9ed8fcc4bad9 100644 --- a/cvat/apps/dataset_manager/formats/common_voice.py +++ b/cvat/apps/dataset_manager/formats/common_voice.py @@ -2,6 +2,7 @@ import os.path as osp import zipfile import csv +import math from django.db import transaction from glob import glob from pydub import AudioSegment @@ -18,14 +19,28 @@ def calculate_duration(row): start_time = float(row["start"]) # Assuming start and end times are in seconds end_time = float(row["end"]) + return end_time - start_time -def split_rows_by_time(all_rows, time_threshold=600): +def split_rows_by_time(all_rows, clips_folder, time_threshold=600): result = [] total_duration = 0 + for row in all_rows: + if not row.get("start") or not row.get("end"): + audio_file_name = row["path"] + audio_file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(audio_file_path): + audio_segment = AudioSegment.from_file(audio_file_path) + audio_duration = audio_segment.duration_seconds + + # Set start to 0 if missing, and end to the audio duration + row["start"] = row.get("start", "0") + row["end"] = row.get("end", str(audio_duration)) + for row in all_rows: start_time = float(row["start"]) end_time = float(row["end"]) @@ -142,8 +157,7 @@ def load_anno(file_object, annotations): data = dm.task.patch_job_data(pk, serializer.data, action) -@importer(name="Common Voice", ext="TSV, ZIP", version=" ") -def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): +def _common_voice_import(src_file, temp_dir, instance_data, importer_name, version): is_zip = zipfile.is_zipfile(src_file) src_file.seek(0) file_name = os.path.basename(src_file.name) @@ -179,9 +193,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs num_tsv_rows = len(tsv_rows) num_clips = len(os.listdir(clips_folder)) - if num_tsv_rows != num_clips: + if num_tsv_rows > num_clips: raise ValueError( - f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. Clips must be equal or more." 
) # Combined audio that will be the final output @@ -202,6 +216,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs combined_audio += ( audio_segment # Append the audio in the order from TSV ) + else: + raise FileNotFoundError(f"File not found: {file_path}") # Create raw folder to store combined audio raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw") @@ -235,7 +251,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs reader = csv.DictReader(tsvfile, delimiter="\t") all_rows = list(reader) - new_rows = split_rows_by_time(all_rows) + new_rows = split_rows_by_time(all_rows, clips_folder) jobs = Job.objects.filter(segment__task=locked_instance).order_by("id") @@ -253,7 +269,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs end_time = start_time + record_duration label_name = record.get("label") - label_id = label_data._get_label_id(label_name) + label_id = label_data._get_label_id( + label_name + ) # Assuming start and end times are in seconds attributes = [] @@ -286,28 +304,156 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs else: break # Exit the loop when no more attributes are found - shapes_data = [ - { - "type": "rectangle", - "label": record.get("label", ""), - "points": [start_time, start_time, end_time, end_time], - "frame": 0, - "occluded": False, - "z_order": 0, - "group": None, - "source": "manual", - "transcript": record.get("sentence", ""), - "gender": record.get("gender", ""), - "age": record.get("age", ""), - "locale": record.get("locale", ""), - "accent": record.get("accents", ""), - "emotion": record.get("emotion", ""), - "rotation": 0.0, - "label_id": label_id, - "attributes": attributes, - } + importer_versions_1 = [ + ("Common Voice Corpus", 19.0), + ("Common Voice Delta Segment", 19.0), + ("Common Voice Corpus", 18.0), + ("Common Voice Delta Segment", 18.0), + ("Common Voice Corpus", 17.0), + ("Common Voice Delta Segment", 17.0), + ] + + if (importer_name, version) in importer_versions_1: + + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "client_id": record.get("client_id", ""), + "path": record.get("path", ""), + "sentence_id": record.get("sentence_id", ""), + "sentence_domain": record.get("sentence_domain", ""), + "up_votes": record.get("up_votes", ""), + "down_votes": record.get("down_votes"), + "transcript": record.get("sentence", ""), + "gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": record.get("locale", ""), + "accent": record.get("accents", ""), + "variant": record.get("variant", ""), + "segment": record.get("segment", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + importer_versions_2 = [ + ("Common Voice Corpus", 16.1), + ("Common Voice Delta Segment", 16.1), + ("Common Voice Corpus", 15.0), + ("Common Voice Delta Segment", 15.0), + ("Common Voice Corpus", 14.0), + ("Common Voice Delta Segment", 14.0), + ("Common Voice Delta Segment", 13.0), + ] + + if (importer_name, version) in importer_versions_2: + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "client_id": 
record.get("client_id", ""), + "path": record.get("path", ""), + "up_votes": record.get("up_votes", ""), + "down_votes": record.get("down_votes"), + "transcript": record.get("sentence", ""), + "gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": record.get("locale", ""), + "accent": record.get("accents", ""), + "variant": record.get("variant", ""), + "segment": record.get("segment", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + importer_versions_3 = [ + ("Common Voice Corpus", 13.0), + ("Common Voice Corpus", 12.0), + ("Common Voice Delta Segment", 12.0), + ("Common Voice Corpus", 11.0), + ("Common Voice Corpus", 10.0), + ("Common Voice Delta Segment", 10.0), + ("Common Voice Corpus", 9.0), + ("Common Voice Corpus", 8.0), + ("Common Voice Corpus", 7.0), + ("Common Voice Corpus", 6.1), + ("Common Voice Corpus", 5.1), + ] + + if (importer_name, version) in importer_versions_3: + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "client_id": record.get("client_id", ""), + "path": record.get("path", ""), + "up_votes": record.get("up_votes", ""), + "down_votes": record.get("down_votes"), + "transcript": record.get("sentence", ""), + "gender": record.get("gender", ""), + "age": record.get("age", ""), + "locale": record.get("locale", ""), + "accent": record.get("accents", ""), + "segment": record.get("segment", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + + importer_versions_4 = [ + ("Common Voice Corpus", 4), + ("Common Voice Corpus", 3), + ("Common Voice Corpus", 2), + ("Common Voice Corpus", 1), ] + if (importer_name, version) in importer_versions_4: + shapes_data = [ + { + "type": "rectangle", + "label": record.get("label", ""), + "points": [start_time, start_time, end_time, end_time], + "frame": 0, + "occluded": False, + "z_order": 0, + "group": None, + "source": "manual", + "client_id": record.get("client_id", ""), + "path": record.get("path", ""), + "up_votes": record.get("up_votes", ""), + "down_votes": record.get("down_votes"), + "transcript": record.get("sentence", ""), + "gender": record.get("gender", ""), + "age": record.get("age", ""), + "accent": record.get("accents", ""), + "rotation": 0.0, + "label_id": label_id, + "attributes": attributes, + } + ] + data = {"shapes": shapes_data} start_time = end_time @@ -320,11 +466,213 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs record_index += 1 total_duration = round(end_time, 2) - if 599.9 <= total_duration <= 600: + if math.isclose(total_duration, 600, abs_tol=1e-6): break else: - anno_paths = glob(osp.join(temp_dir, "**", "*.tsv"), recursive=True) for p in anno_paths: load_anno(p, instance_data) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="1") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 1 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="2") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 2 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", 
ext="TSV, ZIP", version="3") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 3 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="4") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 4 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="5.1") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 5.1 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="6.1") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 6.1 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="7.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 7.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="8.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 8.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="9.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 9.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="10.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 10.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="10.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 10.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="11.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 11.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="11.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 11.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="12.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 12.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + 
+@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="12.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 12.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="13.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 13.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="13.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 13.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="14.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 14.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="14.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 14.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="15.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 15.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="15.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 15.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="16.1") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 16.1 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="16.1") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 16.1 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="17.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 17.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="17.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 17.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="18.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice 
Corpus" + version = 18.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="18.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 18.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Delta Segment", ext="TSV, ZIP", version="19.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Delta Segment" + version = 19.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) + + +@importer(name="Common Voice Corpus", ext="TSV, ZIP", version="19.0") +def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs): + importer_name = "Common Voice Corpus" + version = 19.0 + _common_voice_import(src_file, temp_dir, instance_data, importer_name, version) diff --git a/cvat/apps/dataset_manager/formats/librispeech.py b/cvat/apps/dataset_manager/formats/librispeech.py index 715b0eb1cf1c..1e4a2b817621 100644 --- a/cvat/apps/dataset_manager/formats/librispeech.py +++ b/cvat/apps/dataset_manager/formats/librispeech.py @@ -2,6 +2,7 @@ import os.path as osp import zipfile import csv +import math from django.db import transaction from glob import glob from pydub import AudioSegment @@ -21,11 +22,24 @@ def calculate_duration(row): return end_time - start_time -def split_rows_by_time(all_rows, time_threshold=600): +def split_rows_by_time(all_rows, clips_folder, time_threshold=600): result = [] total_duration = 0 + for row in all_rows: + if not row.get("start") or not row.get("end"): + audio_file_name = row["file"] + audio_file_path = os.path.join(clips_folder, audio_file_name) + + if os.path.isfile(audio_file_path): + audio_segment = AudioSegment.from_file(audio_file_path) + audio_duration = audio_segment.duration_seconds + + # Set start to 0 if missing, and end to the audio duration + row["start"] = row.get("start", "0") + row["end"] = row.get("end", str(audio_duration)) + for row in all_rows: start_time = float(row["start"]) end_time = float(row["end"]) @@ -198,9 +212,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs num_tsv_rows = len(tsv_rows) num_clips = len(os.listdir(clips_folder)) - if num_tsv_rows != num_clips: + if num_tsv_rows > num_clips: raise ValueError( - f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match." + f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. Clips must be equal or more." 
             )
 
         # Combined audio that will be the final output
@@ -221,6 +235,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
                 combined_audio += (
                     audio_segment  # Append the audio in the order from TSV
                 )
+            else:
+                raise FileNotFoundError(f"File not found: {file_path}")
 
         # Create raw folder to store combined audio
         raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw")
@@ -254,7 +270,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
             reader = csv.DictReader(tsvfile, delimiter="\t")
             all_rows = list(reader)
 
-            new_rows = split_rows_by_time(all_rows)
+            new_rows = split_rows_by_time(all_rows, clips_folder)
 
             jobs = Job.objects.filter(segment__task=locked_instance).order_by("id")
@@ -361,7 +377,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
                     record_index += 1
 
                 total_duration = round(end_time, 2)
-                if 599.9 <= total_duration <= 600:
+                if math.isclose(total_duration, 600, abs_tol=1e-6):
                     break
 
     else:
diff --git a/cvat/apps/dataset_manager/formats/tedlium.py b/cvat/apps/dataset_manager/formats/tedlium.py
index 776434d0ec95..e68b24ea5d2b 100644
--- a/cvat/apps/dataset_manager/formats/tedlium.py
+++ b/cvat/apps/dataset_manager/formats/tedlium.py
@@ -2,6 +2,7 @@
 import os.path as osp
 import zipfile
 import csv
+import math
 from django.db import transaction
 from glob import glob
 from pydub import AudioSegment
@@ -21,11 +22,24 @@ def calculate_duration(row):
     return end_time - start_time
 
 
-def split_rows_by_time(all_rows, time_threshold=600):
+def split_rows_by_time(all_rows, clips_folder, time_threshold=600):
     result = []
     total_duration = 0
 
+    for row in all_rows:
+        if not row.get("start") or not row.get("end"):
+            audio_file_name = row["file"]
+            audio_file_path = os.path.join(clips_folder, audio_file_name)
+
+            if os.path.isfile(audio_file_path):
+                audio_segment = AudioSegment.from_file(audio_file_path)
+                audio_duration = audio_segment.duration_seconds
+
+                # Set start to 0 if missing, and end to the audio duration
+                row["start"] = row.get("start") or "0"
+                row["end"] = row.get("end") or str(audio_duration)
+
     for row in all_rows:
         start_time = float(row["start"])
         end_time = float(row["end"])
@@ -198,9 +212,9 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
         num_tsv_rows = len(tsv_rows)
         num_clips = len(os.listdir(clips_folder))
 
-        if num_tsv_rows != num_clips:
+        if num_tsv_rows > num_clips:
             raise ValueError(
-                f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. The numbers must match."
+                f"Import failed: {num_tsv_rows} rows in TSV but {num_clips} audio clips in the clips folder. There must be at least as many clips as TSV rows."
             )
 
         # Combined audio that will be the final output
@@ -221,6 +235,8 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
                 combined_audio += (
                     audio_segment  # Append the audio in the order from TSV
                 )
+            else:
+                raise FileNotFoundError(f"File not found: {file_path}")
 
         # Create raw folder to store combined audio
         raw_folder_path = os.path.join(task_data.get_data_dirname(), "raw")
@@ -254,7 +270,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
             reader = csv.DictReader(tsvfile, delimiter="\t")
             all_rows = list(reader)
 
-            new_rows = split_rows_by_time(all_rows)
+            new_rows = split_rows_by_time(all_rows, clips_folder)
 
             jobs = Job.objects.filter(segment__task=locked_instance).order_by("id")
@@ -361,7 +377,7 @@ def _import(src_file, temp_dir, instance_data, load_data_callback=None, **kwargs
                     record_index += 1
 
                 total_duration = round(end_time, 2)
-                if 599.9 <= total_duration <= 600:
+                if math.isclose(total_duration, 600, abs_tol=1e-6):
                     break
 
     else:
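
The same two behavioural changes repeat across common_voice.py, librispeech.py and tedlium.py above, so a minimal standalone sketch may help when reviewing them. The helper name backfill_times, the path_key parameter and the sample row are invented for illustration; AudioSegment.from_file / duration_seconds are the pydub calls the hunks rely on, and math.isclose is the new stop condition for the 600-second job cap.

import math
import os

from pydub import AudioSegment


def backfill_times(rows, clips_folder, path_key="path"):
    # Mirrors the pre-pass added to split_rows_by_time(): when a TSV row has a
    # missing or empty start/end, open the clip itself and use its length.
    # (common_voice.py looks the file name up under "path"; librispeech.py and
    # tedlium.py use "file".)
    for row in rows:
        if not row.get("start") or not row.get("end"):
            clip_path = os.path.join(clips_folder, row[path_key])
            if os.path.isfile(clip_path):
                duration = AudioSegment.from_file(clip_path).duration_seconds
                # Missing/empty start defaults to 0, end to the clip length
                row["start"] = row.get("start") or "0"
                row["end"] = row.get("end") or str(duration)
    return rows


if __name__ == "__main__":
    rows = [{"path": "clip_0001.mp3", "start": "", "end": ""}]
    print(backfill_times(rows, "clips"))

    # The per-job duration check now asks for (near-)exact equality with 600 s
    # instead of accepting anything in the old 599.9-600 window:
    print(math.isclose(600.0, 600, abs_tol=1e-6))   # True
    print(math.isclose(599.95, 600, abs_tol=1e-6))  # False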