Skip to content

Commit

Permalink
Fix validator with s3 json import (#755)
Browse files Browse the repository at this point in the history
* Fix validator with s3 json import

* Fix mock in tests

Co-authored-by: nik <[email protected]>
  • Loading branch information
niklub and nik authored Apr 5, 2021
1 parent 76b9336 commit bf096fe
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 29 deletions.
22 changes: 4 additions & 18 deletions label_studio/io_storages/s3/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from io_storages.serializers import StorageAnnotationSerializer
from tasks.serializers import TaskSerializerBulk
from tasks.models import Annotation
from data_import.serializers import ImportApiSerializer

logger = logging.getLogger(__name__)
logging.getLogger('botocore').setLevel(logging.CRITICAL)
Expand Down Expand Up @@ -128,25 +129,10 @@ def scan_and_create_links(self):
def _get_validated_task(self, parsed_data, key):
""" Validate parsed data with labeling config and task structure
"""
is_list = isinstance(parsed_data, list)
# we support only one task per JSON file
if not (is_list and len(parsed_data) == 1 or isinstance(parsed_data, dict)):
if not isinstance(parsed_data, dict):
raise TaskValidationError('Error at ' + str(key) + ':\n'
'Cloud storage supports one task per JSON file only. '
'Task must be {} or [{}] with length = 1')

# classic validation for one task
serializer = TaskSerializerBulk(context={'project': self.project})
try:
new_tasks = serializer.to_internal_value(parsed_data if is_list else [parsed_data])
except TaskValidationError as e:
# pretty format of errors
messages = e.msg_to_list()
out = [(str(key) + ' :: ' + msg) for msg in messages]
out = "\n".join(out)
raise TaskValidationError(out)

return new_tasks[0]
'Cloud storage supports one task (one dict object) per JSON file only. ')
return parsed_data

def get_data(self, key):
uri = f's3://{self.bucket}/{key}'
Expand Down
34 changes: 23 additions & 11 deletions label_studio/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,21 +54,33 @@ def s3_with_images(s3):
s3://pytest-s3-images/subdir/image1.jpg
s3://pytest-s3-images/subdir/image2.jpg
"""
with mock_s3():
bucket_name = 'pytest-s3-images'
s3.create_bucket(Bucket=bucket_name)
s3.put_object(Bucket=bucket_name, Key='image1.jpg', Body='123')
s3.put_object(Bucket=bucket_name, Key='subdir/image1.jpg', Body='456')
s3.put_object(Bucket=bucket_name, Key='subdir/image2.jpg', Body='789')
yield s3
bucket_name = 'pytest-s3-images'
s3.create_bucket(Bucket=bucket_name)
s3.put_object(Bucket=bucket_name, Key='image1.jpg', Body='123')
s3.put_object(Bucket=bucket_name, Key='subdir/image1.jpg', Body='456')
s3.put_object(Bucket=bucket_name, Key='subdir/image2.jpg', Body='789')
yield s3


@pytest.fixture(autouse=True)
def s3_with_jsons(s3):
"""
Create the 'pytest-s3-jsons' bucket holding a single JSON task file.

Bucket structure:
s3://pytest-s3-jsons/test.json

test.json contains one JSON object with an 'image_url' key.
Yields the same `s3` object that was passed in.
"""
bucket_name = 'pytest-s3-jsons'
s3.create_bucket(Bucket=bucket_name)
s3.put_object(Bucket=bucket_name, Key='test.json', Body=json.dumps({'image_url': 'http://ggg.com/image.jpg'}))
yield s3


@pytest.fixture(autouse=True)
def s3_export_bucket(s3):
with mock_s3():
bucket_name = 'pytest-export-s3-bucket'
s3.create_bucket(Bucket=bucket_name)
yield s3
bucket_name = 'pytest-export-s3-bucket'
s3.create_bucket(Bucket=bucket_name)
yield s3


@pytest.fixture(autouse=True)
Expand Down
41 changes: 41 additions & 0 deletions label_studio/tests/test_suites/io_storages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,44 @@
status_code: 201
response:
id: '{annotation_pk}'


# s3 storage with JSON data
- test_import_jsons_from_s3:
# Create project
- /api/projects:
method: POST
data:
title: test_s3_storage_with_json
label_config: <View><Image name="image" value="$image_url"/><Choices name="label" toName="image"><Choice value="pos"/><Choice value="neg"/></Choices></View>
is_published: true
status_code: 201
response:
id: '{project_pk}'

# Create storage
- /api/storages/s3:
method: POST
data:
project: '{project_pk}'
title: Testing S3 storage 2 (bucket from conftest.py)
bucket: pytest-s3-jsons
use_blob_urls: false
status_code: 201
response:
id: '{storage_pk}'

# Sync storage
- /api/storages/s3/{storage_pk}/sync:
method: POST
status_code: 200
response:
last_sync_count: 1

# Get all tasks
- /api/projects/{project_pk}/tasks:
method: GET
status_code: 200
response:
- data:
image_url: http://ggg.com/image.jpg

0 comments on commit bf096fe

Please sign in to comment.