From 4cb90b2109ce0d56557a1c369a583b7c0024ed8f Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Thu, 18 Jan 2024 11:03:47 +0000
Subject: [PATCH 01/10] Fix share-fs portfolio link + remove sub-dir on store

---
 .../oasisapi/portfolios/v1_api/serializers.py | 60 +++++++++++++------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/src/server/oasisapi/portfolios/v1_api/serializers.py b/src/server/oasisapi/portfolios/v1_api/serializers.py
index 5cb1d8284..d3784574e 100644
--- a/src/server/oasisapi/portfolios/v1_api/serializers.py
+++ b/src/server/oasisapi/portfolios/v1_api/serializers.py
@@ -1,4 +1,6 @@
 from os import path
+import mimetypes
+
 from drf_yasg.utils import swagger_serializer_method
 from rest_framework import serializers
 from rest_framework.exceptions import ValidationError
@@ -260,29 +262,52 @@ def validate(self, attrs):
             raise serializers.ValidationError(errors)
         return super(PortfolioStorageSerializer, self).validate(attrs)
 
+    def inferr_content_type(self, stored_filename):
+        inferred_type = mimetypes.MimeTypes().guess_type(stored_filename)[0]
+        if not inferred_type and stored_filename.lower().endswith('parquet'):
+            # mimetypes doesn't work for parquet so handle that here
+            inferred_type = 'application/octet-stream'
+        if not inferred_type:
+            inferred_type = default_storage.default_content_type
+        return inferred_type
+
     def get_content_type(self, stored_filename):
         try: # fetch content_type stored in Django's DB
             return RelatedFile.objects.get(file=path.basename(stored_filename)).content_type
         except ObjectDoesNotExist:
-            try: # Find content_type from S3 Object header
-                object_header = default_storage.connection.meta.client.head_object(
-                    Bucket=default_storage.bucket_name,
-                    Key=stored_filename)
-                return object_header['ContentType']
-            except ClientError:
-                # fallback to the default content_type
-                return default_storage.default_content_type
+            # Find content_type from S3 Object header
+            if hasattr(default_storage, 'bucket'):
+                try:
+                    object_header = default_storage.connection.meta.client.head_object(
+                        Bucket=default_storage.bucket_name,
+                        Key=stored_filename)
+                    return object_header['ContentType']
+                except S3_ClientError:
+                    return self.inferr_content_type(stored_filename)
+
+            # Find content_type from Blob Storage
+            elif hasattr(default_storage, 'azure_container'):
+                blob_client = default_storage.client.get_blob_client(stored_filename)
+                blob_properties = blob_client.get_blob_properties()
+                return blob_properties.content_settings.content_type
+
+            else:
+                return self.inferr_content_type(stored_filename)
+
     def update(self, instance, validated_data):
         files_for_removal = list()
 
         for field in validated_data:
-            content_type = self.get_content_type(validated_data[field])
+            old_file_name = validated_data[field]
+            content_type = self.get_content_type(old_file_name)
+            fname = path.basename(old_file_name)
+            new_file_name = default_storage.get_alternative_name(fname, '')
+
             # S3 storage - File copy needed
             if hasattr(default_storage, 'bucket'):
-                fname = path.basename(validated_data[field])
                 new_file = ContentFile(b'')
-                new_file.name = default_storage.get_alternative_name(fname, '')
+                new_file.name = new_file_name
                 new_related_file = RelatedFile.objects.create(
                     file=new_file,
                     filename=fname,
@@ -292,17 +317,16 @@ def update(self, instance, validated_data):
                 )
                 bucket = default_storage.bucket
                 stored_file = default_storage.open(new_related_file.file.name)
-                stored_file.obj.copy({"Bucket": bucket.name, "Key": validated_data[field]})
+                stored_file.obj.copy({"Bucket": bucket.name, "Key": old_file_name})
                 stored_file.obj.wait_until_exists()
 
             elif hasattr(default_storage, 'azure_container'):
                 # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
-                fname = path.basename(validated_data[field])
-                new_file_name = default_storage.get_alternative_name(validated_data[field], '')
+                new_file_name = default_storage.get_alternative_name(old_file_name, '')
                 new_blobname = path.basename(new_file_name)
 
                 # Copies a blob asynchronously.
-                source_blob = default_storage.client.get_blob_client(validated_data[field])
+                source_blob = default_storage.client.get_blob_client(old_file_name)
                 dest_blob = default_storage.client.get_blob_client(new_file_name)
 
                 try:
@@ -327,11 +351,11 @@ def update(self, instance, validated_data):
 
             # Shared-fs
             else:
-                stored_file = default_storage.open(validated_data[field])
-                new_file = File(stored_file, name=validated_data[field])
+                stored_file = default_storage.open(old_file_name)
+                new_file = File(stored_file, name=new_file_name)
                 new_related_file = RelatedFile.objects.create(
                     file=new_file,
-                    filename=validated_data[field],
+                    filename=fname,
                     content_type=content_type,
                     creator=self.context['request'].user,
                     store_as_filename=True,
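A note on patch 01: the mimetypes-based fallback it introduces can be exercised
standalone. A minimal sketch, using module-level mimetypes.guess_type (equivalent
here to the MimeTypes() instance in the patch) and 'application/octet-stream' as
a stand-in for default_storage.default_content_type:

    import mimetypes

    DEFAULT_CONTENT_TYPE = 'application/octet-stream'  # stand-in for the storage default

    def infer_content_type(stored_filename):
        inferred_type = mimetypes.guess_type(stored_filename)[0]
        if not inferred_type and stored_filename.lower().endswith('parquet'):
            # mimetypes has no mapping for parquet, so fall back explicitly
            inferred_type = 'application/octet-stream'
        return inferred_type or DEFAULT_CONTENT_TYPE

    # guess_type knows CSV but not parquet:
    assert infer_content_type('exposure.csv') == 'text/csv'
    assert infer_content_type('exposure.parquet') == 'application/octet-stream'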
From 685803c4190b3ec3555dbf3e23bfac6c4554ea5d Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Thu, 18 Jan 2024 11:24:03 +0000
Subject: [PATCH 02/10] Update v2 file linking

---
 .../oasisapi/portfolios/v1_api/serializers.py |  8 ++++----
 .../oasisapi/portfolios/v2_api/serializers.py | 20 +++++++++----------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/server/oasisapi/portfolios/v1_api/serializers.py b/src/server/oasisapi/portfolios/v1_api/serializers.py
index d3784574e..bedcbd94c 100644
--- a/src/server/oasisapi/portfolios/v1_api/serializers.py
+++ b/src/server/oasisapi/portfolios/v1_api/serializers.py
@@ -323,11 +323,11 @@ def update(self, instance, validated_data):
             elif hasattr(default_storage, 'azure_container'):
                 # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
                 new_file_name = default_storage.get_alternative_name(old_file_name, '')
-                new_blobname = path.basename(new_file_name)
+                new_blobname = '/'.join([default_storage.location, path.basename(new_file_name)])
 
                 # Copies a blob asynchronously.
                 source_blob = default_storage.client.get_blob_client(old_file_name)
-                dest_blob = default_storage.client.get_blob_client(new_file_name)
+                dest_blob = default_storage.client.get_blob_client(new_blobname)
 
                 try:
                     lease = BlobLeaseClient(source_blob)
@@ -340,9 +340,9 @@ def update(self, instance, validated_data):
                     lease.break_lease()
                     raise e
 
-                stored_blob = default_storage.open(new_blobname)
+                stored_blob = default_storage.open(new_file_name)
                 new_related_file = RelatedFile.objects.create(
-                    file=File(stored_blob, name=new_blobname),
+                    file=File(stored_blob, name=new_file_name),
                     filename=fname,
                     content_type=content_type,
                     creator=self.context['request'].user,
diff --git a/src/server/oasisapi/portfolios/v2_api/serializers.py b/src/server/oasisapi/portfolios/v2_api/serializers.py
index 04442a446..f1e600d46 100644
--- a/src/server/oasisapi/portfolios/v2_api/serializers.py
+++ b/src/server/oasisapi/portfolios/v2_api/serializers.py
@@ -324,13 +324,14 @@ def update(self, instance, validated_data):
         files_for_removal = list()
         user = self.context['request'].user
         for field in validated_data:
-            content_type = self.get_content_type(validated_data[field])
+            old_file_name = validated_data[field]
+            content_type = self.get_content_type(old_file_name)
+            fname = path.basename(old_file_name)
+            new_file_name = default_storage.get_alternative_name(fname, '')
 
             # S3 storage - File copy needed
             if hasattr(default_storage, 'bucket'):
-                fname = path.basename(validated_data[field])
                 new_file = ContentFile(b'')
-                new_file.name = default_storage.get_alternative_name(fname, '')
                 new_related_file = RelatedFile.objects.create(
                     file=new_file,
                     filename=fname,
@@ -347,12 +348,11 @@ def update(self, instance, validated_data):
 
             elif hasattr(default_storage, 'azure_container'):
                 # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
-                fname = path.basename(validated_data[field])
-                new_file_name = default_storage.get_alternative_name(validated_data[field], '')
+                new_file_name = default_storage.get_alternative_name(old_file_name, '')
                 new_blobname = '/'.join([default_storage.location, path.basename(new_file_name)])
 
                 # Copies a blob asynchronously.
-                source_blob = default_storage.client.get_blob_client(validated_data[field])
+                source_blob = default_storage.client.get_blob_client(old_file_name)
                 dest_blob = default_storage.client.get_blob_client(new_blobname)
 
                 try:
@@ -371,17 +371,17 @@ def update(self, instance, validated_data):
                     file=File(stored_blob, name=new_file_name),
                     filename=fname,
                     content_type=content_type,
-                    creator=self.context['request'].user,
+                    creator=user,
                     store_as_filename=True,
                 )
 
             # Shared-fs
             else:
-                stored_file = default_storage.open(validated_data[field])
-                new_file = File(stored_file, name=validated_data[field])
+                stored_file = default_storage.open(old_file_name)
+                new_file = File(stored_file, name=new_file_name)
                 new_related_file = RelatedFile.objects.create(
                     file=new_file,
-                    filename=validated_data[field],
+                    filename=fname,
                     content_type=content_type,
                     creator=user,
                     store_as_filename=True,
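The Azure branch in patches 01 and 02 follows the copy-with-lease pattern from
the Microsoft doc linked in the diff; the copy call itself sits in the elided
middle of the hunk. A minimal sketch of that pattern with the azure-storage-blob
SDK, assuming source_blob and dest_blob are pre-built BlobClient instances and
using start_copy_from_url (the call the linked doc describes; the patch's exact
call is not visible here), simplified to a try/finally:

    from azure.storage.blob import BlobLeaseClient

    def copy_blob(source_blob, dest_blob):
        # Lease the source so it cannot change while the copy is in flight
        lease = BlobLeaseClient(source_blob)
        lease.acquire()
        try:
            # Starts an asynchronous server-side copy of the source blob;
            # poll dest_blob.get_blob_properties() if completion must be awaited
            dest_blob.start_copy_from_url(source_blob.url)
        finally:
            lease.break_lease()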
From bbc9001ad8264654a129ce3abd0fbb3893682a99 Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Thu, 18 Jan 2024 11:25:29 +0000
Subject: [PATCH 03/10] PEP

---
 src/server/oasisapi/portfolios/v1_api/serializers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/server/oasisapi/portfolios/v1_api/serializers.py b/src/server/oasisapi/portfolios/v1_api/serializers.py
index bedcbd94c..c79494461 100644
--- a/src/server/oasisapi/portfolios/v1_api/serializers.py
+++ b/src/server/oasisapi/portfolios/v1_api/serializers.py
@@ -294,7 +294,6 @@ def get_content_type(self, stored_filename):
             else:
                 return self.inferr_content_type(stored_filename)
 
-
     def update(self, instance, validated_data):
         files_for_removal = list()

From bda02402b76cf6cb1101e7c72e76bb5ec137e651 Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Thu, 18 Jan 2024 11:57:54 +0000
Subject: [PATCH 04/10] Add missing S3 storage check

---
 .github/workflows/test-images.yml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/.github/workflows/test-images.yml b/.github/workflows/test-images.yml
index 17fd00fd6..61244b471 100644
--- a/.github/workflows/test-images.yml
+++ b/.github/workflows/test-images.yml
@@ -180,6 +180,22 @@ jobs:
       pytest_opts: "--docker-compose=./docker/plat2-v2.docker-compose.yml "
       storage_suffix: '-all-checks'
 
+  storage_s3:
+    name: Storage Compatibility (S3)
+    secrets: inherit
+    needs: [setup]
+    uses: OasisLMF/OasisPiWind/.github/workflows/integration.yml@main
+    with:
+      piwind_branch: ${{ needs.setup.outputs.piwind_branch }}
+      server_image: ${{ needs.setup.outputs.build_server_img }}
+      server_tag: ${{ needs.setup.outputs.build_server_tag }}
+      worker_image: ${{ needs.setup.outputs.build_worker_img }}
+      worker_tag: ${{ needs.setup.outputs.build_worker_tag }}
+      worker_api_ver: 'v1'
+      debug_mode: 1
+      pytest_opts: "--docker-compose=./docker/s3.docker-compose.yml ${{ needs.setup.outputs.pytest_opts }}"
+      storage_suffix: '_s3'
+
   worker_debian:
     name: Worker Debian
     secrets: inherit
@@ -259,3 +275,4 @@ jobs:
       debug_mode: 1
       pytest_opts: "--docker-compose=./docker/plat2-v2.docker-compose.yml ${{ needs.setup.outputs.pytest_opts }}"
       storage_suffix: "_worker-${{ needs.setup.outputs.release_stable_28 }}"
+

From 90c5199202706ae26b0a67a37175420d34eba6ef Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Thu, 18 Jan 2024 12:43:53 +0000
Subject: [PATCH 05/10] Update image testing - S3 storage

---
 .github/workflows/test-images.yml | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-images.yml b/.github/workflows/test-images.yml
index 61244b471..23626be60 100644
--- a/.github/workflows/test-images.yml
+++ b/.github/workflows/test-images.yml
@@ -180,7 +180,7 @@ jobs:
       pytest_opts: "--docker-compose=./docker/plat2-v2.docker-compose.yml "
       storage_suffix: '-all-checks'
 
-  storage_s3:
+  storage_s3_v1:
     name: Storage Compatibility (S3)
     secrets: inherit
     needs: [setup]
     uses: OasisLMF/OasisPiWind/.github/workflows/integration.yml@main
     with:
       piwind_branch: ${{ needs.setup.outputs.piwind_branch }}
       server_image: ${{ needs.setup.outputs.build_server_img }}
       server_tag: ${{ needs.setup.outputs.build_server_tag }}
       worker_image: ${{ needs.setup.outputs.build_worker_img }}
       worker_tag: ${{ needs.setup.outputs.build_worker_tag }}
       worker_api_ver: 'v1'
       debug_mode: 1
-      pytest_opts: "--docker-compose=./docker/s3.docker-compose.yml ${{ needs.setup.outputs.pytest_opts }}"
+      pytest_opts: "--docker-compose=./docker/plat2-v2.s3.docker-compose.yml ${{ needs.setup.outputs.pytest_opts }}"
       storage_suffix: '_s3'
+
+  storage_s3_v2:
+    name: Storage Compatibility (S3)
+    secrets: inherit
+    needs: [setup]
+    uses: OasisLMF/OasisPiWind/.github/workflows/integration.yml@main
+    with:
+      piwind_branch: ${{ needs.setup.outputs.piwind_branch }}
+      server_image: ${{ needs.setup.outputs.build_server_img }}
+      server_tag: ${{ needs.setup.outputs.build_server_tag }}
+      worker_image: ${{ needs.setup.outputs.build_worker_img }}
+      worker_tag: ${{ needs.setup.outputs.build_worker_tag }}
+      worker_api_ver: 'v2'
+      debug_mode: 1
+      pytest_opts: "--docker-compose=./docker/plat2-v2.s3.docker-compose.yml ${{ needs.setup.outputs.pytest_opts }}"
       storage_suffix: '_s3'
 
   worker_debian:

From 9dbce9903ffae4a629c7194c7f92bb1bc241199d Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Thu, 18 Jan 2024 13:18:56 +0000
Subject: [PATCH 06/10] retest

From 2a7bcd5cf00295ccbf5f63ad00380af3e85597fd Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Thu, 18 Jan 2024 13:26:37 +0000
Subject: [PATCH 07/10] trigger test again

From c173f357a29aaea949d1803f9c590f41327e517b Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Thu, 18 Jan 2024 13:42:00 +0000
Subject: [PATCH 08/10] Name S3 checks by version

---
 .github/workflows/test-images.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-images.yml b/.github/workflows/test-images.yml
index 23626be60..1416571d7 100644
--- a/.github/workflows/test-images.yml
+++ b/.github/workflows/test-images.yml
@@ -181,7 +181,7 @@ jobs:
       storage_suffix: '-all-checks'
 
   storage_s3_v1:
-    name: Storage Compatibility (S3)
+    name: V1 Storage Compatibility (S3)
     secrets: inherit
     needs: [setup]
     uses: OasisLMF/OasisPiWind/.github/workflows/integration.yml@main
@@ -197,7 +197,7 @@ jobs:
   storage_s3_v2:
-    name: Storage Compatibility (S3)
+    name: V2 Storage Compatibility (S3)
     secrets: inherit
     needs: [setup]
     uses: OasisLMF/OasisPiWind/.github/workflows/integration.yml@main

From 0cc765abfa759bbfd996b412209da60000f58312 Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Fri, 19 Jan 2024 10:35:39 +0000
Subject: [PATCH 09/10] Test fix for input tar extract

---
 src/model_execution_worker/storage_manager.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/model_execution_worker/storage_manager.py b/src/model_execution_worker/storage_manager.py
index 72ee4783d..a7ca17913 100755
--- a/src/model_execution_worker/storage_manager.py
+++ b/src/model_execution_worker/storage_manager.py
@@ -178,7 +178,7 @@ def _fetch_file(self, reference, output_path, subdir):
             raise MissingInputsException(fpath)
 
     def filepath(self, reference):
-        """ return the absolute filepath 
+        """ return the absolute filepath
        """
        fpath = os.path.join(
            self.media_root,
@@ -204,11 +204,16 @@ def extract(self, archive_fp, directory, storage_subdir=''):
         temp_dir = tempfile.TemporaryDirectory()
         try:
             temp_dir_path = temp_dir.__enter__()
-            local_archive_path = self.get(
-                archive_fp,
-                os.path.join(temp_dir_path, os.path.basename(archive_fp)),
-                subdir=storage_subdir
-            )
+
+            if self._is_locally_stored(archive_fp):
+                local_archive_path = archive_fp
+            else:
+                local_archive_path = self.get(
+                    archive_fp,
+                    os.path.join(temp_dir_path, os.path.basename(archive_fp)),
+                    subdir=storage_subdir
+                )
+
             with tarfile.open(local_archive_path) as f:
                 f.extractall(directory)
         finally:

From 3d14fb14af0cda2129e2e49cdeb7ec1b13f08b7f Mon Sep 17 00:00:00 2001
From: Sam Gamble
Date: Fri, 19 Jan 2024 12:20:04 +0000
Subject: [PATCH 10/10] try fix again

---
 src/model_execution_worker/storage_manager.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/model_execution_worker/storage_manager.py b/src/model_execution_worker/storage_manager.py
index a7ca17913..d6d68ad31 100755
--- a/src/model_execution_worker/storage_manager.py
+++ b/src/model_execution_worker/storage_manager.py
@@ -204,9 +204,8 @@ def extract(self, archive_fp, directory, storage_subdir=''):
         temp_dir = tempfile.TemporaryDirectory()
         try:
             temp_dir_path = temp_dir.__enter__()
-
-            if self._is_locally_stored(archive_fp):
-                local_archive_path = archive_fp
+            if os.path.isfile(archive_fp):
+                local_archive_path = os.path.abspath(archive_fp)
             else:
                 local_archive_path = self.get(
                     archive_fp,
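Taken together, patches 09 and 10 make extract() treat the archive reference as
local whenever it resolves to a file on disk (the shared-fs case) and only
download it into a scratch directory otherwise. A minimal sketch of the
resulting flow, with fetch_remote as a hypothetical stand-in for self.get():

    import os
    import tarfile
    import tempfile

    def extract(archive_fp, directory, fetch_remote):
        with tempfile.TemporaryDirectory() as temp_dir_path:
            if os.path.isfile(archive_fp):
                # shared-fs: the worker already sees the archive on disk
                local_archive_path = os.path.abspath(archive_fp)
            else:
                # object store: download into the scratch dir first
                local_archive_path = fetch_remote(
                    archive_fp,
                    os.path.join(temp_dir_path, os.path.basename(archive_fp)))
            with tarfile.open(local_archive_path) as f:
                f.extractall(directory)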