diff --git a/.github/workflows/test-images.yml b/.github/workflows/test-images.yml
index 17fd00fd6..1416571d7 100644
--- a/.github/workflows/test-images.yml
+++ b/.github/workflows/test-images.yml
@@ -180,6 +180,38 @@ jobs:
       pytest_opts: "--docker-compose=./docker/plat2-v2.docker-compose.yml "
       storage_suffix: '-all-checks'
 
+  storage_s3_v1:
+    name: V1 Storage Compatibility (S3)
+    secrets: inherit
+    needs: [setup]
+    uses: OasisLMF/OasisPiWind/.github/workflows/integration.yml@main
+    with:
+      piwind_branch: ${{ needs.setup.outputs.piwind_branch }}
+      server_image: ${{ needs.setup.outputs.build_server_img }}
+      server_tag: ${{ needs.setup.outputs.build_server_tag }}
+      worker_image: ${{ needs.setup.outputs.build_worker_img }}
+      worker_tag: ${{ needs.setup.outputs.build_worker_tag }}
+      worker_api_ver: 'v1'
+      debug_mode: 1
+      pytest_opts: "--docker-compose=./docker/plat2-v2.s3.docker-compose.yml ${{ needs.setup.outputs.pytest_opts }}"
+      storage_suffix: '_s3'
+
+  storage_s3_v2:
+    name: V2 Storage Compatibility (S3)
+    secrets: inherit
+    needs: [setup]
+    uses: OasisLMF/OasisPiWind/.github/workflows/integration.yml@main
+    with:
+      piwind_branch: ${{ needs.setup.outputs.piwind_branch }}
+      server_image: ${{ needs.setup.outputs.build_server_img }}
+      server_tag: ${{ needs.setup.outputs.build_server_tag }}
+      worker_image: ${{ needs.setup.outputs.build_worker_img }}
+      worker_tag: ${{ needs.setup.outputs.build_worker_tag }}
+      worker_api_ver: 'v2'
+      debug_mode: 1
+      pytest_opts: "--docker-compose=./docker/plat2-v2.s3.docker-compose.yml ${{ needs.setup.outputs.pytest_opts }}"
+      storage_suffix: '_s3'
+
   worker_debian:
     name: Worker Debian
     secrets: inherit
@@ -259,3 +291,4 @@ jobs:
       debug_mode: 1
       pytest_opts: "--docker-compose=./docker/plat2-v2.docker-compose.yml ${{ needs.setup.outputs.pytest_opts }}"
       storage_suffix: "_worker-${{ needs.setup.outputs.release_stable_28 }}"
+
diff --git a/src/model_execution_worker/storage_manager.py b/src/model_execution_worker/storage_manager.py
index 72ee4783d..d6d68ad31 100755
--- a/src/model_execution_worker/storage_manager.py
+++ b/src/model_execution_worker/storage_manager.py
@@ -178,7 +178,7 @@ def _fetch_file(self, reference, output_path, subdir):
             raise MissingInputsException(fpath)
 
     def filepath(self, reference):
-        """ return the absolute filepath
+        """ return the absolute filepath """
         fpath = os.path.join(
             self.media_root,
@@ -204,11 +204,15 @@ def extract(self, archive_fp, directory, storage_subdir=''):
         temp_dir = tempfile.TemporaryDirectory()
         try:
             temp_dir_path = temp_dir.__enter__()
-            local_archive_path = self.get(
-                archive_fp,
-                os.path.join(temp_dir_path, os.path.basename(archive_fp)),
-                subdir=storage_subdir
-            )
+            if os.path.isfile(archive_fp):
+                local_archive_path = os.path.abspath(archive_fp)
+            else:
+                local_archive_path = self.get(
+                    archive_fp,
+                    os.path.join(temp_dir_path, os.path.basename(archive_fp)),
+                    subdir=storage_subdir
+                )
+
             with tarfile.open(local_archive_path) as f:
                 f.extractall(directory)
         finally:
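Note on the `extract()` change above: it short-circuits the download when the archive reference is already a path on the worker's filesystem (the shared-fs case) and only falls back to fetching from remote storage otherwise. A minimal standalone sketch of that control flow, with a hypothetical `fetch_remote` callable standing in for the connector's `get()` method:

```python
import os
import tarfile
import tempfile

def extract_archive(archive_ref, directory, fetch_remote):
    """Extract a tar archive given either a local path or a remote storage reference."""
    with tempfile.TemporaryDirectory() as temp_dir:
        if os.path.isfile(archive_ref):
            # Shared-fs worker: the reference is already a readable local path.
            local_path = os.path.abspath(archive_ref)
        else:
            # Object-store worker: download into the temporary directory first.
            local_path = fetch_remote(
                archive_ref,
                os.path.join(temp_dir, os.path.basename(archive_ref)),
            )
        with tarfile.open(local_path) as tar:
            tar.extractall(directory)
```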
diff --git a/src/server/oasisapi/portfolios/v1_api/serializers.py b/src/server/oasisapi/portfolios/v1_api/serializers.py
index 5cb1d8284..c79494461 100644
--- a/src/server/oasisapi/portfolios/v1_api/serializers.py
+++ b/src/server/oasisapi/portfolios/v1_api/serializers.py
@@ -1,4 +1,6 @@
 from os import path
+import mimetypes
+
 from drf_yasg.utils import swagger_serializer_method
 from rest_framework import serializers
 from rest_framework.exceptions import ValidationError
@@ -260,29 +262,51 @@ def validate(self, attrs):
             raise serializers.ValidationError(errors)
 
         return super(PortfolioStorageSerializer, self).validate(attrs)
 
+    def inferr_content_type(self, stored_filename):
+        inferred_type = mimetypes.MimeTypes().guess_type(stored_filename)[0]
+        if not inferred_type and stored_filename.lower().endswith('parquet'):
+            # mimetypes doesn't work for parquet so handle that here
+            inferred_type = 'application/octet-stream'
+        if not inferred_type:
+            inferred_type = default_storage.default_content_type
+        return inferred_type
+
     def get_content_type(self, stored_filename):
         try:
             # fetch content_type stored in Django's DB
             return RelatedFile.objects.get(file=path.basename(stored_filename)).content_type
         except ObjectDoesNotExist:
-            try:  # Find content_type from S3 Object header
-                object_header = default_storage.connection.meta.client.head_object(
-                    Bucket=default_storage.bucket_name,
-                    Key=stored_filename)
-                return object_header['ContentType']
-            except ClientError:
-                # fallback to the default content_type
-                return default_storage.default_content_type
+            # Find content_type from S3 Object header
+            if hasattr(default_storage, 'bucket'):
+                try:
+                    object_header = default_storage.connection.meta.client.head_object(
+                        Bucket=default_storage.bucket_name,
+                        Key=stored_filename)
+                    return object_header['ContentType']
+                except S3_ClientError:
+                    return self.inferr_content_type(stored_filename)
+
+            # Find content_type from Blob Storage
+            elif hasattr(default_storage, 'azure_container'):
+                blob_client = default_storage.client.get_blob_client(stored_filename)
+                blob_properties = blob_client.get_blob_properties()
+                return blob_properties.content_settings.content_type
+
+            else:
+                return self.inferr_content_type(stored_filename)
 
     def update(self, instance, validated_data):
         files_for_removal = list()
 
         for field in validated_data:
-            content_type = self.get_content_type(validated_data[field])
+            old_file_name = validated_data[field]
+            content_type = self.get_content_type(old_file_name)
+            fname = path.basename(old_file_name)
+            new_file_name = default_storage.get_alternative_name(fname, '')
+
             # S3 storage - File copy needed
             if hasattr(default_storage, 'bucket'):
-                fname = path.basename(validated_data[field])
                 new_file = ContentFile(b'')
-                new_file.name = default_storage.get_alternative_name(fname, '')
+                new_file.name = new_file_name
                 new_related_file = RelatedFile.objects.create(
                     file=new_file,
                     filename=fname,
@@ -292,18 +316,17 @@ def update(self, instance, validated_data):
                 )
                 bucket = default_storage.bucket
                 stored_file = default_storage.open(new_related_file.file.name)
-                stored_file.obj.copy({"Bucket": bucket.name, "Key": validated_data[field]})
+                stored_file.obj.copy({"Bucket": bucket.name, "Key": old_file_name})
                 stored_file.obj.wait_until_exists()
 
             elif hasattr(default_storage, 'azure_container'):
                 # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
-                fname = path.basename(validated_data[field])
-                new_file_name = default_storage.get_alternative_name(validated_data[field], '')
-                new_blobname = path.basename(new_file_name)
+                new_file_name = default_storage.get_alternative_name(old_file_name, '')
+                new_blobname = '/'.join([default_storage.location, path.basename(new_file_name)])
 
                 # Copies a blob asynchronously.
-                source_blob = default_storage.client.get_blob_client(validated_data[field])
-                dest_blob = default_storage.client.get_blob_client(new_file_name)
+                source_blob = default_storage.client.get_blob_client(old_file_name)
+                dest_blob = default_storage.client.get_blob_client(new_blobname)
 
                 try:
                     lease = BlobLeaseClient(source_blob)
@@ -316,9 +339,9 @@ def update(self, instance, validated_data):
                     lease.break_lease()
                     raise e
 
-                stored_blob = default_storage.open(new_blobname)
+                stored_blob = default_storage.open(new_file_name)
                 new_related_file = RelatedFile.objects.create(
-                    file=File(stored_blob, name=new_blobname),
+                    file=File(stored_blob, name=new_file_name),
                     filename=fname,
                     content_type=content_type,
                     creator=self.context['request'].user,
@@ -327,11 +350,11 @@ def update(self, instance, validated_data):
 
             # Shared-fs
             else:
-                stored_file = default_storage.open(validated_data[field])
-                new_file = File(stored_file, name=validated_data[field])
+                stored_file = default_storage.open(old_file_name)
+                new_file = File(stored_file, name=new_file_name)
                 new_related_file = RelatedFile.objects.create(
                     file=new_file,
-                    filename=validated_data[field],
+                    filename=fname,
                     content_type=content_type,
                     creator=self.context['request'].user,
                     store_as_filename=True,
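Note on the new `inferr_content_type()` helper above: it gives the serializer a content-type fallback that no longer assumes S3, asking `mimetypes` first, special-casing parquet (which has no registered MIME type), and only then falling back to the storage default. A quick standalone check of that inference chain, with the default value hard-coded here as an assumption:

```python
import mimetypes

def guess_content_type(filename, default='application/octet-stream'):
    # mimetypes resolves common extensions (csv, json, gzip, ...)
    inferred, _ = mimetypes.MimeTypes().guess_type(filename)
    if not inferred and filename.lower().endswith('parquet'):
        # parquet has no registered MIME type, so map it explicitly
        inferred = 'application/octet-stream'
    return inferred or default

print(guess_content_type('location.csv'))      # text/csv
print(guess_content_type('location.parquet'))  # application/octet-stream
```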
diff --git a/src/server/oasisapi/portfolios/v2_api/serializers.py b/src/server/oasisapi/portfolios/v2_api/serializers.py
index 04442a446..f1e600d46 100644
--- a/src/server/oasisapi/portfolios/v2_api/serializers.py
+++ b/src/server/oasisapi/portfolios/v2_api/serializers.py
@@ -324,13 +324,14 @@ def update(self, instance, validated_data):
         files_for_removal = list()
         user = self.context['request'].user
 
         for field in validated_data:
-            content_type = self.get_content_type(validated_data[field])
+            old_file_name = validated_data[field]
+            content_type = self.get_content_type(old_file_name)
+            fname = path.basename(old_file_name)
+            new_file_name = default_storage.get_alternative_name(fname, '')
 
             # S3 storage - File copy needed
             if hasattr(default_storage, 'bucket'):
-                fname = path.basename(validated_data[field])
                 new_file = ContentFile(b'')
-                new_file.name = default_storage.get_alternative_name(fname, '')
                 new_related_file = RelatedFile.objects.create(
                     file=new_file,
                     filename=fname,
@@ -347,12 +348,11 @@ def update(self, instance, validated_data):
 
             elif hasattr(default_storage, 'azure_container'):
                 # https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-copy?tabs=python
-                fname = path.basename(validated_data[field])
-                new_file_name = default_storage.get_alternative_name(validated_data[field], '')
+                new_file_name = default_storage.get_alternative_name(old_file_name, '')
                 new_blobname = '/'.join([default_storage.location, path.basename(new_file_name)])
 
                 # Copies a blob asynchronously.
-                source_blob = default_storage.client.get_blob_client(validated_data[field])
+                source_blob = default_storage.client.get_blob_client(old_file_name)
                 dest_blob = default_storage.client.get_blob_client(new_blobname)
 
                 try:
@@ -371,17 +371,17 @@ def update(self, instance, validated_data):
                     file=File(stored_blob, name=new_file_name),
                     filename=fname,
                     content_type=content_type,
-                    creator=self.context['request'].user,
+                    creator=user,
                     store_as_filename=True,
                 )
 
             # Shared-fs
             else:
-                stored_file = default_storage.open(validated_data[field])
-                new_file = File(stored_file, name=validated_data[field])
+                stored_file = default_storage.open(old_file_name)
+                new_file = File(stored_file, name=new_file_name)
                 new_related_file = RelatedFile.objects.create(
                     file=new_file,
-                    filename=validated_data[field],
+                    filename=fname,
                     content_type=content_type,
                     creator=user,
                     store_as_filename=True,
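Note on the backend dispatch used throughout both serializer diffs: django-storages' S3 backend exposes a `bucket` attribute and its Azure backend an `azure_container` attribute, and anything else is treated as the shared filesystem. A sketch of that detection idiom in isolation:

```python
def storage_backend(storage):
    """Classify a Django storage object by the attributes django-storages exposes."""
    if hasattr(storage, 'bucket'):
        return 's3'          # S3Boto3Storage: copy server-side via stored_file.obj.copy()
    if hasattr(storage, 'azure_container'):
        return 'azure'       # AzureStorage: copy via blob client plus lease
    return 'shared-fs'       # plain FileSystemStorage: re-open and re-save the file
```

Each branch needs its own copy mechanism because only the shared filesystem lets Django re-save the original file object directly; the S3 and Azure paths both copy server-side and then register the new object as a `RelatedFile`.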