Fix integration tests and update Python versions (#154)

* Fix integration tests and update Python versions
* Fix test dependency versions, change GPU instance type, update GPU MNIST script, add environment variable to GPU tests
* Modify Dockerfiles and set default value of env_vars to None
* Remove instance type from SageMaker integration tests
* Add instance_type to SageMaker tests
* Fix buildspec.yml
aws · Oct 9, 2023 · 1fbd3f7 · 1fbd3f7
1 parent c302b64
commit 1fbd3f7
Show file tree

Hide file tree

Showing 14 changed files with 188 additions and 107 deletions.
diff --git a/buildspec-release.yml b/buildspec-release.yml
@@ -12,7 +12,7 @@ phases:
       # run unit tests
       - AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_SESSION_TOKEN=
         AWS_CONTAINER_CREDENTIALS_RELATIVE_URI= AWS_DEFAULT_REGION=
-        tox -e py36,py37 -- test/unit
+        tox -e py38,py39,py310 -- test/unit
 
       # run local integ tests
       #- $(aws ecr get-login --no-include-email --region us-west-2)

diff --git a/buildspec.yml b/buildspec.yml
@@ -2,16 +2,16 @@ version: 0.2
 
 env:
   variables:
-    FRAMEWORK_VERSION: '1.10.2'
-    EIA_FRAMEWORK_VERSION: '1.3.1'
+    FRAMEWORK_VERSIONS: '2.0.0 2.0.1'
+    # EIA_FRAMEWORK_VERSION: '1.3.1'
     CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
-    GPU_INSTANCE_TYPE: 'ml.p3.8xlarge'
-    EIA_ACCELERATOR_TYPE: 'ml.eia2.medium'
+    GPU_INSTANCE_TYPE: 'ml.g4dn.12xlarge'
+    # EIA_ACCELERATOR_TYPE: 'ml.eia2.medium'
     ECR_REPO: 'sagemaker-test'
     GITHUB_REPO: 'sagemaker-pytorch-serving-container'
     DLC_ACCOUNT: '763104351884'
     SETUP_FILE: 'setup_cmds.sh'
-    SETUP_CMDS: '#!/bin/bash\npython3.6 -m pip install --upgrade pip\npython3.6 -m pip install -U -e .\npython3.6 -m pip install -U -e .[test]'
+    SETUP_CMDS: '#!/bin/bash\npython3.8 -m pip install --upgrade pip\npython3.8 -m pip install -U -e .\npython3.8 -m pip install -U -e .[test]'
 
 
 phases:
@@ -33,68 +33,81 @@ phases:
       - tox -e flake8,twine
 
       # run unit tests
-      - tox -e py36,py37 test/unit
+      - tox -e py38,py39,py310 test/unit
 
-      # define tags
-      - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
-      - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
-      - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
-      - DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
+      # define EIA tag
+      # - DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
 
       # run local CPU integration tests (build and push the image to ECR repo)
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-
-      # launch remote GPU instance
+      - |
+        for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS; 
+          do
+            DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID";
+            test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG";
+            execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
+            docker system prune --all --force;
+          done
+
+      # launch remote GPU instance with Deep Learning AMI GPU PyTorch 1.9 (Ubuntu 20.04)
       - prefix='ml.'
       - instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
-      - create-key-pair
-      - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest
-
+
       # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
       - python3 setup.py sdist
-      - build_dir="test/container/$FRAMEWORK_VERSION"
       - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .
-      # push DLC GPU image to ECR
-      - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - docker push $PREPROD_IMAGE:$DLC_GPU_TAG
-
-      # run GPU local integration tests
-      - printf "$SETUP_CMDS" > $SETUP_FILE
-      # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests
-      - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
-      - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
-      - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      - |
+        for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
+          do
+            create-key-pair;
+            launch-ec2-instance --instance-type $instance_type --ami-name ami-03e3ef8c92fdb39ad;
+            DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID";
+            build_dir="test/container/$FRAMEWORK_VERSION";
+            docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .;
+            $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION);
+            docker push $PREPROD_IMAGE:$DLC_GPU_TAG;
+            printf "$SETUP_CMDS" > $SETUP_FILE;
+            dlc_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG";
+            test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --python-version \"3.8\"";
+            execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
+            docker system prune --all --force;
+            cleanup-gpu-instances;
+            cleanup-key-pairs;
+          done
 
       # run CPU sagemaker integration tests
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      - |
+        for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS; 
+          do
+            DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID";
+            test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG";
+            execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
+            docker system prune --all --force;
+          done
 
       # run GPU sagemaker integration tests
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      - |
+        for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS;
+          do
+            DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID";
+            test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG";
+            execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg";
+            docker system prune --all --force;
+          done
 
       # run EIA sagemaker integration tests
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --build-image --push-image --dockerfile-type dlc.eia --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $DLC_EIA_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      # - test_cmd="IGNORE_COVERAGE=- tox -e py38 -- test/integration/sagemaker --build-image --push-image --dockerfile-type dlc.eia --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $DLC_EIA_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg"
 
     finally:
-      # shut down remote GPU instance
-      - cleanup-gpu-instances
-      - cleanup-key-pairs
 
       # remove ECR image
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_EIA_TAG
+      - |
+        for FRAMEWORK_VERSION in $FRAMEWORK_VERSIONS; 
+          do
+            DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID";
+            DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID";
+            aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG;
+            aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG;
+          done
+      
+      # - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_EIA_TAG
diff --git a/setup.py b/setup.py
@@ -45,19 +45,19 @@ def read(fname):
         "Natural Language :: English",
         "License :: OSI Approved :: Apache Software License",
         "Programming Language :: Python",
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10'
     ],
 
     # We don't declare our dependency on torch here because we build with
     # different packages for different variants
-    install_requires=['numpy', 'retrying', 'sagemaker-inference>=1.3.1'],
+    install_requires=['numpy==1.24.4', 'retrying==1.3.4', 'sagemaker-inference==1.10.0'],
     extras_require={
-        'test': ['boto3>=1.10.44', 'coverage==4.5.3', 'docker-compose==1.23.2', 'flake8==3.7.7', 'Flask==1.1.1',
-                 'mock==2.0.0', 'pytest==4.4.0', 'pytest-cov==2.7.1', 'pytest-xdist==1.28.0', 'PyYAML==3.10',
-                 'sagemaker==1.56.3', 'sagemaker-containers>=2.5.4', 'six==1.12.0', 'requests==2.20.0',
-                 'requests_mock==1.6.0', 'torch==1.6.0', 'torchvision==0.7.0', 'tox==3.7.0']
+        'test': ['boto3==1.28.60', 'coverage==7.3.2', 'docker-compose==1.29.2', 'flake8==6.1.0', 'Flask==3.0.0',
+                 'mock==5.1.0', 'pytest==7.4.2', 'pytest-cov==4.1.0', 'pytest-xdist==3.3.1', 'PyYAML==5.4.1',
+                 'sagemaker==2.125.0', 'six==1.16.0', 'requests==2.31.0',
+                 'requests_mock==1.11.0', 'torch==2.1.0', 'torchvision==0.16.0', 'tox==4.11.3']
     },
 
     entry_points={

diff --git a/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py b/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py
@@ -135,7 +135,7 @@ def default_output_fn(self, prediction, accept):
 
         Returns: output data serialized
         """
-        if type(prediction) == torch.Tensor:
+        if type(prediction) is torch.Tensor:
             prediction = prediction.detach().cpu().numpy().tolist()
 
         for content_type in utils.parse_accept(accept):

diff --git a/test/conftest.py b/test/conftest.py
@@ -45,15 +45,16 @@ def pytest_addoption(parser):
     parser.addoption('--build-image', '-B', action='store_true')
     parser.addoption('--push-image', '-P', action='store_true')
     parser.addoption('--dockerfile-type', '-T',
-                     choices=['dlc.cpu', 'dlc.gpu', 'dlc.eia', 'pytorch'],
-                     default='pytorch')
+                     # choices=['dlc.cpu', 'dlc.gpu', 'dlc.eia', 'pytorch'],
+                     choices=['dlc.cpu', 'dlc.gpu'],
+                     default='dlc.cpu')
     parser.addoption('--dockerfile', '-D', default=None)
     parser.addoption('--aws-id', default=None)
     parser.addoption('--instance-type')
     parser.addoption('--accelerator-type')
     parser.addoption('--docker-base-name', default='sagemaker-pytorch-inference')
     parser.addoption('--region', default='us-west-2')
-    parser.addoption('--framework-version', default="1.6.0")
+    parser.addoption('--framework-version', default="2.0.0")
     parser.addoption('--py-version', choices=['2', '3'], default='3')
     # Processor is still "cpu" for EIA tests
     parser.addoption('--processor', choices=['gpu', 'cpu'], default='cpu')

diff --git a/test/container/2.0.0/Dockerfile.dlc.cpu b/test/container/2.0.0/Dockerfile.dlc.cpu
@@ -0,0 +1,8 @@
+ARG region
+FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:2.0.0-cpu-py310-ubuntu20.04-sagemaker
+
+COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
+
+RUN pip uninstall -y sagemaker_pytorch_inference && \
+    pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
+    rm /sagemaker_pytorch_inference.tar.gz
diff --git a/test/container/2.0.0/Dockerfile.dlc.gpu b/test/container/2.0.0/Dockerfile.dlc.gpu
@@ -0,0 +1,8 @@
+ARG region
+FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:2.0.0-gpu-py310-cu118-ubuntu20.04-sagemaker
+
+COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
+
+RUN pip uninstall -y sagemaker_pytorch_inference && \
+    pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
+    rm /sagemaker_pytorch_inference.tar.gz
diff --git a/test/container/2.0.1/Dockerfile.dlc.cpu b/test/container/2.0.1/Dockerfile.dlc.cpu
@@ -0,0 +1,8 @@
+ARG region
+FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:2.0.1-cpu-py310-ubuntu20.04-sagemaker
+
+COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
+
+RUN pip uninstall -y sagemaker_pytorch_inference && \
+    pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
+    rm /sagemaker_pytorch_inference.tar.gz
diff --git a/test/container/2.0.1/Dockerfile.dlc.gpu b/test/container/2.0.1/Dockerfile.dlc.gpu
@@ -0,0 +1,8 @@
+ARG region
+FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:2.0.1-gpu-py310-cu118-ubuntu20.04-sagemaker
+
+COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
+
+RUN pip uninstall -y sagemaker_pytorch_inference && \
+    pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
+    rm /sagemaker_pytorch_inference.tar.gz