pass model directory as input to torchserve (#118)
* update torchserve

* remove repackaging fn

* update assert check

* update torchserve in general dockerfile

* test with dlc dockerfiles

* uninstall auto confirmation

* uninstall auto confirmation

* run only gpu tests

* run all integ tests

* set default service handler in ts config file

* test

* test

* revert passing handler service to ts config

* Revert "revert passing handler service to ts config"

This reverts commit d62f5ff.

* add pytest logs

* build/push dockerfile

* pass handler fn

* skip unit test

* add logging to sm tests

* test

* test

* test

* fix flake8

* fix unit test

* test gpu sm generic

* skip sm integration tests with generic image

* test generic image

* enable all tests
mseth10 committed Apr 6, 2022
1 parent 17613f1 commit 3fa2dea
Showing 6 changed files with 38 additions and 116 deletions.
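In short: the commit drops the torch-model-archiver repackaging step (_adapt_to_ts_format) and instead registers the SageMaker model directory with TorchServe directly, wiring the handler service in through the generated properties file rather than through a model.mar archive. A minimal sketch of the resulting startup flow, assuming the standard SageMaker model path /opt/ml/model (illustrative only; the real code is in the torchserve.py diff below):

import os
import subprocess

MODEL_STORE = os.path.join(os.getcwd(), ".sagemaker", "ts", "models")
model_dir = "/opt/ml/model"  # assumption: SageMaker's default model directory

os.makedirs(MODEL_STORE, exist_ok=True)

# No archiver step: the directory itself is registered under the name "model".
ts_cmd = [
    "torchserve",
    "--start",
    "--model-store", MODEL_STORE,
    "--ts-config", "/etc/sagemaker-ts.properties",
    "--models", "model=" + model_dir,
]
subprocess.Popen(ts_cmd)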
8 changes: 4 additions & 4 deletions buildspec.yml
@@ -42,9 +42,9 @@ phases:
       - DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
 
       # run local CPU integration tests (build and push the image to ECR repo)
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
+      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
       - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
+      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
       - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
 
       # launch remote GPU instance
@@ -65,10 +65,10 @@ phases:
       # run GPU local integration tests
       - printf "$SETUP_CMDS" > $SETUP_FILE
       # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests
-      - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
+      - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
       - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
       - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
+      - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
       - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
       - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
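The only buildspec change is extra pytest verbosity for all four local test runs: -vv prints full per-test names, -rA adds an end-of-run summary of every outcome, and -s disables output capture so the model server's logs show up in the build output (the "add pytest logs" commit in the history above).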

48 changes: 12 additions & 36 deletions src/sagemaker_pytorch_serving_container/torchserve.py
@@ -25,13 +25,12 @@
 
 import sagemaker_pytorch_serving_container
 from sagemaker_pytorch_serving_container import ts_environment
-from sagemaker_inference import default_handler_service, environment, utils
+from sagemaker_inference import environment, utils
 from sagemaker_inference.environment import code_dir
 
 logger = logging.getLogger()
 
 TS_CONFIG_FILE = os.path.join("/etc", "sagemaker-ts.properties")
-DEFAULT_HANDLER_SERVICE = default_handler_service.__name__
 DEFAULT_TS_CONFIG_FILE = pkg_resources.resource_filename(
     sagemaker_pytorch_serving_container.__name__, "/etc/default-ts.properties"
 )
@@ -41,13 +40,11 @@
 DEFAULT_TS_LOG_FILE = pkg_resources.resource_filename(
     sagemaker_pytorch_serving_container.__name__, "/etc/log4j2.xml"
 )
-DEFAULT_TS_MODEL_DIRECTORY = os.path.join(os.getcwd(), ".sagemaker", "ts", "models")
 DEFAULT_TS_MODEL_NAME = "model"
-DEFAULT_TS_CODE_DIR = "code"
+DEFAULT_HANDLER_SERVICE = "sagemaker_pytorch_serving_container.handler_service"
 
 ENABLE_MULTI_MODEL = os.getenv("SAGEMAKER_MULTI_MODEL", "false") == "true"
-MODEL_STORE = "/" if ENABLE_MULTI_MODEL else DEFAULT_TS_MODEL_DIRECTORY
+MODEL_STORE = "/" if ENABLE_MULTI_MODEL else os.path.join(os.getcwd(), ".sagemaker", "ts", "models")
 
 PYTHON_PATH_ENV = "PYTHONPATH"
 REQUIREMENTS_PATH = os.path.join(code_dir, "requirements.txt")
@@ -73,11 +70,13 @@ def start_torchserve(handler_service=DEFAULT_HANDLER_SERVICE):
     if ENABLE_MULTI_MODEL:
         if "SAGEMAKER_HANDLER" not in os.environ:
             os.environ["SAGEMAKER_HANDLER"] = handler_service
-        _set_python_path()
-    else:
-        _adapt_to_ts_format(handler_service)
+
+    if not os.path.exists(MODEL_STORE):
+        os.makedirs(MODEL_STORE)
 
-    _create_torchserve_config_file()
+    _set_python_path()
+
+    _create_torchserve_config_file(handler_service)
 
     if os.path.exists(REQUIREMENTS_PATH):
         _install_requirements()
@@ -92,7 +91,7 @@ def start_torchserve(handler_service=DEFAULT_HANDLER_SERVICE):
         "--log-config",
         DEFAULT_TS_LOG_FILE,
         "--models",
-        "model.mar"
+        DEFAULT_TS_MODEL_NAME + "=" + environment.model_dir
     ]
 
     print(ts_torchserve_cmd)
@@ -107,30 +106,6 @@ def start_torchserve(handler_service=DEFAULT_HANDLER_SERVICE):
     ts_process.wait()
 
 
-def _adapt_to_ts_format(handler_service):
-    if not os.path.exists(DEFAULT_TS_MODEL_DIRECTORY):
-        os.makedirs(DEFAULT_TS_MODEL_DIRECTORY)
-
-    model_archiver_cmd = [
-        "torch-model-archiver",
-        "--model-name",
-        DEFAULT_TS_MODEL_NAME,
-        "--handler",
-        handler_service,
-        "--export-path",
-        DEFAULT_TS_MODEL_DIRECTORY,
-        "--version",
-        "1",
-        "--extra-files",
-        os.path.join(environment.model_dir)
-    ]
-
-    logger.info(model_archiver_cmd)
-    subprocess.check_call(model_archiver_cmd)
-
-    _set_python_path()
-
-
 def _set_python_path():
     # Torchserve handles code execution by appending the export path, provided
     # to the model archiver, to the PYTHONPATH env var.
@@ -142,19 +117,20 @@ def _set_python_path():
         os.environ[PYTHON_PATH_ENV] = environment.code_dir
 
 
-def _create_torchserve_config_file():
-    configuration_properties = _generate_ts_config_properties()
+def _create_torchserve_config_file(handler_service):
+    configuration_properties = _generate_ts_config_properties(handler_service)
 
     utils.write_file(TS_CONFIG_FILE, configuration_properties)
 
 
-def _generate_ts_config_properties():
+def _generate_ts_config_properties(handler_service):
     env = environment.Environment()
     user_defined_configuration = {
         "default_response_timeout": env.model_server_timeout,
         "default_workers_per_model": env.model_server_workers,
         "inference_address": "http://0.0.0.0:{}".format(env.inference_http_port),
         "management_address": "http://0.0.0.0:{}".format(env.management_http_port),
+        "default_service_handler": handler_service + ":handle",
     }
 
     ts_env = ts_environment.TorchServeEnvironment()
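With the handler service threaded through to the config generator, the written /etc/sagemaker-ts.properties now tells TorchServe which Python handler to invoke for any model it loads, which is what lets the container skip the per-model .mar packaging. A sketch of the user-defined portion of the generated file, assuming the default ports and timeout (the values shown are illustrative, not pinned by the diff):

default_response_timeout=60
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
default_service_handler=sagemaker_pytorch_serving_container.handler_service:handle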
3 changes: 3 additions & 0 deletions test/container/1.10.2/Dockerfile.dlc.cpu
@@ -1,6 +1,9 @@
 ARG region
 FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.10.2-cpu-py38-ubuntu20.04-sagemaker
 
+RUN pip uninstall torchserve -y && \
+    pip install torchserve-nightly==2022.3.23.post2
+
 COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
 RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
     rm /sagemaker_pytorch_inference.tar.gz
3 changes: 3 additions & 0 deletions test/container/1.10.2/Dockerfile.dlc.gpu
@@ -1,6 +1,9 @@
 ARG region
 FROM 763104351884.dkr.ecr.$region.amazonaws.com/pytorch-inference:1.10.2-gpu-py38-cu113-ubuntu20.04-sagemaker
 
+RUN pip uninstall torchserve -y && \
+    pip install torchserve-nightly==2022.3.23.post2
+
 COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
 RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
     rm /sagemaker_pytorch_inference.tar.gz
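Both DLC-based test images replace the released torchserve with a pinned nightly build; the -y flag makes pip's uninstall non-interactive so the Docker build cannot hang on a confirmation prompt (the two "uninstall auto confirmation" commits in the history). The nightly pin is presumably what supplies the directory-based model loading the toolkit now relies on.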
2 changes: 1 addition & 1 deletion test/container/1.10.2/Dockerfile.pytorch
@@ -25,7 +25,7 @@ RUN apt-get update \
 RUN conda install -c conda-forge opencv \
     && ln -s /opt/conda/bin/pip /usr/local/bin/pip3
 
-RUN pip install torchserve==$TS_VERSION \
+RUN pip install torchserve-nightly==2022.3.23.post2 \
     && pip install torch-model-archiver==$TS_ARCHIVER_VERSION
 
 COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
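The generic test image gets the same nightly pin, replacing the $TS_VERSION build argument; note that torch-model-archiver stays installed at $TS_ARCHIVER_VERSION even though the serving path no longer invokes it.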
90 changes: 15 additions & 75 deletions test/unit/test_model_server.py
@@ -34,9 +34,9 @@
 @patch("sagemaker_pytorch_serving_container.torchserve._install_requirements")
 @patch("os.path.exists", return_value=True)
 @patch("sagemaker_pytorch_serving_container.torchserve._create_torchserve_config_file")
-@patch("sagemaker_pytorch_serving_container.torchserve._adapt_to_ts_format")
+@patch("sagemaker_pytorch_serving_container.torchserve._set_python_path")
 def test_start_torchserve_default_service_handler(
-    adapt,
+    set_python_path,
     create_config,
     exists,
     install_requirements,
@@ -47,9 +47,8 @@ def test_start_torchserve_default_service_handler(
 ):
     torchserve.start_torchserve()
 
-    adapt.assert_called_once_with(torchserve.DEFAULT_HANDLER_SERVICE)
-    create_config.assert_called_once_with()
     exists.assert_called_once_with(REQUIREMENTS_PATH)
+    set_python_path.assert_called_once_with()
+    create_config.assert_called_once_with(torchserve.DEFAULT_HANDLER_SERVICE)
     install_requirements.assert_called_once_with()
 
     ts_model_server_cmd = [
@@ -62,7 +61,7 @@ def test_start_torchserve_default_service_handler(
         "--log-config",
         torchserve.DEFAULT_TS_LOG_FILE,
         "--models",
-        "model.mar"
+        "model=/opt/ml/model"
     ]
 
     subprocess_popen.assert_called_once_with(ts_model_server_cmd)
@@ -76,9 +75,9 @@ def test_start_torchserve_default_service_handler(
 @patch("sagemaker_pytorch_serving_container.torchserve._install_requirements")
 @patch("os.path.exists", return_value=True)
 @patch("sagemaker_pytorch_serving_container.torchserve._create_torchserve_config_file")
-@patch("sagemaker_pytorch_serving_container.torchserve._adapt_to_ts_format")
+@patch("sagemaker_pytorch_serving_container.torchserve._set_python_path")
 def test_start_torchserve_default_service_handler_multi_model(
-    adapt,
+    set_python_path,
     create_config,
     exists,
     install_requirements,
@@ -90,7 +89,9 @@ def test_start_torchserve_default_service_handler_multi_model(
     torchserve.ENABLE_MULTI_MODEL = True
     torchserve.start_torchserve()
     torchserve.ENABLE_MULTI_MODEL = False
-    create_config.assert_called_once_with()
+
+    set_python_path.assert_called_once_with()
+    create_config.assert_called_once_with(torchserve.DEFAULT_HANDLER_SERVICE)
     exists.assert_called_once_with(REQUIREMENTS_PATH)
     install_requirements.assert_called_once_with()
@@ -104,74 +105,13 @@ def test_start_torchserve_default_service_handler_multi_model(
         "--log-config",
         torchserve.DEFAULT_TS_LOG_FILE,
         "--models",
-        "model.mar"
+        "model=/opt/ml/model"
     ]
 
     subprocess_popen.assert_called_once_with(ts_model_server_cmd)
     sigterm.assert_called_once_with(retrieve.return_value)

@patch("subprocess.call")
@patch("subprocess.Popen")
@patch("sagemaker_pytorch_serving_container.torchserve._retrieve_ts_server_process")
@patch("sagemaker_pytorch_serving_container.torchserve._add_sigterm_handler")
@patch("sagemaker_pytorch_serving_container.torchserve._create_torchserve_config_file")
@patch("sagemaker_pytorch_serving_container.torchserve._adapt_to_ts_format")
def test_start_torchserve_custom_handler_service(
adapt, create_config, sigterm, retrieve, subprocess_popen, subprocess_call
):
handler_service = Mock()

torchserve.start_torchserve(handler_service)

adapt.assert_called_once_with(handler_service)


@patch("sagemaker_pytorch_serving_container.torchserve._set_python_path")
@patch("subprocess.check_call")
@patch("os.makedirs")
@patch("os.path.exists", return_value=False)
def test_adapt_to_ts_format(path_exists, make_dir, subprocess_check_call, set_python_path):
handler_service = Mock()

torchserve._adapt_to_ts_format(handler_service)

path_exists.assert_called_once_with(torchserve.DEFAULT_TS_MODEL_DIRECTORY)
make_dir.assert_called_once_with(torchserve.DEFAULT_TS_MODEL_DIRECTORY)

model_archiver_cmd = [
"torch-model-archiver",
"--model-name",
torchserve.DEFAULT_TS_MODEL_NAME,
"--handler",
handler_service,
"--export-path",
torchserve.DEFAULT_TS_MODEL_DIRECTORY,
"--version",
"1",
"--extra-files",
environment.model_dir
]

subprocess_check_call.assert_called_once_with(model_archiver_cmd)
set_python_path.assert_called_once_with()


@patch("sagemaker_pytorch_serving_container.torchserve._set_python_path")
@patch("subprocess.check_call")
@patch("os.makedirs")
@patch("os.path.exists", return_value=True)
def test_adapt_to_ts_format_existing_path(
path_exists, make_dir, subprocess_check_call, set_python_path
):
handler_service = Mock()

torchserve._adapt_to_ts_format(handler_service)

path_exists.assert_called_once_with(torchserve.DEFAULT_TS_MODEL_DIRECTORY)
make_dir.assert_not_called()


@patch.dict(os.environ, {torchserve.PYTHON_PATH_ENV: PYTHON_PATH}, clear=True)
def test_set_existing_python_path():
torchserve._set_python_path()
@@ -193,7 +133,7 @@ def test_new_python_path():
 @patch("sagemaker_pytorch_serving_container.torchserve._generate_ts_config_properties")
 @patch("sagemaker_inference.utils.write_file")
 def test_create_torchserve_config_file(write_file, generate_ts_config_props):
-    torchserve._create_torchserve_config_file()
+    torchserve._create_torchserve_config_file(torchserve.DEFAULT_HANDLER_SERVICE)
 
     write_file.assert_called_once_with(
         torchserve.TS_CONFIG_FILE, generate_ts_config_props.return_value
@@ -211,7 +151,7 @@ def test_generate_ts_config_properties(env, read_file):
     env.return_value.model_sever_workerse = model_server_workers
     env.return_value.inference_http_port = http_port
 
-    ts_config_properties = torchserve._generate_ts_config_properties()
+    ts_config_properties = torchserve._generate_ts_config_properties(torchserve.DEFAULT_HANDLER_SERVICE)
 
     inference_address = "inference_address=http://0.0.0.0:{}\n".format(http_port)
     server_timeout = "default_response_timeout={}\n".format(model_server_timeout)
@@ -228,7 +168,7 @@ def test_generate_ts_config_properties_default_workers(env, read_file):
 def test_generate_ts_config_properties_default_workers(env, read_file):
     env.return_value.model_server_workers = None
 
-    ts_config_properties = torchserve._generate_ts_config_properties()
+    ts_config_properties = torchserve._generate_ts_config_properties(torchserve.DEFAULT_HANDLER_SERVICE)
 
     workers = "default_workers_per_model={}".format(None)
 
@@ -244,7 +184,7 @@ def test_generate_ts_config_properties_multi_model(env, read_file):
     env.return_value.model_server_workers = None
 
     torchserve.ENABLE_MULTI_MODEL = True
-    ts_config_properties = torchserve._generate_ts_config_properties()
+    ts_config_properties = torchserve._generate_ts_config_properties(torchserve.DEFAULT_HANDLER_SERVICE)
     torchserve.ENABLE_MULTI_MODEL = False
 
     workers = "default_workers_per_model={}".format(None)
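Net effect on the unit tests: the two _adapt_to_ts_format cases and the custom-handler-service case built on it are deleted, and the remaining tests assert the new contract: _set_python_path runs unconditionally, the handler service is passed through to the generated config file, and the server command registers the model directory as model=/opt/ml/model (the SageMaker default model_dir).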
