# invoke_sagemaker.py
# Reference: <https://aws.amazon.com/ko/blogs/machine-learning/scale-yolov5-inference-with-amazon-sagemaker-endpoints-and-aws-lambda/>
import os, logging, json, time, urllib.parse
import boto3, botocore
import numpy as np
import cv2
# Module-level setup: executed once when the module is imported, so the logger,
# AWS clients and constants below are shared by every call to lambda_handler.

# Root logger (getLogger() with no name), configured to emit INFO and above.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# AWS Lambda service client.
# NOTE(review): not referenced by the code visible in this file - confirm
# whether it is still needed.
client = boto3.client('lambda')
# S3 BUCKET DETAILS
# Resource handle used by lambda_handler to download the input image.
s3 = boto3.resource('s3')
# Placeholder - replace with the name of the bucket that holds the input images.
BUCKET_NAME = "<NAME OF S3 BUCKET FOR INPUT IMAGE>"
# Placeholder - replace before deploying.
# NOTE(review): lambda_handler builds the object key as
# IMAGE_LOCATION + '/' + <file name>, i.e. it treats this value as a key
# prefix; the trailing "/image.png" in the placeholder looks inconsistent with
# that usage - confirm the intended value.
IMAGE_LOCATION = "<S3 PATH TO IMAGE>/image.png"
# INFERENCE ENDPOINT DETAILS
# Placeholder - replace with the name of the deployed SageMaker endpoint.
ENDPOINT_NAME = 'MY_SAGEMAKER_ENDPOINT_NAME'
# Passed to botocore as read_timeout (seconds) for the SageMaker runtime client
# below, i.e. how long to wait for the endpoint's response.
_inference_waiting_timeout = 80
config = botocore.config.Config(read_timeout=_inference_waiting_timeout)
# SageMaker runtime client used by lambda_handler to call invoke_endpoint.
runtime = boto3.client('runtime.sagemaker', config=config)
# Height and width (pixels) that lambda_handler resizes every input image to
# before sending it to the endpoint.
modelHeight, modelWidth = 640, 640
# RUNNING LAMBDA
def lambda_handler(event, context):
    """Run inference on the image referenced by an S3 event notification.

    The file name is taken from the object key of the first record in
    ``event``. The image is downloaded from ``BUCKET_NAME`` under
    ``IMAGE_LOCATION`` into ``/tmp``, resized to
    ``modelWidth`` x ``modelHeight``, scaled to float32 values in [0, 1] and
    sent as a one-element JSON batch to the SageMaker endpoint
    ``ENDPOINT_NAME``.

    Args:
        event: S3 event notification; only
            ``event['Records'][0]['s3']['object']['key']`` is read.
        context: Lambda context object (unused).

    Returns:
        dict with ``statusCode`` and a JSON-encoded ``body``. On success
        (200) the body holds ``message`` (elapsed time of pre-processing plus
        the endpoint call) and ``results`` (the decoded endpoint response).
        A 400 response with only ``message`` is returned when the key has no
        file name or the downloaded file cannot be decoded as an image.

    Raises:
        KeyError, IndexError: if ``event`` lacks the expected S3 record.
        botocore.exceptions.ClientError: if the S3 download or the endpoint
            invocation fails.
    """
    key = urllib.parse.unquote_plus(event['Records'][0]['s3']['object']['key'], encoding='utf-8')

    # INPUTS - Download Image file from S3 to Lambda /tmp/
    input_imagename = key.split('/')[-1]
    logger.info('Input Imagename: %s', input_imagename)
    if not input_imagename:
        # A key ending in '/' (a "folder" placeholder object) has no file name;
        # downloading it would target the directory '/tmp/' itself.
        logger.error('Object key %r does not name a file', key)
        return {
            "statusCode": 400,
            "body": json.dumps({
                "message": "Object key does not name an image file: " + key,
            }),
        }

    local_path = '/tmp/' + input_imagename
    s3.Bucket(BUCKET_NAME).download_file(IMAGE_LOCATION + '/' + input_imagename, local_path)

    # INFERENCE - Invoke the SageMaker Inference Endpoint
    logger.info('Starting Inference ... ')
    try:
        orig_image = cv2.imread(local_path)
    finally:
        # /tmp is reused by warm invocations and has limited capacity, so the
        # downloaded file is removed as soon as it has been read into memory.
        try:
            os.remove(local_path)
        except OSError as err:
            logger.warning('Could not remove %s: %s', local_path, err)

    if orig_image is None:
        # cv2.imread signals an unreadable/undecodable file by returning None.
        # Previously this path fell through to the return statement and failed
        # with UnboundLocalError on `inference_time` / `result`.
        logger.error('Could not decode image: %s', input_imagename)
        return {
            "statusCode": 400,
            "body": json.dumps({
                "message": "Could not decode image: " + input_imagename,
            }),
        }

    start_time_iter = time.perf_counter()
    # pre-processing input image: resize to the model's input size and scale
    # pixel values to float32 in [0, 1]
    image = cv2.resize(orig_image, (modelWidth, modelHeight), interpolation=cv2.INTER_AREA)
    data = image.astype(np.float32) / 255.0
    # the endpoint receives a batch containing this single image
    payload = json.dumps([data.tolist()])
    # run inference
    response = runtime.invoke_endpoint(EndpointName=ENDPOINT_NAME, ContentType='application/json', Body=payload)
    # get the output results
    result = json.loads(response['Body'].read().decode())
    # total time taken for pre-processing + inference, rounded to 2 decimals
    inference_time = round(time.perf_counter() - start_time_iter, 2)
    logger.info('Inference Completed ... ')

    # OUTPUTS - Using the output to utilize in other services downstream
    return {
        "statusCode": 200,
        "body": json.dumps({
            "message": "Inference Time:// " + str(inference_time) + " seconds.",
            "results": result,
        }),
    }