forked from sassoftware/python-sasctl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
full_lifecycle.py
89 lines (67 loc) · 2.79 KB
/
full_lifecycle.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python
# encoding: utf-8
#
# Copyright © 2019, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import pandas as pd
import sklearn.datasets
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sasctl import Session
from sasctl.tasks import register_model, publish_model, update_model_performance
from sasctl.services import model_repository as mr
from sasctl.services import model_management as mm
# Load the Boston housing data and wrap it in DataFrames: one column per
# feature name in X, and the target in a single-column frame named "Price".
# NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm the
# scikit-learn version this example runs against still provides it.
data = sklearn.datasets.load_boston()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.DataFrame(data=data.target, columns=['Price'])

# Hold out 30% of the rows as a test set; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
)
# Open a session with SAS Viya. The host name and credentials below look like
# placeholders and need to be replaced with real values.
Session('hostname', 'username', 'password')

# Names under which the project and the first model are registered.
project = 'Boston Housing'
model_name = 'Boston Regression'

# Train a scikit-learn linear regression on the training split.
lm = LinearRegression().fit(X_train, y_train)

# Add the fitted model to SAS Model Manager.
register_model(
    lm,
    model_name,
    input=X_train,    # Training features are used to determine model inputs
    project=project,  # Register in the "Boston Housing" project
    force=True,       # Create project if it doesn't exist
)
# Update project properties. Target variable must be set before performance
# definitions can be created.
# Up to this point `project` holds only the project's name; rebind it to the
# object that the model repository returns for that name.
# NOTE(review): the result is used without a check -- confirm get_project
# cannot come back empty here.
project = mr.get_project(project)
# 'Price' is the name of the single target column built in `y`.
project['targetVariable'] = 'Price'
# Send the modified project back and keep whatever update_project returns as
# the project used by the remaining steps.
project = mr.update_project(project)
# Publish the registered linear model to the 'maslocal' destination (the
# real-time scoring engine) and keep the returned module.
module_lm = publish_model(model_name, 'maslocal')

# Take the first test observation as a single record to score.
x = X_test.iloc[0]

# Score that record with the published module and show the prediction.
result = module_lm.predict(x)
print(result)
# Train a second model, a decision tree regressor, on the same training split.
dt = DecisionTreeRegressor().fit(X_train, y_train)

# Register the tree in the same Model Manager project, keeping the returned
# model so it can be published directly.
model_dt = register_model(
    dt,
    'Decision Tree',
    project,
    input=X,
)

# Publish from Model Manager -> MAS.
module_dt = publish_model(model_dt, 'maslocal')

# Score the same test record as before with the new module.
result = module_dt.predict(x)
print(result)
# Instruct the project to look for tables in the "Public" CAS library with
# names starting with "boston_" and use these tables to track model
# performance over time.
mm.create_performance_definition(model_name, 'Public', 'boston')

# Model Manager can track model performance over time if provided with
# historical model observations & predictions. SIMULATE historical data by
# repeatedly sampling from the test set.
perf_df = X_test.copy()
# NOTE(review): 'var1' is presumably the output variable name the registered
# model exposes -- confirm against the model's outputs in Model Manager.
perf_df['var1'] = lm.predict(X_test)
# Use the held-out targets that belong to X_test explicitly, instead of
# assigning the full target frame and relying on index alignment to pick
# out the matching rows.
perf_df['Price'] = y_test

# For each (simulated) historical period, upload model results
for period in ('q1', 'q2', 'q3', 'q4'):
    # Each period gets its own random 20% sample of the scored test rows.
    sample = perf_df.sample(frac=0.2)
    update_model_performance(sample, model_name, period)