# (c) 2019 - 2024 Open Risk (https://www.openriskmanagement.com)
#
# openLGD is licensed under the Apache 2.0 license, a copy of which is included
# in the source distribution of openLGD. This is notwithstanding any licenses of
# third-party software included in this distribution. You may not use this file except in
# compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.
""" This module provides the statistical lgd estimation algorithms
* Currently linear regression using iterative stochastic gradient descent
"""
from sklearn import linear_model

from dataSource import dataSource


def lgdModel(server=1, choice=1, intercept=None, coef=None):
""" Iterate a generalized linear model
:param server: the id of the server
:type server: integer
:param intercept: an intercept parameter
:type intercept: float
:param coef: a coefficient
:type coef: float
Linear model fitted by minimizing a regularized empirical loss with SGD.
SGD stands for Stochastic Gradient Descent: the gradient of the loss is
estimated each sample at a time and the model is updated along the way with
a decreasing strength schedule (aka learning rate).
Parameters
tol : float or None, default=1e-3 The stopping criterion. If it is not None, training will stop when (loss > best_loss - tol) for ``n_iter_no_change`` consecutive epochs. Convergence is checked against the training loss or the validation loss depending on the `early_stopping` parameter.
max_iter : int, default=1000 The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the :meth:`partial_fit` method.
verbose : int, default=0 The verbosity level.
warm_start : bool, default=False
When set to True, reuse the solution of the previous call to fit as initialization, otherwise, just erase the previous solution.
Repeatedly calling fit or partial_fit when warm_start is True can result in a different solution than when calling fit a single time because of the way the data is shuffled. If a dynamic learning rate is used, the learning rate is adapted depending on the number of samples already seen. Calling ``fit`` resets this counter, while ``partial_fit`` will result in increasing the existing counter.
early_stopping : bool, default=False
Whether to use early stopping to terminate training when validation score is not improving. If set to True, it will automatically set aside a fraction of training data as validation and terminate training when validation score returned by the `score` method is not improving by at least `tol` for `n_iter_no_change` consecutive epochs.
"""
    # A linear LGD model to be estimated iteratively: tol=None and max_iter=1 make each
    # call to fit perform a single pass (epoch) over the data, so the parameter estimates
    # can be refined across successive calls via coef_init / intercept_init
    clf = linear_model.SGDRegressor(tol=None, max_iter=1, verbose=0, warm_start=False,
                                    early_stopping=False)
    # The server ID
    n = server
    # Fetch data from the desired data source
    # choice = 1 - from local file
    # choice = 2 - from database via REST API
    df = dataSource(n, choice)
    # Extract explanatory and target variables
    # X is the dataframe of explanatory variables
    X = df[['X']]
    # y is the target variable (e.g., Loss Severity or LGD)
    y = df['Y']
    # Estimate model (initial or update mode)
    if intercept is None or coef is None:
        clf.fit(X, y)
    else:
        clf.fit(X, y, intercept_init=intercept, coef_init=coef)
    # Return the current parameter estimates
    fitted_params = {'intercept': clf.intercept_[0], 'coefficient': clf.coef_[0]}
    return fitted_params
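

# A minimal usage sketch, assuming the data source wired up in dataSource is available
# (choice=1 reads a local file) and returns a dataframe with 'X' and 'Y' columns as
# expected above. It illustrates the iterative estimation pattern: the parameters
# fitted in one pass are fed back as initial values for the next pass.
if __name__ == "__main__":
    # Initial estimation pass (no prior parameter values)
    params = lgdModel(server=1, choice=1)
    print('Initial estimates:', params)
    # Update pass: warm-start the SGD fit from the previous estimates
    params = lgdModel(server=1, choice=1,
                      intercept=params['intercept'], coef=params['coefficient'])
    print('Updated estimates:', params)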