Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature json serialization #129

Open
wants to merge 40 commits into
base: cython-wrapper
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
d818cee
init serialization
AlexJoz Nov 30, 2017
a2bf716
submodule update
AlexJoz Nov 30, 2017
14756ac
temp test change
AlexJoz Nov 30, 2017
0ee1e50
upd submodule
AlexJoz Nov 30, 2017
51bd404
upd submodule
AlexJoz Dec 2, 2017
c3eb7a7
update core2
Feb 13, 2018
654bdf5
use docker
Feb 14, 2018
de5d0a3
add .
Feb 14, 2018
1febbcb
update
Feb 14, 2018
ec484c1
cache externals
Feb 14, 2018
385abdd
cache only openblas
Feb 14, 2018
3b29ad3
mv to before_install
Feb 14, 2018
028da05
rm cache
Feb 14, 2018
99fcae2
smoke test
Feb 14, 2018
04a9fa4
rm smoke test
Feb 14, 2018
25138f3
clean out Dockerfile
Feb 14, 2018
f3b269e
Merge pull request #131 from ibayer/fix_json
Feb 14, 2018
9fe4170
update fastFM-core2
Aug 7, 2018
3eef4be
Merge pull request #139 from ibayer/update_core
Aug 7, 2018
3b2ca6c
update core2
Aug 20, 2018
a6a022c
add loss and solver to json
Aug 21, 2018
8171cf7
Merge pull request #140 from ibayer/update_core2
Aug 21, 2018
d7517f9
update core2
Aug 22, 2018
ed32da0
add factory methods for Data and Model
Aug 22, 2018
0f75dba
add col-major support to cpp fit
Aug 22, 2018
1f0d958
add test for cpp sgd
Aug 22, 2018
204db6f
fix train_test_split deprecation warning
Aug 22, 2018
b30f70a
replace predict with cpp version
Aug 23, 2018
c9daed8
put intercept in np array
Aug 29, 2018
f8d4eec
replace sgd solver (use core2 now)
Aug 29, 2018
230e688
remove sgd wrapper for C solver
Aug 29, 2018
53577c5
Merge pull request #142 from ibayer/replace_sgd_solver
Aug 29, 2018
916ad29
update core2
Sep 21, 2018
fae6d7a
upgrade cd regression to core2
Sep 21, 2018
6d928e0
refactor class predict
Sep 21, 2018
bda4cad
rm als core (C) code
Sep 21, 2018
2832f59
Merge pull request #143 from ibayer/irls
Oct 7, 2018
8074170
update fastfm-core2
Oct 14, 2018
7b145d5
fix import
Oct 14, 2018
4cc2fa4
Merge pull request #144 from ibayer/update_core2
Oct 14, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
put intercept in np array
Immanuel Bayer committed Aug 29, 2018
commit c9daed86fc9b269989cd1fb151afc4a8b709292f
2 changes: 2 additions & 0 deletions fastFM/als.py
Original file line number Diff line number Diff line change
@@ -91,6 +91,7 @@ def fit(self, X_train, y_train, n_more_iter=0):
self.warm_start = True

self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train)
self.w0_ = np.array([self.w0_], dtype=np.float64)

if self.iter_count != 0:
self.iter_count = self.iter_count + n_more_iter
@@ -188,4 +189,5 @@ def fit(self, X_train, y_train):
y_train[~i_class1] = 1

self.w0_, self.w_, self.V_ = ffm.ffm_als_fit(self, X_train, y_train)
self.w0_ = np.array([self.w0_], dtype=np.float64)
return self
1 change: 1 addition & 0 deletions fastFM/bpr.py
Original file line number Diff line number Diff line change
@@ -92,4 +92,5 @@ def fit(self, X, pairs):
assert pairs.max() <= X.shape[1]
assert pairs.min() >= 0
self.w0_, self.w_, self.V_ = ffm.ffm_fit_sgd_bpr(self, X, pairs)
self.w0_ = np.array([self.w0_], dtype=np.float64)
return self
72 changes: 45 additions & 27 deletions fastFM/ffm2.pyx
Original file line number Diff line number Diff line change
@@ -14,13 +14,26 @@ cimport numpy as np
import numpy as np


cdef Model* _model_factory(double* w_0, double[:] w,
cdef Settings* _settings_factory(fm):
    # Serialise the estimator's hyper-parameters (sklearn ``get_params``)
    # to JSON and wrap them in a core2 ``Settings`` object.
    # NOTE(review): the returned pointer is heap-allocated with ``new`` —
    # presumably the caller is responsible for ``del``-ing it; confirm.
    params = fm.get_params()
    cdef Settings* settings = new Settings(json.dumps(params).encode())
    return settings


def _init_parameter(fm, n_features):
fm.w0_ = np.zeros(1, dtype=np.float64)
fm.w_ = np.zeros(n_features, dtype=np.float64)
fm.V_ = np.zeros((fm.rank, n_features), dtype=np.float64)
return fm.w0_, fm.w_, fm.V_


cdef Model* _model_factory(double[:] w_0, double[:] w,
np.ndarray[np.float64_t, ndim = 2] V):

cdef Model *m = new Model()
rank = V.shape[0]
n_features = V.shape[1]
m.add_parameter(w_0)
m.add_parameter(&w_0[0])
m.add_parameter(&w[0], n_features)
m.add_parameter(<double *> V.data, rank, n_features, 2)

@@ -30,19 +43,14 @@ cdef Model* _model_factory(double* w_0, double[:] w,
cdef Model* _model_factory_self(fm):

n_features = fm.w_.shape[0]
cdef double w_0
cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V

w_0 = 0 if fm.ignore_w_0 else fm.w0_
w = np.zeros(n_features, dtype=np.float64) if fm.ignore_w else fm.w_
V = np.zeros((fm.rank, n_features), dtype=np.float64)\
if fm.rank == 0 else fm.V_
cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w_0 = fm.w0_
cdef np.ndarray[np.float64_t, ndim=1, mode='c'] w = fm.w_
cdef np.ndarray[np.float64_t, ndim=2, mode='c'] V = fm.V_

cdef Model *m = new Model()
rank = V.shape[0]
n_features = V.shape[1]
m.add_parameter(&w_0)
m.add_parameter(&w_0[0])
m.add_parameter(&w[0], n_features)
m.add_parameter(<double *> V.data, rank, n_features, 2)

@@ -76,7 +84,7 @@ cdef Data* _data_factory_fit(X, double[:] y_pred, double[:] y_true):
return d


def ffm_predict(double w_0, double[:] w,
def ffm_predict(double [:] w_0, double[:] w,
np.ndarray[np.float64_t, ndim = 2] V, X):
assert X.shape[1] == len(w)
assert X.shape[1] == V.shape[1]
@@ -85,7 +93,7 @@ def ffm_predict(double w_0, double[:] w,
cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y =\
np.zeros(X.shape[0], dtype=np.float64)

m = _model_factory(&w_0, w, V)
m = _model_factory(w_0, w, V)
d = _data_factory(X, y)

cpp_ffm.predict(m, d)
@@ -97,34 +105,44 @@ def ffm_predict(double w_0, double[:] w,


def ffm_predict_self(fm, X):
# allocate memory for predictions
cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y =\
np.zeros(X.shape[0], dtype=np.float64)
return ffm_predict(fm.w0_, fm.w_, fm.V_, X)

m = _model_factory_self(fm)
d = _data_factory(X, y)

cpp_ffm.predict(m, d)
def ffm_fit(double [:] w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V,
X, double[:] y, int rank, dict settings):
assert isinstance(settings, dict)
assert X.shape[0] == len(y) # test shapes

cdef Settings* s = new Settings(json.dumps(settings).encode())
m = _model_factory(w_0, w, V)

# allocate memory for prediction
cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y_pred = np.zeros(
X.shape[0], dtype=np.float64)

d = _data_factory_fit(X, y, y_pred)

cpp_ffm.fit(s, m, d)

del m
del d
del m
del s

return y
return w_0, w, V


def ffm_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V,
X, double[:] y, int rank, dict settings):
assert isinstance(settings, dict)
def ffm_fit_self(fm, X, double[:] y):
assert X.shape[0] == len(y) # test shapes

cdef Settings* s = new Settings(json.dumps(settings).encode())
m = _model_factory(&w_0, w, V)
_init_parameter(fm, X.shape[1])
m = _model_factory(fm.w0_, fm.w_, fm.V_)

# allocate memory for prediction
cdef np.ndarray[np.float64_t, ndim=1, mode='c'] y_pred = np.zeros(
X.shape[0], dtype=np.float64)

d = _data_factory_fit(X, y, y_pred)
s = _settings_factory(fm)

cpp_ffm.fit(s, m, d)

@@ -133,4 +151,4 @@ def ffm_fit(double w_0, double[:] w, np.ndarray[np.float64_t, ndim = 2] V,
del m
del s

return w_0, w, V
return fm.w0_, fm.w_, fm.V_
2 changes: 2 additions & 0 deletions fastFM/mcmc.py
Original file line number Diff line number Diff line change
@@ -122,6 +122,7 @@ def fit_predict(self, X_train, y_train, X_test, n_more_iter=0):
coef, y_pred = ffm.ffm_mcmc_fit_predict(self, X_train,
X_test, y_train)
self.w0_, self.w_, self.V_ = coef
self.w0_ = np.array([self.w0_], dtype=np.float64)
self.prediction_ = y_pred
self.warm_start = False

@@ -231,4 +232,5 @@ def fit_predict_proba(self, X_train, y_train, X_test):
coef, y_pred = ffm.ffm_mcmc_fit_predict(self, X_train,
X_test, y_train)
self.w0_, self.w_, self.V_ = coef
self.w0_ = np.array([self.w0_], dtype=np.float64)
return y_pred
2 changes: 2 additions & 0 deletions fastFM/sgd.py
Original file line number Diff line number Diff line change
@@ -91,6 +91,7 @@ def fit(self, X, y):
X = check_array(X, accept_sparse="csc", dtype=np.float64)

self.w0_, self.w_, self.V_ = ffm.ffm_sgd_fit(self, X, y)
self.w0_ = np.array([self.w0_], dtype=np.float64)
return self


@@ -188,4 +189,5 @@ def fit(self, X, y):
X = check_array(X, accept_sparse="csc", dtype=np.float64)

self.w0_, self.w_, self.V_ = ffm.ffm_sgd_fit(self, X, y)
self.w0_ = np.array([self.w0_], dtype=np.float64)
return self
2 changes: 1 addition & 1 deletion fastFM/tests/test_als.py
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@ def get_test_problem(task='regression'):
V = np.array([[6, 0],
[5, 8]], dtype=np.float64)
w = np.array([9, 2], dtype=np.float64)
w0 = 2
w0 = np.array([2], dtype=np.float64)
if task == 'classification':
y_labels = np.ones_like(y)
y_labels[y < np.median(y)] = -1
8 changes: 4 additions & 4 deletions fastFM/tests/test_ffm.py
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@ def get_test_problem():
V = np.array([[6, 0],
[5, 8]], dtype=np.float64)
w = np.array([9, 2], dtype=np.float64)
w0 = 2
w0 = np.array([2], dtype=np.float64)
return w0, w, V, y, X

def test_ffm_predict():
@@ -37,11 +37,11 @@ def test_ffm2_predict_w0():
w[:] = 0
V[:, :] = 0
y_pred = ffm2.ffm_predict(w0, w, V, X)
assert_equal(y_pred, w0)
assert_equal(y_pred[0], w0)

def test_ffm2_fit_als():
w0, w, V, y, X = get_test_problem()
w0 = 0
w0[:] = 0
w[:] = 0
np.random.seed(123)
V = np.random.normal(loc=0.0, scale=1.0,
@@ -68,7 +68,7 @@ def test_ffm2_fit_als():

def test_ffm2_fit_sgd():
w0, w, V, y, X = get_test_problem()
w0 = 0
w0[:] = 0
w[:] = 0
np.random.seed(123)
V = np.random.normal(loc=0.0, scale=1.0,
2 changes: 1 addition & 1 deletion fastFM/tests/test_sgd.py
Original file line number Diff line number Diff line change
@@ -21,7 +21,7 @@ def get_test_problem(task='regression'):
V = np.array([[6, 0],
[5, 8]], dtype=np.float64)
w = np.array([9, 2], dtype=np.float64)
w0 = 2
w0 = np.array([2], dtype=np.float64)
if task == 'classification':
y_labels = np.ones_like(y)
y_labels[y < np.median(y)] = -1