diff --git a/recommenders/datasets/mind.py b/recommenders/datasets/mind.py index f396044c6..23b7a8db2 100644 --- a/recommenders/datasets/mind.py +++ b/recommenders/datasets/mind.py @@ -18,16 +18,16 @@ URL_MIND_LARGE_TRAIN = ( - "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip" + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip" ) URL_MIND_LARGE_VALID = ( - "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip" + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip" ) URL_MIND_SMALL_TRAIN = ( - "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip" + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip" ) URL_MIND_SMALL_VALID = ( - "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip" + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip" ) URL_MIND_DEMO_TRAIN = ( "https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip" diff --git a/recommenders/models/newsrec/newsrec_utils.py b/recommenders/models/newsrec/newsrec_utils.py index 429f24b83..48e1ce8f3 100644 --- a/recommenders/models/newsrec/newsrec_utils.py +++ b/recommenders/models/newsrec/newsrec_utils.py @@ -310,7 +310,7 @@ def get_mind_data_set(type): if type == "large": return ( - "https://mind201910small.blob.core.windows.net/release/", + "https://recodatasets.z20.web.core.windows.net/newsrec/", "MINDlarge_train.zip", "MINDlarge_dev.zip", "MINDlarge_utils.zip", @@ -318,7 +318,7 @@ def get_mind_data_set(type): elif type == "small": return ( - "https://mind201910small.blob.core.windows.net/release/", + "https://recodatasets.z20.web.core.windows.net/newsrec/", "MINDsmall_train.zip", "MINDsmall_dev.zip", "MINDsmall_utils.zip", diff --git a/recommenders/models/sasrec/model.py b/recommenders/models/sasrec/model.py index 4ac6fa93d..778eecc9e 100644 --- a/recommenders/models/sasrec/model.py +++ b/recommenders/models/sasrec/model.py @@ -240,7 +240,7 @@ def call(self, x, training, mask): Args: x (tf.Tensor): Input tensor. - training (tf.Tensor): Training tensor. + training (Boolean): True if in training mode. mask (tf.Tensor): Mask tensor. Returns: @@ -305,7 +305,7 @@ def call(self, x, training, mask): Args: x (tf.Tensor): Input tensor. - training (tf.Tensor): Training tensor. + training (Boolean): True if in training mode. mask (tf.Tensor): Mask tensor. Returns: @@ -313,7 +313,7 @@ def call(self, x, training, mask): """ for i in range(self.num_layers): - x = self.enc_layers[i](x, training, mask) + x = self.enc_layers[i](x, training=training, mask=mask) return x # (batch_size, input_seq_len, d_model) @@ -689,7 +689,7 @@ def train_step(inp, tar): for epoch in range(1, num_epochs + 1): step_loss = [] - train_loss.reset_states() + train_loss.reset_state() for step in tqdm( range(num_steps), total=num_steps, ncols=70, leave=False, unit="b" ): diff --git a/recommenders/models/sasrec/ssept.py b/recommenders/models/sasrec/ssept.py index dbf7abdce..15da43082 100644 --- a/recommenders/models/sasrec/ssept.py +++ b/recommenders/models/sasrec/ssept.py @@ -122,7 +122,7 @@ def call(self, x, training): # --- ATTENTION BLOCKS --- seq_attention = seq_embeddings # (b, s, h1 + h2) - seq_attention = self.encoder(seq_attention, training, mask) + seq_attention = self.encoder(seq_attention, training=training, mask=mask) seq_attention = self.layer_normalization(seq_attention) # (b, s, h1+h2) # --- PREDICTION LAYER --- @@ -197,7 +197,7 @@ def predict(self, inputs): seq_embeddings *= mask seq_attention = seq_embeddings - seq_attention = self.encoder(seq_attention, training, mask) + seq_attention = self.encoder(seq_attention, training=training, mask=mask) seq_attention = self.layer_normalization(seq_attention) # (b, s, h1+h2) seq_emb = tf.reshape( seq_attention, diff --git a/tests/ci/azureml_tests/aml_utils.py b/tests/ci/azureml_tests/aml_utils.py index d24ec1361..5a4d488e3 100644 --- a/tests/ci/azureml_tests/aml_utils.py +++ b/tests/ci/azureml_tests/aml_utils.py @@ -92,7 +92,6 @@ def get_or_create_environment( - {conda_pkg_jdk} - pip - pip: - - pymanopt@https://github.com/pymanopt/pymanopt/archive/fb36a272cdeecb21992cfd9271eb82baafeb316d.zip - recommenders[dev{",gpu" if use_gpu else ""}{",spark" if use_spark else ""}]@git+https://github.com/recommenders-team/recommenders.git@{commit_sha} """ # See https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04 diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py index aa2d78b6e..401603ad0 100644 --- a/tests/ci/azureml_tests/test_groups.py +++ b/tests/ci/azureml_tests/test_groups.py @@ -217,7 +217,6 @@ "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_item_serendipity_item_feature_vector", "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_serendipity_item_feature_vector", "tests/unit/recommenders/evaluation/test_python_evaluation.py::test_serendipity_item_feature_vector", - "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem", "tests/unit/recommenders/models/test_tfidf_utils.py::test_init", "tests/unit/recommenders/models/test_tfidf_utils.py::test_clean_dataframe", "tests/unit/recommenders/models/test_tfidf_utils.py::test_fit", @@ -228,13 +227,6 @@ "tests/unit/recommenders/models/test_tfidf_utils.py::test_get_top_k_recommendations", "tests/unit/recommenders/models/test_cornac_utils.py::test_predict", "tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items", - "tests/unit/recommenders/models/test_geoimc.py::test_dataptr", - "tests/unit/recommenders/models/test_geoimc.py::test_length_normalize", - "tests/unit/recommenders/models/test_geoimc.py::test_mean_center", - "tests/unit/recommenders/models/test_geoimc.py::test_reduce_dims", - "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem", - "tests/unit/recommenders/models/test_geoimc.py::test_inferer_init", - "tests/unit/recommenders/models/test_geoimc.py::test_inferer_infer", "tests/unit/recommenders/models/test_sar_singlenode.py::test_init", "tests/unit/recommenders/models/test_sar_singlenode.py::test_fit", "tests/unit/recommenders/models/test_sar_singlenode.py::test_predict", @@ -307,7 +299,6 @@ "tests/integration/recommenders/utils/test_k8s_utils.py", ], "group_notebooks_cpu_001": [ # Total group time: 226.42s - "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs", "tests/unit/examples/test_notebooks_python.py::test_sar_deep_dive_runs", "tests/unit/examples/test_notebooks_python.py::test_baseline_deep_dive_runs", "tests/unit/examples/test_notebooks_python.py::test_template_runs", @@ -456,5 +447,14 @@ "tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_users", "tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_items", "tests/functional/examples/test_notebooks_python.py::test_lightfm_functional", + "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem", + "tests/unit/recommenders/models/test_geoimc.py::test_dataptr", + "tests/unit/recommenders/models/test_geoimc.py::test_length_normalize", + "tests/unit/recommenders/models/test_geoimc.py::test_mean_center", + "tests/unit/recommenders/models/test_geoimc.py::test_reduce_dims", + "tests/unit/recommenders/models/test_geoimc.py::test_imcproblem", + "tests/unit/recommenders/models/test_geoimc.py::test_inferer_init", + "tests/unit/recommenders/models/test_geoimc.py::test_inferer_infer", + "tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs", ] } diff --git a/tests/data_validation/recommenders/datasets/test_mind.py b/tests/data_validation/recommenders/datasets/test_mind.py index d4f5f8c1f..8d835ad9b 100644 --- a/tests/data_validation/recommenders/datasets/test_mind.py +++ b/tests/data_validation/recommenders/datasets/test_mind.py @@ -27,34 +27,34 @@ '"0x8D8B8AD5B126C3B"', ), ( - "https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip", - "52952752", - "0x8D834F2EB31BDEC", + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip", + "52994575", + '"0x8DCC5A830190676"', ), ( - "https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip", - "30945572", - "0x8D834F2EBA8D865", + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip", + "30948560", + '"0x8DCC5A82E182A0F"', ), ( - "https://mind201910small.blob.core.windows.net/release/MINDsmall_utils.zip", + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip", "155178106", - "0x8D87F67F4AEB960", + '"0x8D8B8AD5B3677C6"', ), ( - "https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip", - "530196631", - "0x8D8244E90C15C07", + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip", + "531360717", + '"0x8DCC5A8375BDC1D"', ), ( - "https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip", - "103456245", - "0x8D8244E92005849", + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip", + "103592887", + '"0x8DCC5A82FE8609C"', ), ( - "https://mind201910small.blob.core.windows.net/release/MINDlarge_utils.zip", + "https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip", "150359301", - "0x8D87F67E6CA4364", + '"0x8D8B8AD5B2ED4C9"', ), ], ) @@ -72,14 +72,6 @@ def test_download_mind_demo(tmp): assert statinfo.st_size == 10080022 -def test_download_mind_small(tmp): - train_path, valid_path = download_mind(size="small", dest_path=tmp) - statinfo = os.stat(train_path) - assert statinfo.st_size == 52952752 - statinfo = os.stat(valid_path) - assert statinfo.st_size == 30945572 - - def test_extract_mind_demo(tmp): train_zip, valid_zip = download_mind(size="demo", dest_path=tmp) train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False) @@ -102,6 +94,14 @@ def test_extract_mind_demo(tmp): assert statinfo.st_size == 1044588 +def test_download_mind_small(tmp): + train_path, valid_path = download_mind(size="small", dest_path=tmp) + statinfo = os.stat(train_path) + assert statinfo.st_size == 52994575 + statinfo = os.stat(valid_path) + assert statinfo.st_size == 30948560 + + def test_extract_mind_small(tmp): train_zip, valid_zip = download_mind(size="small", dest_path=tmp) train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False) @@ -127,9 +127,9 @@ def test_extract_mind_small(tmp): def test_download_mind_large(tmp_path): train_path, valid_path = download_mind(size="large", dest_path=tmp_path) statinfo = os.stat(train_path) - assert statinfo.st_size == 530196631 + assert statinfo.st_size == 531360717 statinfo = os.stat(valid_path) - assert statinfo.st_size == 103456245 + assert statinfo.st_size == 103592887 def test_extract_mind_large(tmp): diff --git a/tests/unit/recommenders/models/test_lightfm_utils.py b/tests/unit/recommenders/models/test_lightfm_utils.py index 2155fb655..62b9d2ccc 100644 --- a/tests/unit/recommenders/models/test_lightfm_utils.py +++ b/tests/unit/recommenders/models/test_lightfm_utils.py @@ -6,14 +6,17 @@ import itertools import numpy as np import pandas as pd -from lightfm.data import Dataset -from lightfm import LightFM, cross_validation -from recommenders.models.lightfm.lightfm_utils import ( - track_model_metrics, - similar_users, - similar_items, -) +try: + from lightfm.data import Dataset + from lightfm import LightFM, cross_validation + from recommenders.models.lightfm.lightfm_utils import ( + track_model_metrics, + similar_users, + similar_items, + ) +except ModuleNotFoundError: + pass SEEDNO = 42 @@ -128,6 +131,7 @@ def sim_items(interactions, fitting): ) +@pytest.mark.experimental def test_interactions(interactions): train_interactions, test_interactions, item_features, user_features = interactions assert train_interactions.shape == (10, 10) @@ -136,6 +140,7 @@ def test_interactions(interactions): assert user_features.shape == (10, 17) +@pytest.mark.experimental @pytest.mark.skip(reason="Flaky test") def test_fitting(fitting): output, _ = fitting @@ -152,9 +157,11 @@ def test_fitting(fitting): np.testing.assert_array_equal(output, target) +@pytest.mark.experimental def test_sim_users(sim_users): assert sim_users.shape == (5, 2) +@pytest.mark.experimental def test_sim_items(sim_items): assert sim_items.shape == (5, 2)