Skip to content

Commit

Permalink
Merge pull request #2156 from recommenders-team/staging
Browse files: browse the repository at this point in the history
Staging to main: Move some tests to experimental, fix MIND dataset
  • Loading branch information
miguelgfierro authored Aug 27, 2024
2 parents: 4f86e47 + 4f5861d; commit: 10d0c29
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 55 deletions.
8 changes: 4 additions & 4 deletions recommenders/datasets/mind.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@


URL_MIND_LARGE_TRAIN = (
"https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip"
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip"
)
URL_MIND_LARGE_VALID = (
"https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip"
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip"
)
URL_MIND_SMALL_TRAIN = (
"https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip"
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip"
)
URL_MIND_SMALL_VALID = (
"https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip"
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip"
)
URL_MIND_DEMO_TRAIN = (
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDdemo_train.zip"
Expand Down
4 changes: 2 additions & 2 deletions recommenders/models/newsrec/newsrec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,15 +310,15 @@ def get_mind_data_set(type):

if type == "large":
return (
"https://mind201910small.blob.core.windows.net/release/",
"https://recodatasets.z20.web.core.windows.net/newsrec/",
"MINDlarge_train.zip",
"MINDlarge_dev.zip",
"MINDlarge_utils.zip",
)

elif type == "small":
return (
"https://mind201910small.blob.core.windows.net/release/",
"https://recodatasets.z20.web.core.windows.net/newsrec/",
"MINDsmall_train.zip",
"MINDsmall_dev.zip",
"MINDsmall_utils.zip",
Expand Down
8 changes: 4 additions & 4 deletions recommenders/models/sasrec/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def call(self, x, training, mask):
Args:
x (tf.Tensor): Input tensor.
training (tf.Tensor): Training tensor.
training (Boolean): True if in training mode.
mask (tf.Tensor): Mask tensor.
Returns:
Expand Down Expand Up @@ -305,15 +305,15 @@ def call(self, x, training, mask):
Args:
x (tf.Tensor): Input tensor.
training (tf.Tensor): Training tensor.
training (Boolean): True if in training mode.
mask (tf.Tensor): Mask tensor.
Returns:
tf.Tensor: Output tensor.
"""

for i in range(self.num_layers):
x = self.enc_layers[i](x, training, mask)
x = self.enc_layers[i](x, training=training, mask=mask)

return x # (batch_size, input_seq_len, d_model)

Expand Down Expand Up @@ -689,7 +689,7 @@ def train_step(inp, tar):
for epoch in range(1, num_epochs + 1):

step_loss = []
train_loss.reset_states()
train_loss.reset_state()
for step in tqdm(
range(num_steps), total=num_steps, ncols=70, leave=False, unit="b"
):
Expand Down
4 changes: 2 additions & 2 deletions recommenders/models/sasrec/ssept.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def call(self, x, training):
# --- ATTENTION BLOCKS ---
seq_attention = seq_embeddings # (b, s, h1 + h2)

seq_attention = self.encoder(seq_attention, training, mask)
seq_attention = self.encoder(seq_attention, training=training, mask=mask)
seq_attention = self.layer_normalization(seq_attention) # (b, s, h1+h2)

# --- PREDICTION LAYER ---
Expand Down Expand Up @@ -197,7 +197,7 @@ def predict(self, inputs):

seq_embeddings *= mask
seq_attention = seq_embeddings
seq_attention = self.encoder(seq_attention, training, mask)
seq_attention = self.encoder(seq_attention, training=training, mask=mask)
seq_attention = self.layer_normalization(seq_attention) # (b, s, h1+h2)
seq_emb = tf.reshape(
seq_attention,
Expand Down
1 change: 0 additions & 1 deletion tests/ci/azureml_tests/aml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ def get_or_create_environment(
- {conda_pkg_jdk}
- pip
- pip:
- pymanopt@https://github.com/pymanopt/pymanopt/archive/fb36a272cdeecb21992cfd9271eb82baafeb316d.zip
- recommenders[dev{",gpu" if use_gpu else ""}{",spark" if use_spark else ""}]@git+https://github.com/recommenders-team/recommenders.git@{commit_sha}
"""
# See https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04
Expand Down
18 changes: 9 additions & 9 deletions tests/ci/azureml_tests/test_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@
"tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_item_serendipity_item_feature_vector",
"tests/unit/recommenders/evaluation/test_python_evaluation.py::test_user_serendipity_item_feature_vector",
"tests/unit/recommenders/evaluation/test_python_evaluation.py::test_serendipity_item_feature_vector",
"tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
"tests/unit/recommenders/models/test_tfidf_utils.py::test_init",
"tests/unit/recommenders/models/test_tfidf_utils.py::test_clean_dataframe",
"tests/unit/recommenders/models/test_tfidf_utils.py::test_fit",
Expand All @@ -228,13 +227,6 @@
"tests/unit/recommenders/models/test_tfidf_utils.py::test_get_top_k_recommendations",
"tests/unit/recommenders/models/test_cornac_utils.py::test_predict",
"tests/unit/recommenders/models/test_cornac_utils.py::test_recommend_k_items",
"tests/unit/recommenders/models/test_geoimc.py::test_dataptr",
"tests/unit/recommenders/models/test_geoimc.py::test_length_normalize",
"tests/unit/recommenders/models/test_geoimc.py::test_mean_center",
"tests/unit/recommenders/models/test_geoimc.py::test_reduce_dims",
"tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
"tests/unit/recommenders/models/test_geoimc.py::test_inferer_init",
"tests/unit/recommenders/models/test_geoimc.py::test_inferer_infer",
"tests/unit/recommenders/models/test_sar_singlenode.py::test_init",
"tests/unit/recommenders/models/test_sar_singlenode.py::test_fit",
"tests/unit/recommenders/models/test_sar_singlenode.py::test_predict",
Expand Down Expand Up @@ -307,7 +299,6 @@
"tests/integration/recommenders/utils/test_k8s_utils.py",
],
"group_notebooks_cpu_001": [ # Total group time: 226.42s
"tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",
"tests/unit/examples/test_notebooks_python.py::test_sar_deep_dive_runs",
"tests/unit/examples/test_notebooks_python.py::test_baseline_deep_dive_runs",
"tests/unit/examples/test_notebooks_python.py::test_template_runs",
Expand Down Expand Up @@ -456,5 +447,14 @@
"tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_users",
"tests/unit/recommenders/models/test_lightfm_utils.py::test_sim_items",
"tests/functional/examples/test_notebooks_python.py::test_lightfm_functional",
"tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
"tests/unit/recommenders/models/test_geoimc.py::test_dataptr",
"tests/unit/recommenders/models/test_geoimc.py::test_length_normalize",
"tests/unit/recommenders/models/test_geoimc.py::test_mean_center",
"tests/unit/recommenders/models/test_geoimc.py::test_reduce_dims",
"tests/unit/recommenders/models/test_geoimc.py::test_imcproblem",
"tests/unit/recommenders/models/test_geoimc.py::test_inferer_init",
"tests/unit/recommenders/models/test_geoimc.py::test_inferer_infer",
"tests/unit/examples/test_notebooks_python.py::test_rlrmc_quickstart_runs",
]
}
52 changes: 26 additions & 26 deletions tests/data_validation/recommenders/datasets/test_mind.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,34 +27,34 @@
'"0x8D8B8AD5B126C3B"',
),
(
"https://mind201910small.blob.core.windows.net/release/MINDsmall_train.zip",
"52952752",
"0x8D834F2EB31BDEC",
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_train.zip",
"52994575",
'"0x8DCC5A830190676"',
),
(
"https://mind201910small.blob.core.windows.net/release/MINDsmall_dev.zip",
"30945572",
"0x8D834F2EBA8D865",
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_dev.zip",
"30948560",
'"0x8DCC5A82E182A0F"',
),
(
"https://mind201910small.blob.core.windows.net/release/MINDsmall_utils.zip",
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDsmall_utils.zip",
"155178106",
"0x8D87F67F4AEB960",
'"0x8D8B8AD5B3677C6"',
),
(
"https://mind201910small.blob.core.windows.net/release/MINDlarge_train.zip",
"530196631",
"0x8D8244E90C15C07",
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_train.zip",
"531360717",
'"0x8DCC5A8375BDC1D"',
),
(
"https://mind201910small.blob.core.windows.net/release/MINDlarge_dev.zip",
"103456245",
"0x8D8244E92005849",
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_dev.zip",
"103592887",
'"0x8DCC5A82FE8609C"',
),
(
"https://mind201910small.blob.core.windows.net/release/MINDlarge_utils.zip",
"https://recodatasets.z20.web.core.windows.net/newsrec/MINDlarge_utils.zip",
"150359301",
"0x8D87F67E6CA4364",
'"0x8D8B8AD5B2ED4C9"',
),
],
)
Expand All @@ -72,14 +72,6 @@ def test_download_mind_demo(tmp):
assert statinfo.st_size == 10080022


def test_download_mind_small(tmp):
train_path, valid_path = download_mind(size="small", dest_path=tmp)
statinfo = os.stat(train_path)
assert statinfo.st_size == 52952752
statinfo = os.stat(valid_path)
assert statinfo.st_size == 30945572


def test_extract_mind_demo(tmp):
train_zip, valid_zip = download_mind(size="demo", dest_path=tmp)
train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False)
Expand All @@ -102,6 +94,14 @@ def test_extract_mind_demo(tmp):
assert statinfo.st_size == 1044588


def test_download_mind_small(tmp):
train_path, valid_path = download_mind(size="small", dest_path=tmp)
statinfo = os.stat(train_path)
assert statinfo.st_size == 52994575
statinfo = os.stat(valid_path)
assert statinfo.st_size == 30948560


def test_extract_mind_small(tmp):
train_zip, valid_zip = download_mind(size="small", dest_path=tmp)
train_path, valid_path = extract_mind(train_zip, valid_zip, clean_zip_file=False)
Expand All @@ -127,9 +127,9 @@ def test_extract_mind_small(tmp):
def test_download_mind_large(tmp_path):
train_path, valid_path = download_mind(size="large", dest_path=tmp_path)
statinfo = os.stat(train_path)
assert statinfo.st_size == 530196631
assert statinfo.st_size == 531360717
statinfo = os.stat(valid_path)
assert statinfo.st_size == 103456245
assert statinfo.st_size == 103592887


def test_extract_mind_large(tmp):
Expand Down
21 changes: 14 additions & 7 deletions tests/unit/recommenders/models/test_lightfm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@
import itertools
import numpy as np
import pandas as pd
from lightfm.data import Dataset
from lightfm import LightFM, cross_validation

from recommenders.models.lightfm.lightfm_utils import (
track_model_metrics,
similar_users,
similar_items,
)
try:
from lightfm.data import Dataset
from lightfm import LightFM, cross_validation
from recommenders.models.lightfm.lightfm_utils import (
track_model_metrics,
similar_users,
similar_items,
)
except ModuleNotFoundError:
pass


SEEDNO = 42
Expand Down Expand Up @@ -128,6 +131,7 @@ def sim_items(interactions, fitting):
)


@pytest.mark.experimental
def test_interactions(interactions):
train_interactions, test_interactions, item_features, user_features = interactions
assert train_interactions.shape == (10, 10)
Expand All @@ -136,6 +140,7 @@ def test_interactions(interactions):
assert user_features.shape == (10, 17)


@pytest.mark.experimental
@pytest.mark.skip(reason="Flaky test")
def test_fitting(fitting):
output, _ = fitting
Expand All @@ -152,9 +157,11 @@ def test_fitting(fitting):
np.testing.assert_array_equal(output, target)


@pytest.mark.experimental
def test_sim_users(sim_users):
assert sim_users.shape == (5, 2)


@pytest.mark.experimental
def test_sim_items(sim_items):
assert sim_items.shape == (5, 2)

0 comments on commit 10d0c29

Please sign in to comment.