From 0fdb741c34c625b4072a455f825e8622d411eeb8 Mon Sep 17 00:00:00 2001 From: Alberto Cattaneo <84471416+AlCatt91@users.noreply.github.com> Date: Fri, 27 Oct 2023 12:37:16 +0100 Subject: [PATCH] AllScoresBESS and high-level pipeline (#33) * implement AllScoresBESS and high-level pipeline * notation consistency * IpuModel for CI * increase CI timeout * reduce CI memory requirements * make return all scores optional, to avoid host OOM * fix typo * fix typo * add triple filtering to AllScoresPipeline * add return_topk option to AllScoresPipeline * fix typo --- .github/workflows/ci.yaml | 2 +- besskge/__init__.py | 1 + besskge/bess.py | 153 ++++++++++++++++- besskge/pipeline.py | 298 ++++++++++++++++++++++++++++++++++ besskge/utils.py | 36 ++++ docs/source/API_reference.rst | 1 + requirements-dev.txt | 1 + tests/test_pipeline.py | 178 ++++++++++++++++++++ 8 files changed, 663 insertions(+), 7 deletions(-) create mode 100644 besskge/pipeline.py create mode 100644 tests/test_pipeline.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 345118f..d3f3a96 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -14,7 +14,7 @@ jobs: ci: runs-on: ubuntu-latest container: graphcore/pytorch:3.3.0-ubuntu-20.04-20230703 - timeout-minutes: 15 + timeout-minutes: 25 steps: - uses: actions/checkout@v3 - name: Install dev-requirements diff --git a/besskge/__init__.py b/besskge/__init__.py index d22e654..8627096 100644 --- a/besskge/__init__.py +++ b/besskge/__init__.py @@ -45,6 +45,7 @@ def load_custom_ops_so() -> None: loss, metric, negative_sampler, + pipeline, scoring, sharding, utils, diff --git a/besskge/bess.py b/besskge/bess.py index 866dca9..798feed 100644 --- a/besskge/bess.py +++ b/besskge/bess.py @@ -707,22 +707,22 @@ def forward( and respective scores are kept to be used in the next iteration, while the rest are discarded. - :param relation: shape: (shard_bs,) + :param relation: shape: (1, shard_bs,) Relation indices. - :param head: shape: (shard_bs,) + :param head: shape: (1, shard_bs,) Head indices, if known. Default: None. - :param tail: shape: (shard_bs,) + :param tail: shape: (1, shard_bs,) Tail indices, if known. Default: None. - :param negative: shape: (n_shard, B, padded_negative) + :param negative: shape: (1, n_shard, B, padded_negative) Candidates to score against the queries. It can be the same set for all queries (B=1), or specific for each query in the batch (B=shard_bs). If None, score each query against all entities in the knowledge graph. Default: None. - :param triple_mask: shape: (shard_bs,) + :param triple_mask: shape: (1, shard_bs,) Mask to filter the triples in the micro-batch before computing metrics. Default: None. - :param negative_mask: shape: (n_shard, B, padded_negative) + :param negative_mask: shape: (1, n_shard, B, padded_negative) If candidates are provided, mask to discard padding negatives when computing best completions. Requires the use of :code:`mask_on_gather=True` in the candidate @@ -919,3 +919,144 @@ def loop_body( ) return out_dict + + +class AllScoresBESS(torch.nn.Module): + """ + Distributed scoring of (h, r, ?) or (?, r, t) queries against + the entities in the knowledge graph, returning all scores to + host in blocks, based on the BESS :cite:p:`BESS` + inference scheme. + To be used in combination with a batch sampler based on a + "h_shard"/"t_shard"-partitioned triple set. + Since each iteration on IPU computes only part of the scores + (based on the size of the sliding window), metrics should be + computed on host after aggregating data (see + :class:`besskge.pipeline.AllScoresPipeline`). + + Only to be used for inference. + """ + + def __init__( + self, + candidate_sampler: PlaceholderNegativeSampler, + score_fn: BaseScoreFunction, + window_size: int = 1000, + ) -> None: + """ + Initialize AllScores BESS-KGE module. + + :param candidate_sampler: + :class:`besskge.negative_sampler.PlaceholderNegativeSampler` class, + specifying corruption scheme. + :param score_fn: + Scoring function. + :param window_size: + Size of the sliding window, namely the number of negative entities + scored against each query at each step on IPU and returned to host. + Should be decreased with large batch sizes, to avoid an OOM error. + Default: 1000. + """ + super().__init__() + self.sharding = score_fn.sharding + self.score_fn = score_fn + self.negative_sampler = candidate_sampler + self.window_size = window_size + + if not score_fn.negative_sample_sharing: + raise ValueError("AllScoresBESS requires using negative sample sharing") + + if self.negative_sampler.corruption_scheme not in ["h", "t"]: + raise ValueError("AllScoresBESS only support 'h', 't' corruption scheme") + + if not isinstance(self.negative_sampler, PlaceholderNegativeSampler): + raise ValueError( + "AllScoresBESS requires a `PlaceholderNegativeSampler`" + " candidate_sampler" + ) + + self.entity_embedding = self.score_fn.entity_embedding + self.entity_embedding_size: int = self.entity_embedding.shape[-1] + + self.candidate = torch.arange(self.window_size, dtype=torch.int32) + self.n_step = int( + np.ceil(self.sharding.max_entity_per_shard / self.window_size) + ) + + def forward( + self, + step: torch.Tensor, + relation: torch.Tensor, + head: Optional[torch.Tensor] = None, + tail: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """ + Forward step. + + Similarly to :class:`ScoreMovingBessKGE`, candidates are scored on the + device where they are gathered, then scores for the same query against + candidates in different shards are collected together via an AllToAll. + + :param step: + The index of the block (of size self.window_size) of entities + on each IPU to score against queries. + :param relation: shape: (1, shard_bs,) + Relation indices. + :param head: shape: (1, shard_bs,) + Head indices, if known. Default: None. + :param tail: shape: (1, shard_bs,) + Tail indices, if known. Default: None. + + :return: + The scores for the completions. + """ + + relation = relation.squeeze(0) + if head is not None: + head = head.squeeze(0) + if tail is not None: + tail = tail.squeeze(0) + + n_shard = self.sharding.n_shard + shard_bs = relation.shape[0] + + relation_all = all_gather(relation, n_shard) + if self.negative_sampler.corruption_scheme == "h": + tail_embedding = self.entity_embedding[tail] + tail_embedding_all = all_gather(tail_embedding, n_shard) + elif self.negative_sampler.corruption_scheme == "t": + head_embedding = self.entity_embedding[head] + head_embedding_all = all_gather(head_embedding, n_shard) + + # Local indices of the entities to score against queries + ent_slice = torch.minimum( + step * self.window_size + + torch.arange(self.window_size, device=relation.device), + torch.tensor(self.sharding.max_entity_per_shard - 1), + ) + negative_embedding = self.entity_embedding[ent_slice] + + if self.negative_sampler.corruption_scheme == "h": + scores = self.score_fn.score_heads( + negative_embedding, + relation_all.flatten(end_dim=1), + tail_embedding_all.flatten(end_dim=1), + ) + elif self.negative_sampler.corruption_scheme == "t": + scores = self.score_fn.score_tails( + head_embedding_all.flatten(end_dim=1), + relation_all.flatten(end_dim=1), + negative_embedding, + ) + + # Send back queries to original shard + scores = ( + all_to_all( + scores.reshape(n_shard, shard_bs, self.window_size), + n_shard, + ) + .transpose(0, 1) + .flatten(start_dim=1) + ) # shape (bs, n_shard * ws) + + return scores diff --git a/besskge/pipeline.py b/besskge/pipeline.py new file mode 100644 index 0000000..49e8507 --- /dev/null +++ b/besskge/pipeline.py @@ -0,0 +1,298 @@ +# Copyright (c) 2023 Graphcore Ltd. All rights reserved. + +""" +High-level APIs for training/inference with BESS. +""" + +from typing import Any, Dict, List, Optional, Union + +import numpy as np +import poptorch +import torch +from numpy.typing import NDArray +from tqdm import tqdm + +from besskge.batch_sampler import ShardedBatchSampler +from besskge.bess import AllScoresBESS +from besskge.metric import Evaluation +from besskge.negative_sampler import PlaceholderNegativeSampler +from besskge.scoring import BaseScoreFunction +from besskge.utils import get_entity_filter + + +class AllScoresPipeline(torch.nn.Module): + """ + Pipeline to compute scores of (h, r, ?) / (?, r, t) queries against all entities + in the KG, and related prediction metrics. + It supports filtering out the scores of specific completions that appear in a given + set of triples. + + To be used in combination with a batch sampler based on a + "h_shard"/"t_shard"-partitioned triple set. + """ + + def __init__( + self, + batch_sampler: ShardedBatchSampler, + corruption_scheme: str, + score_fn: BaseScoreFunction, + evaluation: Optional[Evaluation] = None, + filter_triples: Optional[List[Union[torch.Tensor, NDArray[np.int32]]]] = None, + return_scores: bool = False, + return_topk: bool = False, + k: int = 10, + window_size: int = 1000, + use_ipu_model: bool = False, + ) -> None: + """ + Initialize pipeline. + + :param batch_sampler: + Batch sampler, based on a + "h_shard"/"t_shard"-partitioned triple set. + :param corruption_scheme: + Set to "t" to score (h, r, ?) completions, or to + "h" to score (?, r, t) completions. + :param score_fn: + The trained scoring function. + :param evaluation: + Evaluation module, for computing metrics. + Default: None. + :param filter_triples: + The set of all triples whose scores need to be filtered. + The triples passed here must have GLOBAL IDs for head/tail + entities. Default: None. + :param return_scores: + If True, store and return scores of all queries' completions + (with filters applied, if specified). + For large number of queries/entities, this can cause the host + to go OOM. + Default: False. + :param return_topk: + If True, return for each query the global IDs of the most likely + completions, after filtering out the scores of `filter_triples`. + Default: False. + :param k: + If `return_topk` is set to True, for each query return the + top-k most likely predictions (after filtering). Default: 10. + :param window_size: + Size of the sliding window, namely the number of negative entities + scored against each query at each step on IPU and returned to host. + Should be decreased with large batch sizes, to avoid an OOM error. + Default: 1000. + :param use_ipu_model: + Run pipeline on IPU Model instead of actual hardware. Default: False. + """ + super().__init__() + self.batch_sampler = batch_sampler + if not (evaluation or return_scores): + raise ValueError( + "Nothing to return. Provide `evaluation` or set `return_scores=True`" + ) + if corruption_scheme not in ["h", "t"]: + raise ValueError("corruption_scheme needs to be either 'h' or 't'") + if ( + corruption_scheme == "h" + and self.batch_sampler.triple_partition_mode != "t_shard" + ): + raise ValueError( + "Corruption scheme 'h' requires 't-shard'-partitioned triples" + ) + elif ( + corruption_scheme == "t" + and self.batch_sampler.triple_partition_mode != "h_shard" + ): + raise ValueError( + "Corruption scheme 't' requires 'h-shard'-partitioned triples" + ) + self.candidate_sampler = PlaceholderNegativeSampler( + corruption_scheme=corruption_scheme + ) + self.score_fn = score_fn + self.evaluation = evaluation + self.return_scores = return_scores + self.return_topk = return_topk + self.k = k + self.window_size = window_size + self.corruption_scheme = corruption_scheme + self.bess_module = AllScoresBESS( + self.candidate_sampler, self.score_fn, self.window_size + ) + + inf_options = poptorch.Options() + inf_options.replication_factor = self.bess_module.sharding.n_shard + inf_options.deviceIterations(self.batch_sampler.batches_per_step) + inf_options.outputMode(poptorch.OutputMode.All) + if use_ipu_model: + inf_options.useIpuModel(True) + self.dl = self.batch_sampler.get_dataloader(options=inf_options, shuffle=False) + + self.poptorch_module = poptorch.inferenceModel( + self.bess_module, options=inf_options + ) + self.poptorch_module.entity_embedding.replicaGrouping( + poptorch.CommGroupType.NoGrouping, + 0, + poptorch.VariableRetrievalMode.OnePerGroup, + ) + self.filter_triples: Optional[torch.Tensor] = None + if filter_triples: + # Reconstruct global IDs for all entities in triples + local_id_col = ( + 0 if self.batch_sampler.triple_partition_mode == "h_shard" else 2 + ) + triple_shard_offset = np.concatenate( + [np.array([0]), np.cumsum(batch_sampler.triple_counts)] + ) + global_id_triples = [] + for i in range(len(triple_shard_offset) - 1): + shard_triples = np.copy( + batch_sampler.triples[ + triple_shard_offset[i] : triple_shard_offset[i + 1] + ] + ) + shard_triples[ + :, local_id_col + ] = self.bess_module.sharding.shard_and_idx_to_entity[i][ + shard_triples[:, local_id_col] + ] + global_id_triples.append(shard_triples) + self.triples = torch.from_numpy(np.concatenate(global_id_triples, axis=0)) + self.filter_triples = torch.concat( + [ + tr if isinstance(tr, torch.Tensor) else torch.from_numpy(tr) + for tr in filter_triples + ], + dim=0, + ) + + def forward(self) -> Dict[str, Any]: + """ + Compute scores of all completions and (possibly) metrics. + + :return: + Scores, metrics, and (if provided in batch sampler) IDs + of inference triples (wrt partitioned_triple_set.triples) + to order results. + """ + scores = [] + ids = [] + metrics = [] + ranks = [] + topk_ids = [] + n_triple = 0 + for batch in tqdm(iter(self.dl)): + triple_mask = batch.pop("triple_mask") + if ( + self.candidate_sampler.corruption_scheme == "h" + and "head" in batch.keys() + ): + ground_truth = batch.pop("head") + elif ( + self.candidate_sampler.corruption_scheme == "t" + and "tail" in batch.keys() + ): + ground_truth = batch.pop("tail") + if self.batch_sampler.return_triple_idx: + triple_id = batch.pop("triple_idx") + ids.append(triple_id[triple_mask]) + n_triple += triple_mask.sum() + + batch_res = [] + batch_idx = [] + for i in range(self.bess_module.n_step): + step = ( + torch.tensor([i], dtype=torch.int32) + .broadcast_to( + ( + self.bess_module.sharding.n_shard + * self.batch_sampler.batches_per_step, + 1, + ) + ) + .contiguous() + ) + ent_slice = torch.minimum( + i * self.bess_module.window_size + + torch.arange(self.bess_module.window_size), + torch.tensor(self.bess_module.sharding.max_entity_per_shard - 1), + ) + # Global indices of entities scored in the step + batch_idx.append( + self.bess_module.sharding.shard_and_idx_to_entity[ + :, ent_slice + ].flatten() + ) + inp = {k: v.flatten(end_dim=1) for k, v in batch.items()} + inp.update(dict(step=step)) + batch_res.append(self.poptorch_module(**inp)) + batch_scores = torch.concat(batch_res, dim=-1) + # Filter out padding scores + batch_scores_filt = batch_scores[triple_mask.flatten()][ + :, np.unique(np.concatenate(batch_idx), return_index=True)[1] + ][:, : self.bess_module.sharding.n_entity] + if ground_truth is not None: + # Scores of positive triples + true_scores = batch_scores_filt[ + torch.arange(batch_scores_filt.shape[0]), + ground_truth[triple_mask], + ] + if self.filter_triples is not None: + # Filter for triples in batch + batch_filter = get_entity_filter( + self.triples[triple_id[triple_mask]], + self.filter_triples, + filter_mode=self.corruption_scheme, + ) + batch_scores_filt[batch_filter[:, 0], batch_filter[:, 1]] = -torch.inf + + if self.evaluation: + assert ( + ground_truth is not None + ), "Evaluation requires providing ground truth entities" + # If not already masked, mask scores of true triples + # to compute metrics + batch_scores_filt[ + torch.arange(batch_scores_filt.shape[0]), + ground_truth[triple_mask], + ] = -torch.inf + batch_ranks = self.evaluation.ranks_from_scores( + true_scores, batch_scores_filt + ) + metrics.append(self.evaluation.dict_metrics_from_ranks(batch_ranks)) + if self.evaluation.return_ranks: + ranks.append(batch_ranks) + if ground_truth is not None: + # Restore positive scores in the returned scores + batch_scores_filt[ + torch.arange(batch_scores_filt.shape[0]), + ground_truth[triple_mask], + ] = true_scores + if self.return_scores: + scores.append(batch_scores_filt) + if self.return_topk: + topk_ids.append(torch.topk(batch_scores_filt, k=self.k, dim=-1).indices) + + out = dict() + if scores: + out["scores"] = torch.concat(scores, dim=0) + if topk_ids: + out["topk_global_id"] = torch.concat(topk_ids, dim=0) + if ids: + out["triple_idx"] = torch.concat(ids, dim=0) + if self.evaluation: + final_metrics = dict() + for m in metrics[0].keys(): + # Reduce metrics over all batches + final_metrics[m] = self.evaluation.reduction( + torch.concat([met[m].reshape(-1) for met in metrics]) + ) + out["metrics"] = final_metrics # type: ignore + # Average metrics over all triples + out["metrics_avg"] = { + m: v.sum() / n_triple for m, v in final_metrics.items() + } # type: ignore + if ranks: + out["ranks"] = torch.concat(ranks, dim=0) + + return out diff --git a/besskge/utils.py b/besskge/utils.py index e93fd40..5e8e5c8 100644 --- a/besskge/utils.py +++ b/besskge/utils.py @@ -33,6 +33,42 @@ def gather_indices(x: torch.Tensor, index: torch.Tensor) -> torch.Tensor: return x.view(-1, mask_size) +def get_entity_filter( + triples: torch.Tensor, filter_triples: torch.Tensor, filter_mode: str +) -> torch.Tensor: + """ + Compare two sets of triples: for each triple (h,r,t) in the first set, find + the entities `e` such that (e,r,t) (or (h,r,e), depending on `filter_mode`) + appears in the second set of triples. + + :param triples: shape (x, 3) + The set of triples to construct filters for. + :param filter_triples: shape (y, 3) + The set of triples determining the head/tail entities to filter. + :param filter_mode: + Set to "h" to look for entities appearing as heads of the same (r,t) pair, + or to "t" to look for entities appearing as tails of the same (h,r) pair. + + :return: shape (z, 2) + The sparse filters. Each row is given by a tuple (i, j), with i the index + of the triple in `triples` to which the filter applies to and j the global + ID of the entity to filter. + """ + if filter_mode == "t": + ent_col = 0 + elif filter_mode == "h": + ent_col = 2 + else: + raise ValueError("`filter_mode` needs to be either 'h' or 't'") + relation_filter = (filter_triples[:, 1]) == triples[:, 1].view(-1, 1) + entity_filter = (filter_triples[:, ent_col]) == triples[:, ent_col].view(-1, 1) + + filter = (entity_filter & relation_filter).nonzero(as_tuple=False) + filter[:, 1] = filter_triples[:, 2 - ent_col].view(1, -1)[:, filter[:, 1]] + + return filter + + def complex_multiplication(v1: torch.Tensor, v2: torch.Tensor) -> torch.Tensor: """ Batched complex multiplication. diff --git a/docs/source/API_reference.rst b/docs/source/API_reference.rst index 563839f..5179b0a 100644 --- a/docs/source/API_reference.rst +++ b/docs/source/API_reference.rst @@ -6,6 +6,7 @@ BESS-KGE API Reference :template: module.rst :recursive: + besskge.pipeline besskge.dataset besskge.sharding besskge.batch_sampler diff --git a/requirements-dev.txt b/requirements-dev.txt index b66e578..1b22695 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,6 +4,7 @@ flake8==6.0.0 isort==5.12.0 mypy==1.0.1 pandas-stubs==2.0.1.230501 +tqdm-stubs==0.2.1 types-requests==2.28.11.17 pytest==7.2.1 pytest-cov==4.0.0 diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py new file mode 100644 index 0000000..4f42f23 --- /dev/null +++ b/tests/test_pipeline.py @@ -0,0 +1,178 @@ +# Copyright (c) 2023 Graphcore Ltd. All rights reserved. + +import numpy as np +import pytest +import torch +from torch.testing import assert_close + +from besskge.batch_sampler import RigidShardedBatchSampler +from besskge.dataset import KGDataset +from besskge.metric import Evaluation +from besskge.negative_sampler import PlaceholderNegativeSampler +from besskge.pipeline import AllScoresPipeline +from besskge.scoring import ComplEx +from besskge.sharding import PartitionedTripleSet, Sharding +from besskge.utils import get_entity_filter + +seed = 1234 +n_entity = 5000 +n_relation_type = 50 +n_shard = 4 +n_test_triple = 1000 +batches_per_step = 2 +shard_bs = 400 +embedding_size = 128 + +np.random.seed(seed) +torch.manual_seed(seed) + +sharding = Sharding.create(n_entity, n_shard, seed=seed) + +unsharded_entity_table = torch.randn(size=(n_entity, 2 * embedding_size)) +relation_table = torch.randn(size=(n_relation_type, 2 * embedding_size)) + +test_triples_h = np.random.choice(n_entity - 1, size=n_test_triple, replace=False) +test_triples_t = np.random.choice(n_entity - 1, size=n_test_triple, replace=False) +test_triples_r = np.random.randint(n_relation_type, size=n_test_triple) +triples = {"test": np.stack([test_triples_h, test_triples_r, test_triples_t], axis=1)} + + +@pytest.mark.parametrize("corruption_scheme", ["h", "t"]) +@pytest.mark.parametrize( + "filter_scores, extra_only", [(True, True), (True, False), (False, False)] +) +def test_all_scores_pipeline( + corruption_scheme: str, filter_scores: bool, extra_only: bool +) -> None: + ds = KGDataset( + n_entity=n_entity, + n_relation_type=n_relation_type, + entity_dict=None, + relation_dict=None, + type_offsets=None, + triples=triples, + ) + + partition_mode = "h_shard" if corruption_scheme == "t" else "t_shard" + partitioned_triple_set = PartitionedTripleSet.create_from_dataset( + ds, "test", sharding, partition_mode=partition_mode + ) + + score_fn = ComplEx( + negative_sample_sharing=True, + sharding=sharding, + n_relation_type=ds.n_relation_type, + embedding_size=embedding_size, + entity_initializer=unsharded_entity_table, + relation_initializer=relation_table, + ) + placeholder_ns = PlaceholderNegativeSampler( + corruption_scheme=corruption_scheme, seed=seed + ) + + test_bs = RigidShardedBatchSampler( + partitioned_triple_set=partitioned_triple_set, + negative_sampler=placeholder_ns, + shard_bs=shard_bs, + batches_per_step=batches_per_step, + seed=seed, + hrt_freq_weighting=False, + duplicate_batch=False, + return_triple_idx=True, + ) + + evaluation = Evaluation( + ["mrr", "hits@10"], mode="average", reduction="sum", return_ranks=True + ) + + ground_truth_col = 0 if corruption_scheme == "h" else 2 + if filter_scores: + extra_filter_triples = np.copy(triples["test"]) + extra_filter_triples[:, ground_truth_col] += 1 + triples_to_filter = ( + [extra_filter_triples] + if extra_only + else [triples["test"], extra_filter_triples] + ) + else: + triples_to_filter = None + + pipeline = AllScoresPipeline( + test_bs, + corruption_scheme, + score_fn, + evaluation, + filter_triples=triples_to_filter, # type: ignore + return_scores=True, + return_topk=True, + k=10, + window_size=1000, + use_ipu_model=True, + ) + out = pipeline() + + # Shuffle triples in same order of out["scores"] + triple_reordered = torch.from_numpy( + ds.triples["test"][partitioned_triple_set.triple_sort_idx[out["triple_idx"]]] + ) + + # All scores, computed on CPU + if corruption_scheme == "t": + cpu_scores = score_fn.score_tails( + unsharded_entity_table[triple_reordered[:, 0]], + triple_reordered[:, 1], + unsharded_entity_table.unsqueeze(0), + ) + else: + cpu_scores = score_fn.score_heads( + unsharded_entity_table.unsqueeze(0), + triple_reordered[:, 1], + unsharded_entity_table[triple_reordered[:, 2]], + ) + + pos_scores = score_fn.score_triple( + unsharded_entity_table[triple_reordered[:, 0]], + triple_reordered[:, 1], + unsharded_entity_table[triple_reordered[:, 2]], + ).flatten() + # mask positive scores to compute metrics + cpu_scores[ + torch.arange(cpu_scores.shape[0]), triple_reordered[:, ground_truth_col] + ] = -torch.inf + if filter_scores: + filter_triples = torch.from_numpy( + np.concatenate(triples_to_filter, axis=0) # type: ignore + ) + tr_filter = get_entity_filter( + triple_reordered, filter_triples, corruption_scheme + ) + cpu_scores[tr_filter[:, 0], tr_filter[:, 1]] = -torch.inf + # check filters (for convenience, here h/t entities are non-repeating) + if extra_only: + assert torch.all(tr_filter[:, 0] == torch.arange(n_test_triple)) + assert torch.all( + tr_filter[:, 1] == triple_reordered[:, ground_truth_col] + 1 + ) + else: + assert torch.all(tr_filter[::2, 0] == tr_filter[1::2, 0]) + assert torch.all(tr_filter[::2, 0] == torch.arange(n_test_triple)) + assert torch.all(tr_filter[::2, 1] == triple_reordered[:, ground_truth_col]) + assert torch.all( + tr_filter[1::2, 1] == triple_reordered[:, ground_truth_col] + 1 + ) + + cpu_ranks = evaluation.ranks_from_scores(pos_scores, cpu_scores) + # we allow for a off-by-one rank difference on at most 1% of triples, + # due to rounding differences in CPU vs IPU score computations + assert torch.all(torch.abs(cpu_ranks - out["ranks"]) <= 1) + assert (cpu_ranks != out["ranks"]).sum() < n_test_triple / 100 + + # restore positive scores + cpu_scores[ + torch.arange(cpu_scores.shape[0]), triple_reordered[:, ground_truth_col] + ] = pos_scores + + assert_close(cpu_scores, out["scores"], atol=1e-3, rtol=1e-4) + assert torch.all( + torch.topk(cpu_scores, k=pipeline.k, dim=-1).indices == out["topk_global_id"] + )