Skip to content

Commit

Permalink
Use 0-vector for OOV lexemes (#8639)
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianeboyd committed Jul 16, 2021
1 parent 99a3f26 commit 6db9389
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion spacy/ml/models/multi_task.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
from thinc.api import MultiSoftmax, list2array
from thinc.api import to_categorical, CosineDistance, L2Distance

-from ...util import registry
+from ...util import registry, OOV_RANK
from ...errors import Errors
from ...attrs import ID

@@ -70,6 +70,7 @@ def get_vectors_loss(ops, docs, prediction, distance):
# and look them up all at once. This prevents data copying.
ids = ops.flatten([doc.to_array(ID).ravel() for doc in docs])
target = docs[0].vocab.vectors.data[ids]
+target[ids == OOV_RANK] = 0
d_target, loss = distance(prediction, target)
return loss, d_target

Expand Down

0 comments on commit 6db9389

Please sign in to comment.