Update to PyTorch 1.13 (+ 1.14 compatibility) (#1074)
fhieber authored Nov 6, 2022
1 parent f852fbd · commit 288baa7
Showing 9 changed files with 45 additions and 39 deletions.
CHANGELOG.md: 10 changes (8 additions & 2 deletions)
@@ -11,6 +11,13 @@ Note that Sockeye has checks in place to not translate with an old model that wa

Each version section may have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_.

## [3.1.27]

### Changed

- allow torch 1.13 in requirements.txt
- Replaced deprecated `torch.testing.assert_allclose` with `torch.testing.assert_close` for PyTorch 1.14 compatibility.
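
As an aside (not part of the commit), a minimal sketch of the rename: `torch.testing.assert_close` is the drop-in replacement for the deprecated `torch.testing.assert_allclose`.

    import torch as pt

    actual = pt.tensor([1.0, 2.0, 3.0])
    expected = pt.tensor([1.0, 2.0, 3.0])

    # Deprecated and slated for removal after PyTorch 1.13:
    # pt.testing.assert_allclose(actual, expected)

    # Replacement applied throughout this commit:
    pt.testing.assert_close(actual, expected)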

## [3.1.26]

### Added
@@ -22,8 +29,7 @@ Each version section may have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_.

### Changed

- device.init_device called by train, translate, and score

- `device.init_device()` called by train, translate, and score
- allow torch 1.12 in requirements.txt

## [3.1.25]
requirements/requirements.txt: 2 changes (1 addition & 1 deletion)
@@ -1,4 +1,4 @@
torch>=1.10.0,<1.13.0
torch>=1.10.0,<1.14.0
pyyaml>=5.1
numpy>1.16.0,<2.0.0
sacrebleu>=2.3.0
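
A hypothetical runtime check, not from this repository, showing what the widened pin admits; the `packaging` helper is an assumption here:

    import torch
    from packaging import version  # assumed available; illustration only

    # torch>=1.10.0,<1.14.0 now admits the 1.13.x series but still excludes 1.14.
    v = version.parse(torch.__version__)
    assert version.parse("1.10.0") <= v < version.parse("1.14.0")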
sockeye/__init__.py: 2 changes (1 addition & 1 deletion)
@@ -11,4 +11,4 @@
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

__version__ = '3.1.26'
__version__ = '3.1.27'
sockeye/output_handler.py: 2 changes (1 addition & 1 deletion)
@@ -147,7 +147,7 @@ def handle(self,
:param t_walltime: Total walltime for translation.
"""
result = "{:.6f}".format(t_output.score)
if hasattr(t_output, 'factor_scores') and t_output.factor_scores:
if hasattr(t_output, 'factor_scores') and t_output.factor_scores is not None:
factor_scores = "\t".join("{:.6f}".format(fs) for fs in t_output.factor_scores)
result = f"{result}\t{factor_scores}"
print(result, file=self.stream, flush=True)
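
A plausible reason for the tightened check, worth spelling out: if `factor_scores` were a multi-element tensor, a plain `if t_output.factor_scores:` would raise, and an empty container would be skipped silently. A small sketch with assumed example values:

    import torch as pt

    factor_scores = pt.tensor([0.25, -1.5])  # assumed example values

    # if factor_scores:  # would raise RuntimeError: Boolean value of Tensor
    #     ...            # with more than one element is ambiguous

    if factor_scores is not None:  # explicit, and correct for tensors too
        print("\t".join("{:.6f}".format(fs) for fs in factor_scores))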
test/unit/test_beam_search.py: 26 changes (13 additions & 13 deletions)
@@ -29,14 +29,14 @@ def test_length_penalty_default():
lengths = pt.tensor([[1], [2], [3]])
length_penalty = sockeye.beam_search.LengthPenalty(1.0, 0.0)
expected_lp = pt.tensor([[1.0], [2.], [3.]])
pt.testing.assert_allclose(length_penalty(lengths), expected_lp)
pt.testing.assert_close(length_penalty(lengths), expected_lp)


def test_length_penalty():
lengths = pt.tensor([[1], [2], [3]])
length_penalty = sockeye.beam_search.LengthPenalty(.2, 5.0)
expected_lp = pt.tensor([[6 ** 0.2 / 6 ** 0.2], [7 ** 0.2 / 6 ** 0.2], [8 ** 0.2 / 6 ** 0.2]])
pt.testing.assert_allclose(length_penalty(lengths), expected_lp)
pt.testing.assert_close(length_penalty(lengths), expected_lp)


def test_length_penalty_int_input():
@@ -51,15 +51,15 @@ def test_brevity_penalty_default():
ref_lengths = pt.tensor([[2], [3], [2]])
brevity_penalty = sockeye.beam_search.BrevityPenalty(0.0)
expected_bp = pt.tensor([[0], [0], [0]], dtype=pt.long)
pt.testing.assert_allclose(brevity_penalty(hyp_lengths, ref_lengths), expected_bp)
pt.testing.assert_close(brevity_penalty(hyp_lengths, ref_lengths), expected_bp)


def test_brevity_penalty():
hyp_lengths = pt.tensor([[1], [2], [3]])
ref_lengths = pt.tensor([[7], [2], [91]])
brevity_penalty = sockeye.beam_search.BrevityPenalty(3.5)
expected_bp = pt.tensor([[3.5 * (1 - 7 / 1)], [0.0], [3.5 * (1 - 91 / 3)]])
pt.testing.assert_allclose(brevity_penalty(hyp_lengths, ref_lengths), expected_bp)
pt.testing.assert_close(brevity_penalty(hyp_lengths, ref_lengths), expected_bp)


def test_brevity_penalty_int_input():
@@ -82,7 +82,7 @@ def test_candidate_scorer():

scores = scorer(raw_scores, lengths, reference_lengths)
unnormalized_scores = scorer.unnormalize(scores, lengths, reference_lengths)
pt.testing.assert_allclose(unnormalized_scores, raw_scores)
pt.testing.assert_close(unnormalized_scores, raw_scores)

# int/float input
raw_scores = 5.6
@@ -228,7 +228,7 @@ def test_update_scores(use_unk_dist):
pt.tensor(pad_dist), pt.tensor(eos_dist))
scores = scores.detach().numpy()
lengths = lengths
pt.testing.assert_allclose(lengths, pt.tensor([1, 1, 1])) # all lengths but finished updated + 1
pt.testing.assert_close(lengths, pt.tensor([1, 1, 1], dtype=pt.int32)) # all lengths but finished updated + 1
assert (scores[0] == (1. + target_dists[0] + eos_dist)).all() # 1 reached max length, force eos
assert (scores[1] == (1. + pad_dist[0]).tolist()).all() # 2 finished, force pad, keep score
if use_unk_dist:
@@ -341,7 +341,7 @@ def test_beam_search():

print('beam search lengths', r.lengths)
print('internal lengths', inference.states[0])
pt.testing.assert_allclose(r.lengths, inference.states[0].squeeze(1))
pt.testing.assert_close(r.lengths, inference.states[0].squeeze(1))
assert inference.states[1] == max_length


@@ -355,7 +355,7 @@ def test_get_nvs_vocab_slice_ids():
bow, output_vocab_size = sockeye.beam_search._get_nvs_vocab_slice_ids(nvs_thresh=0.5,
nvs_prediction=nvs_prediction)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)

# Batch size 1
# 0 1 2 3 4 5 6 7 8 9
@@ -364,7 +364,7 @@ def test_get_nvs_vocab_slice_ids():
bow, output_vocab_size = sockeye.beam_search._get_nvs_vocab_slice_ids(nvs_thresh=0.5,
nvs_prediction=nvs_prediction)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)

# Batch size 1 + higher thresh
# 0 1 2 3 4 5 6 7 8 9
@@ -373,7 +373,7 @@ def test_get_nvs_vocab_slice_ids():
bow, output_vocab_size = sockeye.beam_search._get_nvs_vocab_slice_ids(nvs_thresh=0.9,
nvs_prediction=nvs_prediction)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)

# Batch size 2 + target prefix
# Note: the first 4 tokens are special tokens (PAD, UNK etc.)
@@ -386,7 +386,7 @@ def test_get_nvs_vocab_slice_ids():
nvs_prediction=nvs_prediction,
target_prefix=target_prefix)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)

# Batch size 2 + blocking lexicon
# Note: the first 4 tokens are special tokens (PAD, UNK etc.)
@@ -401,7 +401,7 @@ def test_get_nvs_vocab_slice_ids():
nvs_prediction=nvs_prediction,
restrict_lexicon=restrict_lexicon)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)


def test_get_vocab_slice_ids_blocking():
@@ -419,4 +419,4 @@ def test_get_vocab_slice_ids_blocking():
output_vocab_size=6
)
expected_vocab_slice_ids = pt.tensor([0, 1, 2, 4, 5, C.EOS_ID, C.EOS_ID, C.EOS_ID])
pt.testing.assert_allclose(vocab_slice_ids, expected_vocab_slice_ids)
pt.testing.assert_close(vocab_slice_ids, expected_vocab_slice_ids)
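
One behavioral difference surfaces in the hunks above: `assert_close` compares dtypes (and devices) by default, which `assert_allclose` did not, so the expected lengths tensor in `test_update_scores` gains an explicit `dtype=pt.int32`. A sketch of the two modes:

    import torch as pt

    # Passes: values and dtypes both match.
    pt.testing.assert_close(pt.tensor([1, 1, 1], dtype=pt.int32),
                            pt.tensor([1, 1, 1], dtype=pt.int32))

    # A float/int mismatch would fail on the dtype check alone; opt out
    # explicitly when only the values matter.
    pt.testing.assert_close(pt.tensor([1.0, 1.0]), pt.tensor([1, 1]),
                            check_dtype=False)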
test/unit/test_layers.py: 24 changes (12 additions & 12 deletions)
@@ -26,12 +26,12 @@ def test_lhuc():
lhuc = sockeye.layers.LHUC(num_hidden=num_hidden)
pt.nn.init.zeros_(lhuc.weight)
out = lhuc(inp)
pt.testing.assert_allclose(inp, out)
pt.testing.assert_close(inp, out)

lhuc = sockeye.layers.LHUC(num_hidden=num_hidden)
pt.nn.init.constant_(lhuc.weight, 20.0)
out = lhuc(inp)
pt.testing.assert_allclose(2 * inp, out)
pt.testing.assert_close(2 * inp, out)


def test_positional_embeddings():
@@ -51,18 +51,18 @@ def test_positional_embeddings():
scale_down_positions=scale_down_positions)
# no steps
out = b(data, None)
pt.testing.assert_allclose(out[0], expected_fixed_embedding)
pt.testing.assert_allclose(out[1], expected_fixed_embedding)
pt.testing.assert_close(out[0], expected_fixed_embedding)
pt.testing.assert_close(out[1], expected_fixed_embedding)

# steps
steps = pt.tensor([2, 3, 1, 1, 1]).unsqueeze(0)
out = b(data, steps)
pt.testing.assert_allclose(out[0, 0], expected_fixed_embedding[2])
pt.testing.assert_allclose(out[1, 0], expected_fixed_embedding[2])
pt.testing.assert_allclose(out[0, 1], expected_fixed_embedding[3])
pt.testing.assert_allclose(out[1, 1], expected_fixed_embedding[3])
pt.testing.assert_allclose(out[0, 2], expected_fixed_embedding[1])
pt.testing.assert_allclose(out[1, 2], expected_fixed_embedding[1])
pt.testing.assert_close(out[0, 0], expected_fixed_embedding[2])
pt.testing.assert_close(out[1, 0], expected_fixed_embedding[2])
pt.testing.assert_close(out[0, 1], expected_fixed_embedding[3])
pt.testing.assert_close(out[1, 1], expected_fixed_embedding[3])
pt.testing.assert_close(out[0, 2], expected_fixed_embedding[1])
pt.testing.assert_close(out[1, 2], expected_fixed_embedding[1])

# learned embeddings
b = sockeye.layers.PositionalEmbeddings(weight_type='learned',
@@ -73,7 +73,7 @@
pt.nn.init.constant_(b.weight, val=1.0)
expected_learned_embeddings = pt.ones(data_len, num_embed)
out = b(data, None)
pt.testing.assert_allclose(out[0], expected_learned_embeddings)
pt.testing.assert_close(out[0], expected_learned_embeddings)


def test_output_layer():
@@ -92,7 +92,7 @@ def test_output_layer():
output_restricted = b(data, vocab_slice_ids)
assert output_restricted.shape == (2, 10, len(vocab_slice_ids))

pt.testing.assert_allclose(output_restricted, reduced_output)
pt.testing.assert_close(output_restricted, reduced_output, equal_nan=True)


@pytest.mark.parametrize('qlen, kvlen, batch_size, hidden, heads',
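
Another changed default: `assert_allclose` treated NaNs in matching positions as equal, while `assert_close` requires opting in via `equal_nan=True`, hence the explicit flag on the restricted output-layer comparison in `test_output_layer` above. A minimal sketch:

    import torch as pt

    nan = float("nan")

    # Fails under assert_close defaults (equal_nan=False):
    # pt.testing.assert_close(pt.tensor([nan]), pt.tensor([nan]))

    # Passes, restoring the old assert_allclose behavior:
    pt.testing.assert_close(pt.tensor([nan]), pt.tensor([nan]), equal_nan=True)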
test/unit/test_loss.py: 14 changes (7 additions & 7 deletions)
@@ -94,8 +94,8 @@ def test_cross_entropy_loss():
expected_loss_value = pt.tensor(
-(math.log(1 / 4) * 3) / num_valid) # 3 valid rows, all uniform, divided by num_valid

pt.testing.assert_allclose(loss_value, expected_loss_value)
pt.testing.assert_allclose(logits.grad, expected_logits_grad)
pt.testing.assert_close(loss_value, expected_loss_value)
pt.testing.assert_close(logits.grad, expected_logits_grad)


def test_label_to_bow():
@@ -109,8 +109,8 @@ def test_label_to_bow():
expected_bow = pt.tensor([
[0, 1, 0, 1],
[1, 0, 0, 0],
])
pt.testing.assert_allclose(bow, expected_bow)
], dtype=pt.float32)
pt.testing.assert_close(bow, expected_bow)


def test_binary_cross_entropy_loss():
@@ -147,9 +147,9 @@ def test_binary_cross_entropy_loss():
loss_value.backward()
assert loss_samples.item() == 1 # this loss returns always 1
expected_loss = -pt.log(pt.sigmoid(pt.tensor(1))) / vocab_size / batch_size
pt.testing.assert_allclose(loss_value, expected_loss)
expected_grad = - 1/ (pt.exp(pt.tensor(1)) + 1) / vocab_size / batch_size
pt.testing.assert_allclose(logits.grad,
pt.testing.assert_close(loss_value, expected_loss)
expected_grad = - 1 / (pt.exp(pt.tensor(1)) + 1) / vocab_size / batch_size
pt.testing.assert_close(logits.grad,
pt.tensor([[0.0000, 0.0000, 0.0000, expected_grad],
[0.0000, 0.0000, 0.0000, 0.0000]])
)
test/unit/test_params.py: 2 changes (1 addition & 1 deletion)
@@ -77,7 +77,7 @@ def test_set_parameters():
name = 'output_layer.weight'
model.set_parameters({name: param})

pt.testing.assert_allclose(model_params['output_layer.weight'].data, param.data)
pt.testing.assert_close(model_params['output_layer.weight'].data, param.data)


def test_set_parameters_allow_missing():
test/unit/test_utils.py: 2 changes (1 addition & 1 deletion)
@@ -166,7 +166,7 @@ def test_average_tensors():
expected_average += array
expected_average /= 4

pt.testing.assert_allclose(utils.average_tensors(arrays), expected_average)
pt.testing.assert_close(utils.average_tensors(arrays), expected_average)

with pytest.raises(utils.SockeyeError) as e:
other_shape = (12, 13)