Update to PyTorch 1.13 (+ 1.14 compatibility) (#1074)
fhieber authored Nov 6, 2022
1 parent f852fbd · commit 288baa7
Showing 9 changed files with 45 additions and 39 deletions.
CHANGELOG.md: 10 changes (8 additions & 2 deletions)
@@ -11,6 +11,13 @@ Note that Sockeye has checks in place to not translate with an old model that wa

Each version section may have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_.

## [3.1.27]

### Changed

- allow torch 1.13 in requirements.txt
- Replaced deprecated `torch.testing.assert_allclose` with `torch.testing.assert_close` for PyTorch 1.14 compatibility.
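
As an aside (not part of the commit), a minimal sketch of the rename: `torch.testing.assert_close` is the drop-in replacement for the deprecated `torch.testing.assert_allclose`.

    import torch as pt

    actual = pt.tensor([1.0, 2.0, 3.0])
    expected = pt.tensor([1.0, 2.0, 3.0])

    # Deprecated and slated for removal after PyTorch 1.13:
    # pt.testing.assert_allclose(actual, expected)

    # Replacement applied throughout this commit:
    pt.testing.assert_close(actual, expected)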

## [3.1.26]

### Added
@@ -22,8 +29,7 @@ Each version section may have subsections for: _Added_, _Changed_, _Removed_, _Deprecated_, and _Fixed_.

### Changed

- device.init_device called by train, translate, and score

- `device.init_device()` called by train, translate, and score
- allow torch 1.12 in requirements.txt

## [3.1.25]
requirements/requirements.txt: 2 changes (1 addition & 1 deletion)
@@ -1,4 +1,4 @@
torch>=1.10.0,<1.13.0
torch>=1.10.0,<1.14.0
pyyaml>=5.1
numpy>1.16.0,<2.0.0
sacrebleu>=2.3.0
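
A hypothetical runtime check, not from this repository, showing what the widened pin admits; the `packaging` helper is an assumption here:

    import torch
    from packaging import version  # assumed available; illustration only

    # torch>=1.10.0,<1.14.0 now admits the 1.13.x series but still excludes 1.14.
    v = version.parse(torch.__version__)
    assert version.parse("1.10.0") <= v < version.parse("1.14.0")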
sockeye/__init__.py: 2 changes (1 addition & 1 deletion)
@@ -11,4 +11,4 @@
# express or implied. See the License for the specific language governing
# permissions and limitations under the License.

__version__ = '3.1.26'
__version__ = '3.1.27'
sockeye/output_handler.py: 2 changes (1 addition & 1 deletion)
@@ -147,7 +147,7 @@ def handle(self,
:param t_walltime: Total walltime for translation.
"""
result = "{:.6f}".format(t_output.score)
if hasattr(t_output, 'factor_scores') and t_output.factor_scores:
if hasattr(t_output, 'factor_scores') and t_output.factor_scores is not None:
factor_scores = "\t".join("{:.6f}".format(fs) for fs in t_output.factor_scores)
result = f"{result}\t{factor_scores}"
print(result, file=self.stream, flush=True)
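
A plausible reason for the tightened check, worth spelling out: if `factor_scores` were a multi-element tensor, a plain `if t_output.factor_scores:` would raise, and an empty container would be skipped silently. A small sketch with assumed example values:

    import torch as pt

    factor_scores = pt.tensor([0.25, -1.5])  # assumed example values

    # if factor_scores:  # would raise RuntimeError: Boolean value of Tensor
    #     ...            # with more than one element is ambiguous

    if factor_scores is not None:  # explicit, and correct for tensors too
        print("\t".join("{:.6f}".format(fs) for fs in factor_scores))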
test/unit/test_beam_search.py: 26 changes (13 additions & 13 deletions)
@@ -29,14 +29,14 @@ def test_length_penalty_default():
lengths = pt.tensor([[1], [2], [3]])
length_penalty = sockeye.beam_search.LengthPenalty(1.0, 0.0)
expected_lp = pt.tensor([[1.0], [2.], [3.]])
pt.testing.assert_allclose(length_penalty(lengths), expected_lp)
pt.testing.assert_close(length_penalty(lengths), expected_lp)


def test_length_penalty():
lengths = pt.tensor([[1], [2], [3]])
length_penalty = sockeye.beam_search.LengthPenalty(.2, 5.0)
expected_lp = pt.tensor([[6 ** 0.2 / 6 ** 0.2], [7 ** 0.2 / 6 ** 0.2], [8 ** 0.2 / 6 ** 0.2]])
pt.testing.assert_allclose(length_penalty(lengths), expected_lp)
pt.testing.assert_close(length_penalty(lengths), expected_lp)


def test_length_penalty_int_input():
@@ -51,15 +51,15 @@ def test_brevity_penalty_default():
ref_lengths = pt.tensor([[2], [3], [2]])
brevity_penalty = sockeye.beam_search.BrevityPenalty(0.0)
expected_bp = pt.tensor([[0], [0], [0]], dtype=pt.long)
pt.testing.assert_allclose(brevity_penalty(hyp_lengths, ref_lengths), expected_bp)
pt.testing.assert_close(brevity_penalty(hyp_lengths, ref_lengths), expected_bp)


def test_brevity_penalty():
hyp_lengths = pt.tensor([[1], [2], [3]])
ref_lengths = pt.tensor([[7], [2], [91]])
brevity_penalty = sockeye.beam_search.BrevityPenalty(3.5)
expected_bp = pt.tensor([[3.5 * (1 - 7 / 1)], [0.0], [3.5 * (1 - 91 / 3)]])
pt.testing.assert_allclose(brevity_penalty(hyp_lengths, ref_lengths), expected_bp)
pt.testing.assert_close(brevity_penalty(hyp_lengths, ref_lengths), expected_bp)


def test_brevity_penalty_int_input():
@@ -82,7 +82,7 @@ def test_candidate_scorer():

scores = scorer(raw_scores, lengths, reference_lengths)
unnormalized_scores = scorer.unnormalize(scores, lengths, reference_lengths)
pt.testing.assert_allclose(unnormalized_scores, raw_scores)
pt.testing.assert_close(unnormalized_scores, raw_scores)

# int/float input
raw_scores = 5.6
@@ -228,7 +228,7 @@ def test_update_scores(use_unk_dist):
pt.tensor(pad_dist), pt.tensor(eos_dist))
scores = scores.detach().numpy()
lengths = lengths
pt.testing.assert_allclose(lengths, pt.tensor([1, 1, 1])) # all lengths but finished updated + 1
pt.testing.assert_close(lengths, pt.tensor([1, 1, 1], dtype=pt.int32)) # all lengths but finished updated + 1
assert (scores[0] == (1. + target_dists[0] + eos_dist)).all() # 1 reached max length, force eos
assert (scores[1] == (1. + pad_dist[0]).tolist()).all() # 2 finished, force pad, keep score
if use_unk_dist:
@@ -341,7 +341,7 @@ def test_beam_search():

print('beam search lengths', r.lengths)
print('internal lengths', inference.states[0])
pt.testing.assert_allclose(r.lengths, inference.states[0].squeeze(1))
pt.testing.assert_close(r.lengths, inference.states[0].squeeze(1))
assert inference.states[1] == max_length


@@ -355,7 +355,7 @@ def test_get_nvs_vocab_slice_ids():
bow, output_vocab_size = sockeye.beam_search._get_nvs_vocab_slice_ids(nvs_thresh=0.5,
nvs_prediction=nvs_prediction)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)

# Batch size 1
# 0 1 2 3 4 5 6 7 8 9
@@ -364,7 +364,7 @@ def test_get_nvs_vocab_slice_ids():
bow, output_vocab_size = sockeye.beam_search._get_nvs_vocab_slice_ids(nvs_thresh=0.5,
nvs_prediction=nvs_prediction)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)

# Batch size 1 + higher thresh
# 0 1 2 3 4 5 6 7 8 9
@@ -373,7 +373,7 @@ def test_get_nvs_vocab_slice_ids():
bow, output_vocab_size = sockeye.beam_search._get_nvs_vocab_slice_ids(nvs_thresh=0.9,
nvs_prediction=nvs_prediction)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)

# Batch size 2 + target prefix
# Note: the first 4 tokens are special tokens (PAD, UNK etc.)
@@ -386,7 +386,7 @@ def test_get_nvs_vocab_slice_ids():
nvs_prediction=nvs_prediction,
target_prefix=target_prefix)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)

# Batch size 2 + blocking lexicon
# Note: the first 4 tokens are special tokens (PAD, UNK etc.)
@@ -401,7 +401,7 @@ def test_get_nvs_vocab_slice_ids():
nvs_prediction=nvs_prediction,
restrict_lexicon=restrict_lexicon)
assert output_vocab_size == expected_bow.shape[0]
pt.testing.assert_allclose(bow, expected_bow)
pt.testing.assert_close(bow, expected_bow)


def test_get_vocab_slice_ids_blocking():
@@ -419,4 +419,4 @@ def test_get_vocab_slice_ids_blocking():
output_vocab_size=6
)
expected_vocab_slice_ids = pt.tensor([0, 1, 2, 4, 5, C.EOS_ID, C.EOS_ID, C.EOS_ID])
pt.testing.assert_allclose(vocab_slice_ids, expected_vocab_slice_ids)
pt.testing.assert_close(vocab_slice_ids, expected_vocab_slice_ids)
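
One behavioral difference surfaces in the hunks above: `assert_close` compares dtypes (and devices) by default, which `assert_allclose` did not, so the expected lengths tensor in `test_update_scores` gains an explicit `dtype=pt.int32`. A sketch of the two modes:

    import torch as pt

    # Passes: values and dtypes both match.
    pt.testing.assert_close(pt.tensor([1, 1, 1], dtype=pt.int32),
                            pt.tensor([1, 1, 1], dtype=pt.int32))

    # A float/int mismatch would fail on the dtype check alone; opt out
    # explicitly when only the values matter.
    pt.testing.assert_close(pt.tensor([1.0, 1.0]), pt.tensor([1, 1]),
                            check_dtype=False)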
test/unit/test_layers.py: 24 changes (12 additions & 12 deletions)
@@ -26,12 +26,12 @@ def test_lhuc():
lhuc = sockeye.layers.LHUC(num_hidden=num_hidden)
pt.nn.init.zeros_(lhuc.weight)
out = lhuc(inp)
pt.testing.assert_allclose(inp, out)
pt.testing.assert_close(inp, out)

lhuc = sockeye.layers.LHUC(num_hidden=num_hidden)
pt.nn.init.constant_(lhuc.weight, 20.0)
out = lhuc(inp)
pt.testing.assert_allclose(2 * inp, out)
pt.testing.assert_close(2 * inp, out)


def test_positional_embeddings():
@@ -51,18 +51,18 @@ def test_positional_embeddings():
scale_down_positions=scale_down_positions)
# no steps
out = b(data, None)
pt.testing.assert_allclose(out[0], expected_fixed_embedding)
pt.testing.assert_allclose(out[1], expected_fixed_embedding)
pt.testing.assert_close(out[0], expected_fixed_embedding)
pt.testing.assert_close(out[1], expected_fixed_embedding)

# steps
steps = pt.tensor([2, 3, 1, 1, 1]).unsqueeze(0)
out = b(data, steps)
pt.testing.assert_allclose(out[0, 0], expected_fixed_embedding[2])
pt.testing.assert_allclose(out[1, 0], expected_fixed_embedding[2])
pt.testing.assert_allclose(out[0, 1], expected_fixed_embedding[3])
pt.testing.assert_allclose(out[1, 1], expected_fixed_embedding[3])
pt.testing.assert_allclose(out[0, 2], expected_fixed_embedding[1])
pt.testing.assert_allclose(out[1, 2], expected_fixed_embedding[1])
pt.testing.assert_close(out[0, 0], expected_fixed_embedding[2])
pt.testing.assert_close(out[1, 0], expected_fixed_embedding[2])
pt.testing.assert_close(out[0, 1], expected_fixed_embedding[3])
pt.testing.assert_close(out[1, 1], expected_fixed_embedding[3])
pt.testing.assert_close(out[0, 2], expected_fixed_embedding[1])
pt.testing.assert_close(out[1, 2], expected_fixed_embedding[1])

# learned embeddings
b = sockeye.layers.PositionalEmbeddings(weight_type='learned',
@@ -73,7 +73,7 @@
pt.nn.init.constant_(b.weight, val=1.0)
expected_learned_embeddings = pt.ones(data_len, num_embed)
out = b(data, None)
pt.testing.assert_allclose(out[0], expected_learned_embeddings)
pt.testing.assert_close(out[0], expected_learned_embeddings)


def test_output_layer():
@@ -92,7 +92,7 @@ def test_output_layer():
output_restricted = b(data, vocab_slice_ids)
assert output_restricted.shape == (2, 10, len(vocab_slice_ids))

pt.testing.assert_allclose(output_restricted, reduced_output)
pt.testing.assert_close(output_restricted, reduced_output, equal_nan=True)


@pytest.mark.parametrize('qlen, kvlen, batch_size, hidden, heads',
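
Another changed default: `assert_allclose` treated NaNs in matching positions as equal, while `assert_close` requires opting in via `equal_nan=True`, hence the explicit flag on the restricted output-layer comparison in `test_output_layer` above. A minimal sketch:

    import torch as pt

    nan = float("nan")

    # Fails under assert_close defaults (equal_nan=False):
    # pt.testing.assert_close(pt.tensor([nan]), pt.tensor([nan]))

    # Passes, restoring the old assert_allclose behavior:
    pt.testing.assert_close(pt.tensor([nan]), pt.tensor([nan]), equal_nan=True)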
test/unit/test_loss.py: 14 changes (7 additions & 7 deletions)
@@ -94,8 +94,8 @@ def test_cross_entropy_loss():
expected_loss_value = pt.tensor(
-(math.log(1 / 4) * 3) / num_valid) # 3 valid rows, all uniform, divided by num_valid

pt.testing.assert_allclose(loss_value, expected_loss_value)
pt.testing.assert_allclose(logits.grad, expected_logits_grad)
pt.testing.assert_close(loss_value, expected_loss_value)
pt.testing.assert_close(logits.grad, expected_logits_grad)


def test_label_to_bow():
@@ -109,8 +109,8 @@ def test_label_to_bow():
expected_bow = pt.tensor([
[0, 1, 0, 1],
[1, 0, 0, 0],
])
pt.testing.assert_allclose(bow, expected_bow)
], dtype=pt.float32)
pt.testing.assert_close(bow, expected_bow)


def test_binary_cross_entropy_loss():
@@ -147,9 +147,9 @@ def test_binary_cross_entropy_loss():
loss_value.backward()
assert loss_samples.item() == 1 # this loss returns always 1
expected_loss = -pt.log(pt.sigmoid(pt.tensor(1))) / vocab_size / batch_size
pt.testing.assert_allclose(loss_value, expected_loss)
expected_grad = - 1/ (pt.exp(pt.tensor(1)) + 1) / vocab_size / batch_size
pt.testing.assert_allclose(logits.grad,
pt.testing.assert_close(loss_value, expected_loss)
expected_grad = - 1 / (pt.exp(pt.tensor(1)) + 1) / vocab_size / batch_size
pt.testing.assert_close(logits.grad,
pt.tensor([[0.0000, 0.0000, 0.0000, expected_grad],
[0.0000, 0.0000, 0.0000, 0.0000]])
)
test/unit/test_params.py: 2 changes (1 addition & 1 deletion)
@@ -77,7 +77,7 @@ def test_set_parameters():
name = 'output_layer.weight'
model.set_parameters({name: param})

pt.testing.assert_allclose(model_params['output_layer.weight'].data, param.data)
pt.testing.assert_close(model_params['output_layer.weight'].data, param.data)


def test_set_parameters_allow_missing():
test/unit/test_utils.py: 2 changes (1 addition & 1 deletion)
@@ -166,7 +166,7 @@ def test_average_tensors():
expected_average += array
expected_average /= 4

pt.testing.assert_allclose(utils.average_tensors(arrays), expected_average)
pt.testing.assert_close(utils.average_tensors(arrays), expected_average)

with pytest.raises(utils.SockeyeError) as e:
other_shape = (12, 13)