Make Sonnet use CudnnRNNV3

beckerhe · copybara-github · commit 906742228404 · 2024-04-02T09:32:53.000-07:00
CudnnRNN and CudnnRNNV2 are not compatible with cuDNN 9+, so
this change makes Sonnet use CudnnRNNV3 instead.

Note that this raises the minimum supported cuDNN version to 8.1. This is
still below 8.9, the minimum cuDNN version that TensorFlow itself supports.

PiperOrigin-RevId: 621192321
diff --git a/examples/BUILD b/examples/BUILD
@@ -1,3 +1,4 @@
+# buildifier: disable=out-of-order-load - Breaks copybara otherwise
 load("//third_party/bazel_rules/rules_python/python:py_binary.bzl", "py_binary")
 load("//sonnet/src:build_defs.bzl", "snt_py_library", "snt_py_test")
 
diff --git a/sonnet/src/recurrent.py b/sonnet/src/recurrent.py
@@ -1069,12 +1069,20 @@ def _block_unrolled_lstm(input_sequence, initial_state, w_i, w_h, b):
 
 def _cudnn_unrolled_lstm(input_sequence, initial_state, w_i, w_h, b):
   """GPU/CuDNN-RNN specialization of :class:`UnrolledLSTM`."""
+  max_sequence_length = tf.shape(input_sequence)[0]
+  batch_dim = tf.expand_dims(tf.shape(input_sequence)[1], axis=0)
+
+  # cuDNN 9+ always requires the sequence_length array argument to be present,
+  # so we generate it here with the max_sequence_length in all positions.
+  sequence_lengths = tf.broadcast_to(max_sequence_length, batch_dim)
+
   # Intuitively, concat/transpose is not free but we did not see
   # it significantly affecting performance in benchmarks.
-  output_sequence, all_hidden, all_cell, _ = tf.raw_ops.CudnnRNN(
+  output_sequence, all_hidden, all_cell, _, _ = tf.raw_ops.CudnnRNNV3(
       input=input_sequence,
       input_h=tf.expand_dims(initial_state.hidden, axis=0),
       input_c=tf.expand_dims(initial_state.cell, axis=0),
+      sequence_lengths=sequence_lengths,
       params=tf.concat(
           [
               tf.reshape(tf.transpose(w_i), [-1]),
@@ -1659,7 +1667,15 @@ def __call__(self, inputs, prev_state):
     w_hz, w_hr, w_ha = tf.split(self._w_h, num_or_size_splits=3, axis=1)
     b_z, b_r, b_a = tf.split(self.b, num_or_size_splits=3)
     b_h_zero = tf.zeros([self._hidden_size])
-    outputs, next_hidden, _, _ = tf.raw_ops.CudnnRNN(
+
+    max_sequence_length = tf.shape(inputs)[0]
+    batch_dim = tf.expand_dims(tf.shape(inputs)[1], axis=0)
+
+    # cuDNN 9+ always requires the sequence_length array argument to be present,
+    # so we generate it here with the max_sequence_length in all positions.
+    sequence_lengths = tf.broadcast_to(max_sequence_length, batch_dim)
+
+    outputs, next_hidden, _, _, _ = tf.raw_ops.CudnnRNNV3(
         input=inputs,
         input_h=tf.expand_dims(prev_state, axis=0),
         input_c=0,
@@ -1681,7 +1697,8 @@ def __call__(self, inputs, prev_state):
                 b_h_zero,
             ],
             axis=0),
-        rnn_mode="gru")
+        rnn_mode="gru",
+        sequence_lengths=sequence_lengths)
 
     return outputs, next_hidden
 

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# buildifier: disable=out-of-order-load - Breaks copybara otherwise`
`1`	`2`	`load("//third_party/bazel_rules/rules_python/python:py_binary.bzl", "py_binary")`
`2`	`3`	`load("//sonnet/src:build_defs.bzl", "snt_py_library", "snt_py_test")`
`3`	`4`