From 2ade7e75265a7c1da0c6d2cb373b08de631627a1 Mon Sep 17 00:00:00 2001
From: George Necula
Date: Mon, 10 Jun 2024 03:12:39 -0700
Subject: [PATCH] [pallas] Move the hardware_generation query in the code path
 that needs it

This change allows us to lower and export Pallas calls even on machines
that do not have TPUs, in many cases.

PiperOrigin-RevId: 641841079
---
 jax/_src/tpu_custom_call.py        | 14 +++++---
 tests/pallas/BUILD                 | 29 ++++++++++++++++
 tests/pallas/export_pallas_test.py | 56 ++++++++++++++++++++++++++++++
 3 files changed, 94 insertions(+), 5 deletions(-)
 create mode 100644 tests/pallas/export_pallas_test.py

diff --git a/jax/_src/tpu_custom_call.py b/jax/_src/tpu_custom_call.py
index 3699e7ee4209..9b120d36cee4 100644
--- a/jax/_src/tpu_custom_call.py
+++ b/jax/_src/tpu_custom_call.py
@@ -384,16 +384,12 @@ def as_tpu_kernel(
 ) -> Callable[..., Any]:
   """Turns an MLIR Mosaic kernel into a JAX-compatible function."""
   # We use jax.jit to make sure we hit the fast compilation cache.
-  some_tpu = jax.devices(backend)[0]
-  device_kind = some_tpu.device_kind
-  if not device_kind.startswith("TPU v"):
-    raise ValueError(f"Unrecognized TPU device kind: {device_kind}.")
+
   if vmem_limit_bytes is not None and not isinstance(vmem_limit_bytes, int):
     raise ValueError(
         "vmem_limit_bytes must be an int: provided with a"
         f" {type(vmem_limit_bytes)}."
     )
-  hardware_generation = int(device_kind[len("TPU v")])
   has_communication, has_custom_barrier = tpu.private_has_communication(
       module.operation
   )
@@ -405,6 +401,14 @@ def as_tpu_kernel(
       module.operation.get_asm(binary=True, enable_debug_info=True)
   )
   if needs_layout_passes and _MOSAIC_USE_PYTHON_PIPELINE.value:
+    some_tpu = jax.devices(backend)[0]
+    device_kind = some_tpu.device_kind
+    if not device_kind.startswith("TPU v"):
+      raise ValueError(
+          f"Unrecognized TPU device kind: {device_kind}. "
+          "tpu_custom_call cannot be lowered on a machine without TPUs "
+          "when mosaic_use_python_pipeline=True.")
+    hardware_generation = int(device_kind[len("TPU v")])
     module = _lower_tpu_kernel(module, hardware_generation)
     needs_hlo_passes = False
     needs_layout_passes = False
diff --git a/tests/pallas/BUILD b/tests/pallas/BUILD
index 69cf99b26318..070fd077d41d 100644
--- a/tests/pallas/BUILD
+++ b/tests/pallas/BUILD
@@ -330,3 +330,32 @@ jax_test(
         "//jax:pallas_gpu",  # build_cleaner: keep
     ],
 )
+
+jax_test(
+    name = "export_pallas_test",
+    srcs = ["export_pallas_test.py"],
+    config_tags_overrides = {
+        "gpu_a100_x32": {
+            "ondemand": False,  # Include in presubmit.
+        },
+    },
+    disable_configs = [
+        "gpu",
+        "gpu_x32",
+        "gpu_a100",
+        "gpu_h100",
+        "gpu_p100",
+        "gpu_p100_x32",
+        "gpu_pjrt_c_api",
+    ],
+    enable_configs = [
+        "gpu_a100_x32",
+    ],
+    tags = [],
+    deps = [
+        "//jax:pallas",
+        "//jax:pallas_gpu",  # build_cleaner: keep
+        "//jax:pallas_tpu",  # build_cleaner: keep
+        "//jax/experimental/export",
+    ],
+)
diff --git a/tests/pallas/export_pallas_test.py b/tests/pallas/export_pallas_test.py
new file mode 100644
index 000000000000..1c46a226e3df
--- /dev/null
+++ b/tests/pallas/export_pallas_test.py
@@ -0,0 +1,56 @@
+# Copyright 2023 The JAX Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Test exporting Pallas kernels."""
+
+from absl.testing import absltest
+import jax
+from jax._src import test_util as jtu
+from jax.experimental import export
+# Import mosaic for flag definitions
+from jax.experimental import mosaic as _  # noqa: F401
+from jax.experimental import pallas as pl
+import numpy as np
+
+
+jax.config.parse_flags_with_absl()
+
+
+class ExportTest(jtu.JaxTestCase):
+
+  def test_cross_platform(self):
+    def add_vectors_kernel(x_ref, y_ref, o_ref):
+      x, y = x_ref[...], y_ref[...]
+      o_ref[...] = x + y
+
+    @jax.jit
+    def add_vectors(x: jax.Array, y: jax.Array) -> jax.Array:
+      return pl.pallas_call(add_vectors_kernel,
+                            out_shape=jax.ShapeDtypeStruct(x.shape, x.dtype)
+                            )(x, y)
+
+    a = np.arange(8)
+    exp = export.export(
+        add_vectors,
+        # TODO(necula): Make this test work on GPU also
+        lowering_platforms=["tpu"],
+    )(a, a)
+
+    if jtu.device_under_test() == "tpu":
+      res = export.call(exp)(a, a)
+      self.assertAllClose(res, a + a)
+
+
+if __name__ == '__main__':
+  absltest.main(testLoader=jtu.JaxTestLoader())