From 91b65817a32ebb90072af0e4a19567907d81c66d Mon Sep 17 00:00:00 2001
From: Junwhan Ahn
Date: Fri, 3 Jan 2025 17:13:33 -0800
Subject: [PATCH] Change IFRT and PjRt layout API to return
 `std::shared_ptr<const xla::PjRtLayout>` instead of
 `std::unique_ptr<xla::PjRtLayout>`

The current API design that uses `std::unique_ptr<xla::PjRtLayout>` has two
issues:

* The API requires `xla::PjRtLayout` to be copied in some scenarios, e.g.,
  `xla::ifrt::Array` internally stores a layout and returns a copy of it every
  time `layout()` is called. This forces implementations to break the
  abstraction boundary, because `xla::PjRtLayout` is an abstract class and
  `std::unique_ptr` is not copyable. The current implementation either stores
  an `xla::Layout` and creates an `xla::PjRtLayout` every time, or downcasts
  `xla::PjRtLayout` to `xla::PjRtXlaLayout` to perform the copy.

* `xla::Layout` is expensive to copy (`sizeof(xla::Layout)` is 248 bytes as of
  2025-01-03), and copying `xla::PjRtXlaLayout` requires copying or moving an
  `xla::Layout`.

To address these two problems, this CL changes the PjRt and IFRT APIs that
return `xla::PjRtLayout` to instead return
`std::shared_ptr<const xla::PjRtLayout>`, so that PjRt layouts can be cheaply
copied. Similar patterns have been used in other places such as
`xla::ifrt::Sharding` and `xla::PjRtExecutable::GetHloModules()`.

Some implementations have been updated to take advantage of this change. For
example, `PjRtCApiBuffer::layout()` no longer performs a layout copy and
instead reuses an internally cached `std::shared_ptr<const xla::PjRtLayout>`
instance.

PiperOrigin-RevId: 711892970
---
 xla/pjrt/c/pjrt_c_api_wrapper_impl.cc         |  8 +++---
 xla/pjrt/c/pjrt_c_api_wrapper_impl.h          |  2 +-
 xla/pjrt/pjrt_c_api_client.cc                 | 11 ++++---
 xla/pjrt/pjrt_c_api_client.h                  |  4 +--
 xla/pjrt/pjrt_client.h                        |  6 ++---
 xla/pjrt/pjrt_executable.cc                   |  8 +++---
 xla/pjrt/pjrt_executable.h                    |  4 +--
 xla/pjrt/pjrt_layout.h                        |  6 ++---
 xla/python/ifrt/array.h                       |  2 +-
 xla/python/ifrt/client.h                      |  2 +-
 xla/python/ifrt/executable.h                  |  8 +++---
 xla/python/ifrt/mock.cc                       |  7 ++---
 xla/python/ifrt/mock.h                        | 12 ++++-----
 xla/python/ifrt_proxy/client/array.h          |  2 +-
 xla/python/ifrt_proxy/client/client.h         |  7 ++---
 xla/python/ifrt_proxy/client/executable.cc    | 27 +++++--------------
 xla/python/ifrt_proxy/client/executable.h     | 11 +++++---
 .../ifrt_proxy/client/executable_test.cc      | 21 ++++++++-------
 xla/python/ifrt_proxy/server/ifrt_backend.cc  |  4 +--
 .../ifrt_proxy/server/ifrt_backend_test.cc    | 10 +++----
 xla/python/jax_jit.cc                         |  6 ++---
 xla/python/jax_jit.h                          |  2 +-
 xla/python/pjrt_ifrt/basic_string_array.cc    |  6 +++--
 xla/python/pjrt_ifrt/basic_string_array.h     |  3 ++-
 xla/python/pjrt_ifrt/pjrt_array.cc            |  8 +++---
 xla/python/pjrt_ifrt/pjrt_array.h             |  2 +-
 xla/python/pjrt_ifrt/pjrt_client.cc           |  2 +-
 xla/python/pjrt_ifrt/pjrt_client.h            |  6 ++---
 xla/python/pjrt_ifrt/pjrt_executable.h        |  8 +++---
 xla/python/py_array.cc                        |  1 +
 xla/python/py_array.h                         |  2 +-
 xla/python/py_client.cc                       |  3 ++-
 xla/python/py_compile_only_client.cc          |  2 +-
 xla/python/py_executable.cc                   |  4 +--
 xla/python/py_executable.h                    |  8 +++---
 .../functional_hlo_runner.cc                  |  4 +--
 36 files changed, 114 insertions(+), 115 deletions(-)

diff --git a/xla/pjrt/c/pjrt_c_api_wrapper_impl.cc b/xla/pjrt/c/pjrt_c_api_wrapper_impl.cc
index ec697b08af7841..64aa20bac3c0e2 100644
--- a/xla/pjrt/c/pjrt_c_api_wrapper_impl.cc
+++ b/xla/pjrt/c/pjrt_c_api_wrapper_impl.cc
@@ -1797,10 +1797,10 @@ PJRT_Error* PJRT_Buffer_GetMemoryLayout(
   absl::MutexLock lock(&args->buffer->mu);
   if (!layout_data.has_value()) {
     // TODO(skyewm): change PJRT C API to also use opaque layout type
-    std::unique_ptr<xla::PjRtLayout> pjrt_layout =
+    std::shared_ptr<const xla::PjRtLayout>
pjrt_layout = args->buffer->buffer->layout(); - xla::PjRtXlaLayout* pjrt_xla_layout = - tensorflow::down_cast(pjrt_layout.get()); + const xla::PjRtXlaLayout* pjrt_xla_layout = + tensorflow::down_cast(pjrt_layout.get()); CHECK(pjrt_xla_layout != nullptr) << "Got unexpected layout type"; const xla::Layout& xla_layout = pjrt_xla_layout->xla_layout(); @@ -2283,7 +2283,7 @@ PJRT_Error* PJRT_Layouts_PJRT_Client_GetDefaultLayout( args->client->client->GetDefaultLayout( pjrt::ConvertFromPjRtBufferType(args->type), {args->dims, args->num_dims})); - auto pjrt_xla_layout = std::make_unique(xla_layout); + auto pjrt_xla_layout = std::make_shared(xla_layout); args->layout = new PJRT_Layouts_MemoryLayout{std::move(pjrt_xla_layout)}; return nullptr; } diff --git a/xla/pjrt/c/pjrt_c_api_wrapper_impl.h b/xla/pjrt/c/pjrt_c_api_wrapper_impl.h index 0ebecc0c251734..04463410ee7e08 100644 --- a/xla/pjrt/c/pjrt_c_api_wrapper_impl.h +++ b/xla/pjrt/c/pjrt_c_api_wrapper_impl.h @@ -218,7 +218,7 @@ struct PJRT_CopyToDeviceStream { }; struct PJRT_Layouts_MemoryLayout { - std::unique_ptr layout; + std::shared_ptr layout; }; struct PJRT_Layouts_SerializedLayout { diff --git a/xla/pjrt/pjrt_c_api_client.cc b/xla/pjrt/pjrt_c_api_client.cc index a1b8966bd34e9b..18ca751766412b 100644 --- a/xla/pjrt/pjrt_c_api_client.cc +++ b/xla/pjrt/pjrt_c_api_client.cc @@ -2020,16 +2020,17 @@ absl::Span PjRtCApiBuffer::dimensions() const { return absl::Span(args.dims, args.num_dims); } -std::unique_ptr PjRtCApiBuffer::layout() const { +std::shared_ptr PjRtCApiBuffer::layout() const { { absl::MutexLock lock(&mu_); - if (!layout_.has_value()) { + if (layout_ == nullptr) { const PJRT_Api* c_api = pjrt_c_api(); PJRT_Layouts_Extension* extension = pjrt::FindExtension( c_api, PJRT_Extension_Type::PJRT_Extension_Type_Layouts); if (extension == nullptr) { - layout_.emplace(LayoutUtil::MakeDescendingLayout(dimensions().size())); + layout_ = std::make_shared( + LayoutUtil::MakeDescendingLayout(dimensions().size())); } else { std::unique_ptr @@ -2057,11 +2058,11 @@ std::unique_ptr PjRtCApiBuffer::layout() const { absl::StatusOr pjrt_xla_layout = PjRtXlaLayout::Deserialize(serialized_layout); TF_CHECK_OK(pjrt_xla_layout.status()); - layout_.emplace(*pjrt_xla_layout); + layout_ = std::make_shared(*std::move(pjrt_xla_layout)); } } } - return std::make_unique(*layout_); + return layout_; } bool PjRtCApiBuffer::has_dynamic_dimensions() const { diff --git a/xla/pjrt/pjrt_c_api_client.h b/xla/pjrt/pjrt_c_api_client.h index 46304e6d46bcef..03e41ec3985903 100644 --- a/xla/pjrt/pjrt_c_api_client.h +++ b/xla/pjrt/pjrt_c_api_client.h @@ -485,7 +485,7 @@ class PjRtCApiBuffer : public PjRtBuffer { absl::Span dimensions() const override; - std::unique_ptr layout() const override; + std::shared_ptr layout() const override; // PJRT C API doesn't support tuple buffers. bool IsTuple() const override { return false; } @@ -583,7 +583,7 @@ class PjRtCApiBuffer : public PjRtBuffer { // we set on `readiness_event` modifies `readiness_promise_`. std::shared_ptr::Promise> readiness_promise_; // Set and cached the first time layout() is called. - mutable std::optional layout_; + mutable std::shared_ptr layout_; // Set and cached the first time is_dynamic_dimension() is called. 
mutable std::optional> is_dynamic_dimension_; diff --git a/xla/pjrt/pjrt_client.h b/xla/pjrt/pjrt_client.h index 26c777b1fdd4ef..0b1da9ef4660a1 100644 --- a/xla/pjrt/pjrt_client.h +++ b/xla/pjrt/pjrt_client.h @@ -1121,12 +1121,12 @@ class PjRtBuffer { return on_device_shape().dimensions(); } - // The on-device memory layout of this buffer. Returned via unique_ptr to make + // The on-device memory layout of this buffer. Returned via shared_ptr to make // memory management easier -- PjRtLayout is an abstract base class, so cannot // be easily copied. - virtual std::unique_ptr layout() const { + virtual std::shared_ptr layout() const { CHECK(on_device_shape().has_layout()); - return std::make_unique(on_device_shape().layout()); + return std::make_shared(on_device_shape().layout()); } // PjRtBuffers can either represent a single array buffer or a tuple of array diff --git a/xla/pjrt/pjrt_executable.cc b/xla/pjrt/pjrt_executable.cc index e2fa5e53f9bfee..def2f0edd24b8d 100644 --- a/xla/pjrt/pjrt_executable.cc +++ b/xla/pjrt/pjrt_executable.cc @@ -422,7 +422,7 @@ PjRtExecutable::GetOutputDimensions() const { return output_dimensions; } -absl::StatusOr>> +absl::StatusOr>> PjRtExecutable::GetParameterLayouts() const { TF_ASSIGN_OR_RETURN(std::vector> hlo_modules, GetHloModules()); @@ -439,7 +439,7 @@ PjRtExecutable::GetParameterLayouts() const { ComputationLayout comp_layout = hlo_modules[0]->entry_computation_layout(); TF_ASSIGN_OR_RETURN(std::vector layouts, comp_layout.FlattenedParameterLayouts()); - std::vector> result; + std::vector> result; result.reserve(layouts.size()); for (const Layout& layout : layouts) { result.push_back(std::make_unique(layout)); @@ -447,7 +447,7 @@ PjRtExecutable::GetParameterLayouts() const { return result; } -absl::StatusOr>> +absl::StatusOr>> PjRtExecutable::GetOutputLayouts() const { TF_ASSIGN_OR_RETURN(std::vector> hlo_modules, GetHloModules()); @@ -464,7 +464,7 @@ PjRtExecutable::GetOutputLayouts() const { ComputationLayout comp_layout = hlo_modules[0]->entry_computation_layout(); TF_ASSIGN_OR_RETURN(std::vector layouts, comp_layout.FlattenedResultLayouts()); - std::vector> result; + std::vector> result; result.reserve(layouts.size()); for (const Layout& layout : layouts) { result.push_back(std::make_unique(layout)); diff --git a/xla/pjrt/pjrt_executable.h b/xla/pjrt/pjrt_executable.h index 07715fe0dbae79..fc4f76ef4776a8 100644 --- a/xla/pjrt/pjrt_executable.h +++ b/xla/pjrt/pjrt_executable.h @@ -335,11 +335,11 @@ class PjRtExecutable { GetOutputDimensions() const; // Returns the layout of each input parameter. - virtual absl::StatusOr>> + virtual absl::StatusOr>> GetParameterLayouts() const; // Returns the layout of each output. - virtual absl::StatusOr>> + virtual absl::StatusOr>> GetOutputLayouts() const; // Returns a list of lists of memory kind strings for output. The returned diff --git a/xla/pjrt/pjrt_layout.h b/xla/pjrt/pjrt_layout.h index eea9b861690860..005881e4634849 100644 --- a/xla/pjrt/pjrt_layout.h +++ b/xla/pjrt/pjrt_layout.h @@ -100,9 +100,9 @@ class PjRtXlaLayout : public PjRtLayout { // TODO(b/327524065): make callers use PjRtLayout directly instead of assuming // an xla::Layout and get rid of this function. 
inline Layout GetXlaLayoutUnsafe( - const std::unique_ptr& pjrt_layout) { - PjRtXlaLayout* xla_layout = - tensorflow::down_cast(pjrt_layout.get()); + const std::shared_ptr& pjrt_layout) { + const PjRtXlaLayout* xla_layout = + tensorflow::down_cast(pjrt_layout.get()); CHECK(xla_layout != nullptr) << "Got unexpected layout type"; return xla_layout->xla_layout(); } diff --git a/xla/python/ifrt/array.h b/xla/python/ifrt/array.h index 2a4ff23b1fdb1d..e31a2600352324 100644 --- a/xla/python/ifrt/array.h +++ b/xla/python/ifrt/array.h @@ -76,7 +76,7 @@ class Array : public llvm::RTTIExtends { // The device memory layout for each shard of the Array. All shards are // assumed to have the same layout. Cannot be nullptr; implementations should // return UNIMPLEMENTED instead. - virtual absl::StatusOr> layout() const = 0; + virtual absl::StatusOr> layout() const = 0; // Breaks an array up into per-device arrays. This is the elimination // counterpart of `Client::AssembleArrayFromSingleDeviceArrays()`. diff --git a/xla/python/ifrt/client.h b/xla/python/ifrt/client.h index 441aa66781a462..01eab2f3492e9a 100644 --- a/xla/python/ifrt/client.h +++ b/xla/python/ifrt/client.h @@ -241,7 +241,7 @@ class Client : public llvm::RTTIExtends { // single-shard dimensions `dims`. // TODO(hyeontaek): Change the API to take `Shape` and `Sharding` instead of // single-shard dimensions and device. - virtual absl::StatusOr> + virtual absl::StatusOr> GetDefaultLayoutForDevice(DType dtype, absl::Span dims, Device* device) const = 0; diff --git a/xla/python/ifrt/executable.h b/xla/python/ifrt/executable.h index 5332768c885b9c..9bf0128ed7e0b8 100644 --- a/xla/python/ifrt/executable.h +++ b/xla/python/ifrt/executable.h @@ -78,10 +78,10 @@ class Executable : public llvm::RTTIExtends { // Returns a list of output `OpSharding`. virtual std::optional> GetOutputShardings() const = 0; // Returns a list of parameter layouts. - virtual absl::StatusOr>> + virtual absl::StatusOr>> GetParameterLayouts() const = 0; // Returns a list of output/result layouts. - virtual absl::StatusOr>> + virtual absl::StatusOr>> GetOutputLayouts() const = 0; // Returns an `HloModule` (optimized) per partition. virtual absl::StatusOr>> @@ -187,10 +187,10 @@ class LoadedExecutable // Returns a list of output OpSharding. virtual std::optional> GetOutputShardings() const = 0; // Returns a list of parameter layouts. - virtual absl::StatusOr>> + virtual absl::StatusOr>> GetParameterLayouts() const = 0; // Returns a list of output/result layouts. - virtual absl::StatusOr>> + virtual absl::StatusOr>> GetOutputLayouts() const = 0; // Return an HloModule (optimized) per partition. 
virtual absl::StatusOr>> diff --git a/xla/python/ifrt/mock.cc b/xla/python/ifrt/mock.cc index d62646bf5b78ad..09cfa924e46e99 100644 --- a/xla/python/ifrt/mock.cc +++ b/xla/python/ifrt/mock.cc @@ -78,9 +78,10 @@ MockArray::MockArray(tsl::RCReference delegated) return delegated_->shared_ptr_sharding(); }); ON_CALL(*this, layout) - .WillByDefault([this]() -> absl::StatusOr> { - return delegated_->layout(); - }); + .WillByDefault( + [this]() -> absl::StatusOr> { + return delegated_->layout(); + }); ON_CALL(*this, DisassembleIntoSingleDeviceArrays(_)) .WillByDefault([this](ArrayCopySemantics semantics) { return delegated_->DisassembleIntoSingleDeviceArrays(semantics); diff --git a/xla/python/ifrt/mock.h b/xla/python/ifrt/mock.h index 11ba98cc96326a..2009c048cbb588 100644 --- a/xla/python/ifrt/mock.h +++ b/xla/python/ifrt/mock.h @@ -76,7 +76,7 @@ class MockArray : public llvm::RTTIExtends { MOCK_METHOD(const Sharding&, sharding, (), (const, final)); MOCK_METHOD(absl::Nonnull>, shared_ptr_sharding, (), (const, final)); - MOCK_METHOD(absl::StatusOr>, layout, (), + MOCK_METHOD(absl::StatusOr>, layout, (), (const, final)); MOCK_METHOD(absl::StatusOr>>, DisassembleIntoSingleDeviceArrays, (ArrayCopySemantics semantics), @@ -173,7 +173,7 @@ class MockClient : public llvm::RTTIExtends { MOCK_METHOD(absl::StatusOr>, GetTopologyForDevices, (const tsl::RCReference& devices), (const, final)); - MOCK_METHOD(absl::StatusOr>, + MOCK_METHOD(absl::StatusOr>, GetDefaultLayoutForDevice, (xla::ifrt::DType dtype, absl::Span dims, xla::ifrt::Device* device), @@ -264,9 +264,9 @@ class MockExecutable : public llvm::RTTIExtends { (const, final)); MOCK_METHOD(std::optional>, GetOutputShardings, (), (const, final)); - MOCK_METHOD(absl::StatusOr>>, + MOCK_METHOD(absl::StatusOr>>, GetParameterLayouts, (), (const, final)); - MOCK_METHOD(absl::StatusOr>>, + MOCK_METHOD(absl::StatusOr>>, GetOutputLayouts, (), (const, final)); MOCK_METHOD(absl::StatusOr>>, GetHloModules, (), (const, final)); @@ -293,9 +293,9 @@ class MockLoadedExecutable (const, final)); MOCK_METHOD(std::optional>, GetOutputShardings, (), (const, final)); - MOCK_METHOD(absl::StatusOr>>, + MOCK_METHOD(absl::StatusOr>>, GetParameterLayouts, (), (const, final)); - MOCK_METHOD(absl::StatusOr>>, + MOCK_METHOD(absl::StatusOr>>, GetOutputLayouts, (), (const, final)); MOCK_METHOD(absl::StatusOr>>, GetOutputMemoryKinds, (), (const, final)); diff --git a/xla/python/ifrt_proxy/client/array.h b/xla/python/ifrt_proxy/client/array.h index 2a9ccdf17bea32..5c4b42475f36c7 100644 --- a/xla/python/ifrt_proxy/client/array.h +++ b/xla/python/ifrt_proxy/client/array.h @@ -112,7 +112,7 @@ class Array final : public llvm::RTTIExtends { std::shared_ptr shared_ptr_sharding() const override { return sharding_; } - absl::StatusOr> layout() const override { + absl::StatusOr> layout() const override { return absl::UnimplementedError( "Array::layout() not implemented for IFRT proxy"); }; diff --git a/xla/python/ifrt_proxy/client/client.h b/xla/python/ifrt_proxy/client/client.h index 3732b5ddd832d7..0f1323e1abeaa9 100644 --- a/xla/python/ifrt_proxy/client/client.h +++ b/xla/python/ifrt_proxy/client/client.h @@ -140,9 +140,10 @@ class Client final : public llvm::RTTIExtends { return absl::UnimplementedError( "GetTopologyForDevices is not supported for the IFRT proxy client."); } - absl::StatusOr> GetDefaultLayoutForDevice( - xla::ifrt::DType dtype, absl::Span dims, - xla::ifrt::Device* device) const override { + absl::StatusOr> + GetDefaultLayoutForDevice(xla::ifrt::DType dtype, + absl::Span 
dims, + xla::ifrt::Device* device) const override { return absl::UnimplementedError( "GetDefaultLayout is not supported for the IFRT proxy client."); } diff --git a/xla/python/ifrt_proxy/client/executable.cc b/xla/python/ifrt_proxy/client/executable.cc index 81ef43ec5c0f3b..6de9e3757eeff3 100644 --- a/xla/python/ifrt_proxy/client/executable.cc +++ b/xla/python/ifrt_proxy/client/executable.cc @@ -310,10 +310,11 @@ LoadedExecutable::LoadedExecutable( auto parse_layouts = [](const LoadedExecutableMetadataResponse::LayoutList& list) { - std::vector layouts; + std::vector> layouts; layouts.reserve(list.layouts_size()); for (const auto& layout : list.layouts()) { - layouts.push_back(xla::Layout::CreateFromProto(layout)); + layouts.push_back(std::make_shared( + xla::Layout::CreateFromProto(layout))); } return layouts; }; @@ -433,34 +434,20 @@ std::optional> LoadedExecutable::GetOutputShardings() return (*info)->output_shardings; } -absl::StatusOr>> +absl::StatusOr>> LoadedExecutable::GetParameterLayouts() const { tsl::profiler::TraceMe traceme_ifrt_entrypoint( "IfrtProxyEntrypointLoadedExecutableGetParameterLayouts"); TF_ASSIGN_OR_RETURN(auto info, metadata_future_.Await()); - TF_RETURN_IF_ERROR(info->parameter_layouts.status()); - - std::vector> result; - result.reserve(info->parameter_layouts->size()); - for (const xla::Layout& layout : *info->parameter_layouts) { - result.push_back(std::make_unique(layout)); - } - return result; + return info->parameter_layouts; } -absl::StatusOr>> +absl::StatusOr>> LoadedExecutable::GetOutputLayouts() const { tsl::profiler::TraceMe traceme_ifrt_entrypoint( "IfrtProxyEntrypointLoadedExecutableGetOutputLayouts"); TF_ASSIGN_OR_RETURN(auto info, metadata_future_.Await()); - TF_RETURN_IF_ERROR(info->output_layouts.status()); - - std::vector> result; - result.reserve(info->output_layouts->size()); - for (const xla::Layout& layout : *info->output_layouts) { - result.push_back(std::make_unique(layout)); - } - return result; + return info->output_layouts; } absl::StatusOr>> diff --git a/xla/python/ifrt_proxy/client/executable.h b/xla/python/ifrt_proxy/client/executable.h index 5ce5292d5a76b8..0af4a14a3e80b6 100644 --- a/xla/python/ifrt_proxy/client/executable.h +++ b/xla/python/ifrt_proxy/client/executable.h @@ -35,6 +35,7 @@ #include "xla/hlo/ir/hlo_module.h" #include "xla/layout.h" #include "xla/pjrt/pjrt_executable.h" +#include "xla/pjrt/pjrt_layout.h" #include "xla/python/ifrt/array.h" #include "xla/python/ifrt/attribute_map.h" #include "xla/python/ifrt/client.h" @@ -77,9 +78,9 @@ class LoadedExecutable final std::optional> GetParameterShardings() const override; std::optional> GetOutputShardings() const override; - absl::StatusOr>> + absl::StatusOr>> GetParameterLayouts() const override; - absl::StatusOr>> + absl::StatusOr>> GetOutputLayouts() const override; absl::StatusOr>> GetOutputMemoryKinds() const override; @@ -105,8 +106,10 @@ class LoadedExecutable final std::optional> parameter_shardings; std::optional> output_shardings; - absl::StatusOr> parameter_layouts; - absl::StatusOr> output_layouts; + absl::StatusOr>> + parameter_layouts; + absl::StatusOr>> + output_layouts; // Elements in `output_memory_kinds` point to elements in `memory_kinds`. // Required since `GetOutputMemoryKinds()` returns `absl::string_view`. 
diff --git a/xla/python/ifrt_proxy/client/executable_test.cc b/xla/python/ifrt_proxy/client/executable_test.cc index 70bb1791d3d8f6..3972429fb38147 100644 --- a/xla/python/ifrt_proxy/client/executable_test.cc +++ b/xla/python/ifrt_proxy/client/executable_test.cc @@ -158,19 +158,20 @@ TEST_F(LoadedExecutableTest, Metadata) { ASSERT_OK_AND_ASSIGN(auto parameter_layouts, executable.GetParameterLayouts()); EXPECT_EQ(parameter_layouts.size(), 2); + EXPECT_EQ(tensorflow::down_cast( + parameter_layouts[0].get()) + ->xla_layout(), + xla::LayoutUtil::MakeDescendingLayout(/*rank=*/1)); + EXPECT_EQ(tensorflow::down_cast( + parameter_layouts[1].get()) + ->xla_layout(), + xla::LayoutUtil::MakeDescendingLayout(/*rank=*/2)); + ASSERT_OK_AND_ASSIGN(auto output_layouts, executable.GetOutputLayouts()); + EXPECT_EQ(output_layouts.size(), 1); EXPECT_EQ( - tensorflow::down_cast(parameter_layouts[0].get()) - ->xla_layout(), - xla::LayoutUtil::MakeDescendingLayout(/*rank=*/1)); - EXPECT_EQ( - tensorflow::down_cast(parameter_layouts[1].get()) + tensorflow::down_cast(output_layouts[0].get()) ->xla_layout(), xla::LayoutUtil::MakeDescendingLayout(/*rank=*/2)); - ASSERT_OK_AND_ASSIGN(auto output_layouts, executable.GetOutputLayouts()); - EXPECT_EQ(output_layouts.size(), 1); - EXPECT_EQ(tensorflow::down_cast(output_layouts[0].get()) - ->xla_layout(), - xla::LayoutUtil::MakeDescendingLayout(/*rank=*/2)); EXPECT_THAT(executable.GetOutputMemoryKinds(), IsOkAndHolds(ElementsAre(ElementsAre("foo")))); } diff --git a/xla/python/ifrt_proxy/server/ifrt_backend.cc b/xla/python/ifrt_proxy/server/ifrt_backend.cc index e26a6cb5c44e5d..b36f84fabcacc8 100644 --- a/xla/python/ifrt_proxy/server/ifrt_backend.cc +++ b/xla/python/ifrt_proxy/server/ifrt_backend.cc @@ -1287,7 +1287,7 @@ IfrtBackend::HandleLoadedExecutableMetadataRequest( parameter_layouts.ok()) { auto* const layouts = metadata_resp->mutable_parameter_layouts_list()->mutable_layouts(); - for (const std::unique_ptr& parameter_layout : + for (const std::shared_ptr& parameter_layout : *parameter_layouts) { // TODO(b/329165105): use PjRtLayout::Serialize instead const xla::PjRtXlaLayout* layout = @@ -1305,7 +1305,7 @@ IfrtBackend::HandleLoadedExecutableMetadataRequest( output_layouts.ok()) { auto* const layouts = metadata_resp->mutable_output_layouts_list()->mutable_layouts(); - for (const std::unique_ptr& output_layout : + for (const std::shared_ptr& output_layout : *output_layouts) { // TODO(b/329165105): use PjRtLayout::Serialize instead const xla::PjRtXlaLayout* layout = diff --git a/xla/python/ifrt_proxy/server/ifrt_backend_test.cc b/xla/python/ifrt_proxy/server/ifrt_backend_test.cc index f3fa9f991ea056..fd3c35e6831f03 100644 --- a/xla/python/ifrt_proxy/server/ifrt_backend_test.cc +++ b/xla/python/ifrt_proxy/server/ifrt_backend_test.cc @@ -1243,16 +1243,16 @@ TEST_P(IfrtBackendHandlerTest, LoadedExecutableMetadata) { EXPECT_CALL(*executable, GetOutputShardings()) .WillOnce(Return(std::vector{op_sharding1})); - std::vector> parameter_layouts; - parameter_layouts.push_back(std::make_unique( + std::vector> parameter_layouts; + parameter_layouts.push_back(std::make_shared( xla::LayoutUtil::MakeDescendingLayout(/*rank=*/1))); - parameter_layouts.push_back(std::make_unique( + parameter_layouts.push_back(std::make_shared( xla::LayoutUtil::MakeDescendingLayout(/*rank=*/2))); EXPECT_CALL(*executable, GetParameterLayouts()) .WillOnce(Return(std::move(parameter_layouts))); - std::vector> output_layouts; - output_layouts.push_back(std::make_unique( + std::vector> output_layouts; + 
output_layouts.push_back(std::make_shared( xla::LayoutUtil::MakeDescendingLayout(/*rank=*/2))); EXPECT_CALL(*executable, GetOutputLayouts()) .WillOnce(Return(std::move(output_layouts))); diff --git a/xla/python/jax_jit.cc b/xla/python/jax_jit.cc index 46041be0e7eb8d..e6d7ee51ab5f1f 100644 --- a/xla/python/jax_jit.cc +++ b/xla/python/jax_jit.cc @@ -197,7 +197,7 @@ std::string CallSignature::DebugString() const { out->append(s.DebugString()); }; auto layout_formatter = [](std::string* out, - const std::shared_ptr& l) { + const std::shared_ptr& l) { if (l != nullptr) { out->append(l->ToString()); } else { @@ -252,8 +252,8 @@ bool CallSignature::operator==(const CallSignature& other) const { absl::c_equal(dynamic_arg_shardings, other.dynamic_arg_shardings, ShardingEqual) && absl::c_equal(dynamic_arg_layouts, other.dynamic_arg_layouts, - [](const std::shared_ptr& a, - const std::shared_ptr& b) { + [](const std::shared_ptr& a, + const std::shared_ptr& b) { return (a && b) ? *a == *b : a == b; }) && (global_extra_jit_context.has_value() == diff --git a/xla/python/jax_jit.h b/xla/python/jax_jit.h index 4fb3775ef823c0..59d35abf0daa18 100644 --- a/xla/python/jax_jit.h +++ b/xla/python/jax_jit.h @@ -196,7 +196,7 @@ struct CallSignature { std::vector dynamic_arg_shardings; // The layout of the jax.Array arguments. - std::vector> dynamic_arg_layouts; + std::vector> dynamic_arg_layouts; absl::InlinedVector committed_args; diff --git a/xla/python/pjrt_ifrt/basic_string_array.cc b/xla/python/pjrt_ifrt/basic_string_array.cc index d3b9fd1be984f5..14914090b5912d 100644 --- a/xla/python/pjrt_ifrt/basic_string_array.cc +++ b/xla/python/pjrt_ifrt/basic_string_array.cc @@ -147,6 +147,7 @@ BasicStringArray::BasicStringArray(Client* client, Shape shape, : client_(client), shape_(std::move(shape)), sharding_(std::move(sharding)), + layout_(std::make_shared()), buffers_(std::move(buffers)), ready_future_(std::move(ready_future)), on_done_with_buffer_(std::move(on_done_with_buffer)) {} @@ -446,12 +447,13 @@ absl::StatusOr> BasicStringArray::FullyReplicatedShard( std::move(buffers_future), std::move(on_done_with_buffer)); } -absl::StatusOr> BasicStringArray::layout() const { +absl::StatusOr> BasicStringArray::layout() + const { absl::MutexLock lock(&mu_); if (is_deleted_) { return absl::FailedPreconditionError("Array has already been deleted"); } - return std::make_unique(); + return layout_; } std::string BasicStringArray::DebugString() const { diff --git a/xla/python/pjrt_ifrt/basic_string_array.h b/xla/python/pjrt_ifrt/basic_string_array.h index a430cfa73fdd26..b3c6ef0caf7e45 100644 --- a/xla/python/pjrt_ifrt/basic_string_array.h +++ b/xla/python/pjrt_ifrt/basic_string_array.h @@ -121,7 +121,7 @@ class BasicStringArray final return sharding_; } - absl::StatusOr> layout() const override; + absl::StatusOr> layout() const override; absl::StatusOr>> DisassembleIntoSingleDeviceArrays(ArrayCopySemantics semantics) override; @@ -172,6 +172,7 @@ class BasicStringArray final Client* client_; Shape shape_; std::shared_ptr sharding_; + std::shared_ptr layout_; Future buffers_; Future<> ready_future_; diff --git a/xla/python/pjrt_ifrt/pjrt_array.cc b/xla/python/pjrt_ifrt/pjrt_array.cc index 0c04f21a533464..724703bf47d207 100644 --- a/xla/python/pjrt_ifrt/pjrt_array.cc +++ b/xla/python/pjrt_ifrt/pjrt_array.cc @@ -553,7 +553,7 @@ bool PjRtArray::IsDeleted() const { std::string PjRtArray::DebugString() const { DCHECK(this); - absl::StatusOr> layout_ptr = layout(); + absl::StatusOr> layout_ptr = layout(); std::string layout_str = 
layout_ptr.ok() ? (*layout_ptr)->ToString() : ""; @@ -566,12 +566,12 @@ std::string PjRtArray::DebugString() const { // TODO(b/330198879): populate layout at construction instead of accessing PJRT // buffer directly for consistency with Pathways. -absl::StatusOr> PjRtArray::layout() const { +absl::StatusOr> PjRtArray::layout() const { CHECK(!pjrt_buffers_.empty()); - std::unique_ptr layout = pjrt_buffers_[0]->layout(); + std::shared_ptr layout = pjrt_buffers_[0]->layout(); #ifndef NDEBUG for (int i = 1; i < pjrt_buffers_.size(); ++i) { - std::unique_ptr layout_i = pjrt_buffers_[i]->layout(); + std::shared_ptr layout_i = pjrt_buffers_[i]->layout(); DCHECK(*layout == *layout_i) << "PjRtArray has mismatched layouts across shards! " << "shard 0: " << layout->ToString() << ", shard " << i << ": " diff --git a/xla/python/pjrt_ifrt/pjrt_array.h b/xla/python/pjrt_ifrt/pjrt_array.h index d14747fea550ea..7a88f708248393 100644 --- a/xla/python/pjrt_ifrt/pjrt_array.h +++ b/xla/python/pjrt_ifrt/pjrt_array.h @@ -151,7 +151,7 @@ class PjRtArray final return sharding_; } - absl::StatusOr> layout() const override; + absl::StatusOr> layout() const override; absl::StatusOr>> DisassembleIntoSingleDeviceArrays(ArrayCopySemantics semantics) override; diff --git a/xla/python/pjrt_ifrt/pjrt_client.cc b/xla/python/pjrt_ifrt/pjrt_client.cc index dca9f6381e2e45..171adfa6e9b10e 100644 --- a/xla/python/pjrt_ifrt/pjrt_client.cc +++ b/xla/python/pjrt_ifrt/pjrt_client.cc @@ -1116,7 +1116,7 @@ absl::StatusOr> PjRtClient::GetTopologyForDevices( topology)); } -absl::StatusOr> +absl::StatusOr> PjRtClient::GetDefaultLayoutForDevice(DType dtype, absl::Span dims, Device* device) const { diff --git a/xla/python/pjrt_ifrt/pjrt_client.h b/xla/python/pjrt_ifrt/pjrt_client.h index 4849f5329e9e07..3f87a7139bddb2 100644 --- a/xla/python/pjrt_ifrt/pjrt_client.h +++ b/xla/python/pjrt_ifrt/pjrt_client.h @@ -259,9 +259,9 @@ class PjRtClient final absl::StatusOr> GetTopologyForDevices( const tsl::RCReference& devices) const override; - absl::StatusOr> GetDefaultLayoutForDevice( - DType dtype, absl::Span dims, - Device* device) const override; + absl::StatusOr> + GetDefaultLayoutForDevice(DType dtype, absl::Span dims, + Device* device) const override; absl::StatusOr LookupPjRtDevice( xla::PjRtDevice* pjrt_device) const override; diff --git a/xla/python/pjrt_ifrt/pjrt_executable.h b/xla/python/pjrt_ifrt/pjrt_executable.h index ce83ee0da24de1..cb75494a5a4599 100644 --- a/xla/python/pjrt_ifrt/pjrt_executable.h +++ b/xla/python/pjrt_ifrt/pjrt_executable.h @@ -116,13 +116,13 @@ class PjRtExecutable final return pjrt_executable_->GetOutputShardings(); } - absl::StatusOr>> + absl::StatusOr>> GetParameterLayouts() const override { DCHECK(this); return pjrt_executable_->GetParameterLayouts(); } - absl::StatusOr>> + absl::StatusOr>> GetOutputLayouts() const override { DCHECK(this); return pjrt_executable_->GetOutputLayouts(); @@ -242,13 +242,13 @@ class PjRtLoadedExecutable final return pjrt_loaded_executable_->GetOutputShardings(); } - absl::StatusOr>> + absl::StatusOr>> GetParameterLayouts() const override { DCHECK(this); return pjrt_loaded_executable_->GetParameterLayouts(); } - absl::StatusOr>> + absl::StatusOr>> GetOutputLayouts() const override { DCHECK(this); return pjrt_loaded_executable_->GetOutputLayouts(); diff --git a/xla/python/py_array.cc b/xla/python/py_array.cc index a8899b8ea144fe..e917dc3e4294dd 100644 --- a/xla/python/py_array.cc +++ b/xla/python/py_array.cc @@ -47,6 +47,7 @@ limitations under the License. 
#include "llvm/Support/Casting.h" #include "nanobind/nanobind.h" #include "nanobind/stl/optional.h" // IWYU pragma: keep +#include "nanobind/stl/shared_ptr.h" // IWYU pragma: keep #include "nanobind/stl/string.h" // IWYU pragma: keep #include "nanobind/stl/string_view.h" // IWYU pragma: keep #include "nanobind/stl/unique_ptr.h" // IWYU pragma: keep diff --git a/xla/python/py_array.h b/xla/python/py_array.h index 61987eb985e003..d3bf0ca3337966 100644 --- a/xla/python/py_array.h +++ b/xla/python/py_array.h @@ -171,7 +171,7 @@ class PyArray : public nanobind::object { const nanobind::object& sharding() const { return GetStorage().sharding; } - absl::StatusOr> layout() { + absl::StatusOr> layout() { return ifrt_array()->layout(); } diff --git a/xla/python/py_client.cc b/xla/python/py_client.cc index f900fe09170092..6d9cf48173aaff 100644 --- a/xla/python/py_client.cc +++ b/xla/python/py_client.cc @@ -777,7 +777,8 @@ PyType_Slot PyClient::slots_[] = { .def( "get_default_layout", [](PyClient& self, nb_dtype dtype, nb::sequence shard_shape, - nb_class_ptr device) -> std::unique_ptr { + nb_class_ptr device) + -> std::shared_ptr { ifrt::DType ifrt_type = xla::ValueOrThrow(DtypeToIfRtDType(dtype)); std::vector dims = SequenceToVector(shard_shape); return xla::ValueOrThrow( diff --git a/xla/python/py_compile_only_client.cc b/xla/python/py_compile_only_client.cc index d366ef93c096bf..a31e732a84ee11 100644 --- a/xla/python/py_compile_only_client.cc +++ b/xla/python/py_compile_only_client.cc @@ -336,7 +336,7 @@ class CompileOnlyIfRtClient final return topology_; } - absl::StatusOr> GetDefaultLayoutForDevice( + absl::StatusOr> GetDefaultLayoutForDevice( ifrt::DType dtype, absl::Span dims, ifrt::Device* device) const override { TF_ASSIGN_OR_RETURN(PrimitiveType element_type, ToPrimitiveType(dtype)); diff --git a/xla/python/py_executable.cc b/xla/python/py_executable.cc index 7326521695c7bc..bd582d3035cf58 100644 --- a/xla/python/py_executable.cc +++ b/xla/python/py_executable.cc @@ -415,13 +415,13 @@ PyLoadedExecutable::GetOutputMemoryKinds() const { return ifrt_loaded_executable_->GetOutputMemoryKinds(); } -absl::StatusOr>> +absl::StatusOr>> PyLoadedExecutable::GetParameterLayouts() const { nb::gil_scoped_release gil_release; return ifrt_loaded_executable_->GetParameterLayouts(); } -absl::StatusOr>> +absl::StatusOr>> PyLoadedExecutable::GetOutputLayouts() const { nb::gil_scoped_release gil_release; return ifrt_loaded_executable_->GetOutputLayouts(); diff --git a/xla/python/py_executable.h b/xla/python/py_executable.h index 9af7a4a7839702..480f33d99d95a9 100644 --- a/xla/python/py_executable.h +++ b/xla/python/py_executable.h @@ -189,11 +189,11 @@ class PyLoadedExecutable { absl::StatusOr>> GetOutputMemoryKinds() const; - absl::StatusOr>> GetParameterLayouts() - const; + absl::StatusOr>> + GetParameterLayouts() const; - absl::StatusOr>> GetOutputLayouts() - const; + absl::StatusOr>> + GetOutputLayouts() const; std::optional> GetParameterShardings() const; diff --git a/xla/tools/multihost_hlo_runner/functional_hlo_runner.cc b/xla/tools/multihost_hlo_runner/functional_hlo_runner.cc index 023252fd8c690b..3101f288cf6775 100644 --- a/xla/tools/multihost_hlo_runner/functional_hlo_runner.cc +++ b/xla/tools/multihost_hlo_runner/functional_hlo_runner.cc @@ -1307,13 +1307,13 @@ FunctionalHloRunner::CopyArgumentsToDevice( TF_RET_CHECK(!shape.IsTuple()) << "Param tuple without flattened_arguments"; return non_tuple_memory_space(shape); }; - TF_ASSIGN_OR_RETURN(const std::vector>& + TF_ASSIGN_OR_RETURN(const 
std::vector<std::shared_ptr<const PjRtLayout>>&
                          executable_parameter_pjrt_layouts,
                      executable->GetParameterLayouts());
   std::vector<Layout> executable_parameter_layouts;
   executable_parameter_layouts.reserve(
       executable_parameter_pjrt_layouts.size());
-  for (const std::unique_ptr<PjRtLayout>& pjrt_layout :
+  for (const std::shared_ptr<const PjRtLayout>& pjrt_layout :
        executable_parameter_pjrt_layouts) {
     executable_parameter_layouts.push_back(
         xla::GetXlaLayoutUnsafe(pjrt_layout));
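
For readers skimming the patch, the self-contained sketch below (not part of the change) illustrates why returning `std::shared_ptr<const ...>` from an abstract-layout accessor avoids the copies described in the commit message. All class names are placeholders standing in for `xla::PjRtLayout`, `xla::PjRtXlaLayout`, and `xla::PjRtBuffer`; they are not the real XLA types.

```cpp
// Hypothetical illustration of the pattern this patch adopts: an accessor on
// an abstract layout type returns a cached shared_ptr<const ...> instead of a
// fresh unique_ptr, so callers get the same instance without deep copies or
// downcasts. Placeholder names only.
#include <memory>
#include <string>
#include <utility>

class AbstractLayout {  // stands in for xla::PjRtLayout
 public:
  virtual ~AbstractLayout() = default;
  virtual std::string ToString() const = 0;
};

class ConcreteLayout : public AbstractLayout {  // stands in for xla::PjRtXlaLayout
 public:
  explicit ConcreteLayout(std::string minor_to_major)
      : minor_to_major_(std::move(minor_to_major)) {}
  std::string ToString() const override { return minor_to_major_; }

 private:
  // Stands in for the relatively large xla::Layout payload that made deep
  // copies through a unique_ptr-returning API expensive.
  std::string minor_to_major_;
};

class Buffer {  // stands in for xla::PjRtBuffer / xla::ifrt::Array
 public:
  Buffer() : layout_(std::make_shared<ConcreteLayout>("{1,0}")) {}

  // Hands out the cached layout; only the shared_ptr control block is
  // touched, and no downcast is needed to clone the concrete object.
  std::shared_ptr<const AbstractLayout> layout() const { return layout_; }

 private:
  std::shared_ptr<const AbstractLayout> layout_;
};

int main() {
  Buffer buffer;
  // Repeated calls return the same cached instance, which callers may keep
  // alive independently of the buffer.
  std::shared_ptr<const AbstractLayout> a = buffer.layout();
  std::shared_ptr<const AbstractLayout> b = buffer.layout();
  return (a.get() == b.get() && a->ToString() == "{1,0}") ? 0 : 1;
}
```

This mirrors how `PjRtCApiBuffer::layout()` now returns its internally cached instance rather than constructing a copy on every call.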