From 68f1c4d75d28d15137c18eb8d235dd6f3c56a065 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Fri, 6 Jun 2025 17:06:49 -0700
Subject: [PATCH 01/10] Update PyTorch/XLA to 20250606

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 78d666fa..b76a3aa5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dev = [
 tp = "torchprime.launcher.cli:cli"
 
 [tool.torchprime]
-torch_xla_version = "20250501"
+torch_xla_version = "20250606"
 
 [tool.setuptools.packages.find]
 where = [""]

From 923b32559fab8539dae4ca4688f740223503fb81 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Fri, 6 Jun 2025 20:31:25 -0700
Subject: [PATCH 02/10] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index b76a3aa5..892c8847 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dev = [
 tp = "torchprime.launcher.cli:cli"
 
 [tool.torchprime]
-torch_xla_version = "20250606"
+torch_xla_version = "20250528"
 
 [tool.setuptools.packages.find]
 where = [""]

From a2c594a0bddd43857594337dd8868ad30a099cab Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Fri, 6 Jun 2025 21:07:32 -0700
Subject: [PATCH 03/10] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 892c8847..57bfe9a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dev = [
 tp = "torchprime.launcher.cli:cli"
 
 [tool.torchprime]
-torch_xla_version = "20250528"
+torch_xla_version = "20250530"
 
 [tool.setuptools.packages.find]
 where = [""]

From 23d9db1ccec57d1dbedc0160602eb4980b76ab1e Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Fri, 6 Jun 2025 22:10:09 -0700
Subject: [PATCH 04/10] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 57bfe9a9..d9d159c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dev = [
 tp = "torchprime.launcher.cli:cli"
 
 [tool.torchprime]
-torch_xla_version = "20250530"
+torch_xla_version = "20250602"
 
 [tool.setuptools.packages.find]
 where = [""]

From 918ebdb96dcb0ef32a7f6caf3df3599abb1b1040 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Fri, 6 Jun 2025 22:44:26 -0700
Subject: [PATCH 05/10] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index d9d159c3..9d9df99e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dev = [
 tp = "torchprime.launcher.cli:cli"
 
 [tool.torchprime]
-torch_xla_version = "20250602"
+torch_xla_version = "20250601"
 
 [tool.setuptools.packages.find]
 where = [""]

From 0fdf9cf64725d7892586c6a973e55243896d14b4 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Fri, 6 Jun 2025 23:16:39 -0700
Subject: [PATCH 06/10] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 9d9df99e..4ac35778 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dev = [
 tp = "torchprime.launcher.cli:cli"
 
 [tool.torchprime]
-torch_xla_version = "20250601"
+torch_xla_version = "20250531"
 
 [tool.setuptools.packages.find]
 where = [""]

From 5ca325338ce2847aea12b37baee9fdc58f974534 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Sat, 7 Jun 2025 23:11:06 -0700
Subject: [PATCH 07/10] Update pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4ac35778..b76a3aa5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dev = [
 tp = "torchprime.launcher.cli:cli"
 
 [tool.torchprime]
-torch_xla_version = "20250531"
+torch_xla_version = "20250606"
 
 [tool.setuptools.packages.find]
 where = [""]

From 02a8d99c04be460e130c3be2cbc626ca1e145282 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Sat, 7 Jun 2025 23:19:52 -0700
Subject: [PATCH 08/10] update

---
 torchprime/launcher/thunk.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/torchprime/launcher/thunk.py b/torchprime/launcher/thunk.py
index 3fc4b6f9..b0fff59a 100644
--- a/torchprime/launcher/thunk.py
+++ b/torchprime/launcher/thunk.py
@@ -14,6 +14,10 @@
 # `--megascale_grpc_enable_xor_tracer=false` flag when libtpu is updated
 xla_flags = os.environ.get("LIBTPU_INIT_ARGS", "")
 xla_flags = f"{xla_flags} --megascale_grpc_enable_xor_tracer=false"
+
+# Workaround for MegaScale perf regression
+# TODO(b/NNN): Remove the `--megascale_grpc_num_channels` override
+xla_flags = f"{xla_flags} --megascale_grpc_num_channels=64"
 os.environ["LIBTPU_INIT_ARGS"] = xla_flags
 
 # Get the artifact dir from env var.

From bc8aed4cd32cc29781bbba91448467abc046ccb3 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Sun, 8 Jun 2025 13:30:54 -0700
Subject: [PATCH 09/10] Update thunk.py

---
 torchprime/launcher/thunk.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/torchprime/launcher/thunk.py b/torchprime/launcher/thunk.py
index b0fff59a..5ec90b0b 100644
--- a/torchprime/launcher/thunk.py
+++ b/torchprime/launcher/thunk.py
@@ -16,7 +16,9 @@
 xla_flags = f"{xla_flags} --megascale_grpc_enable_xor_tracer=false"
 
 # Workaround for MegaScale perf regression
-# TODO(b/NNN): Remove the `--megascale_grpc_num_channels` override
+#
+# TODO(b/423386767): Remove the `--megascale_grpc_num_channels` override
+# when the perf regression is fixed.
 xla_flags = f"{xla_flags} --megascale_grpc_num_channels=64"
 os.environ["LIBTPU_INIT_ARGS"] = xla_flags

From 4755f4e1a2acf0c2c4de10e2c47de7ec725477d8 Mon Sep 17 00:00:00 2001
From: Yifei Teng
Date: Mon, 9 Jun 2025 09:57:53 -0700
Subject: [PATCH 10/10] Update thunk.py

---
 torchprime/launcher/thunk.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/torchprime/launcher/thunk.py b/torchprime/launcher/thunk.py
index 5ec90b0b..9203305a 100644
--- a/torchprime/launcher/thunk.py
+++ b/torchprime/launcher/thunk.py
@@ -17,8 +17,9 @@
 
 # Workaround for MegaScale perf regression
 #
-# TODO(b/423386767): Remove the `--megascale_grpc_num_channels` override
-# when the perf regression is fixed.
+# TODO(https://github.com/AI-Hypercomputer/torchprime/issues/300):
+# Remove the `--megascale_grpc_num_channels` override when the perf
+# regression is fixed.
 xla_flags = f"{xla_flags} --megascale_grpc_num_channels=64"
 os.environ["LIBTPU_INIT_ARGS"] = xla_flags
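
Note: with patches 08-10 applied, the LIBTPU_INIT_ARGS setup near the top of torchprime/launcher/thunk.py ends up roughly as sketched below. This is a reconstruction from the hunk context above, not the verbatim file; in particular the `import os` is assumed from the surrounding module rather than shown in any diff.

import os

# Keep any flags already set by the environment, then append our overrides.
xla_flags = os.environ.get("LIBTPU_INIT_ARGS", "")

# Disable the MegaScale gRPC XOR tracer (see the existing comment in the file
# about dropping this flag once libtpu is updated).
xla_flags = f"{xla_flags} --megascale_grpc_enable_xor_tracer=false"

# Workaround for MegaScale perf regression
#
# TODO(https://github.com/AI-Hypercomputer/torchprime/issues/300):
# Remove the `--megascale_grpc_num_channels` override when the perf
# regression is fixed.
xla_flags = f"{xla_flags} --megascale_grpc_num_channels=64"

# Export the combined flags so libtpu picks them up when the TPU runtime
# initializes later in the launcher.
os.environ["LIBTPU_INIT_ARGS"] = xla_flags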