From 5d68632ee979ee52aa9092d364679f298d8e43aa Mon Sep 17 00:00:00 2001
From: landscapepainter <34902420+landscapepainter@users.noreply.github.com>
Date: Sun, 18 Aug 2024 14:29:02 -0700
Subject: [PATCH 01/12] [SkyServe][Test] Fix
 test_smoke.py::test_skyserve_new_autoscaler_update (#3824)

* fix test_smoke.py::test_skyserve_new_autoscaler_update

* nit

* format

* move out the comment from run at new_autoscaler_after.yaml

* revert comment

* revert templating

* nit

* nit
---
 tests/skyserve/update/new_autoscaler_after.yaml  | 3 +--
 tests/skyserve/update/new_autoscaler_before.yaml | 3 +--
 tests/test_smoke.py                              | 4 ++--
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/tests/skyserve/update/new_autoscaler_after.yaml b/tests/skyserve/update/new_autoscaler_after.yaml
index f5a2e552f67..2d12d3ef109 100644
--- a/tests/skyserve/update/new_autoscaler_after.yaml
+++ b/tests/skyserve/update/new_autoscaler_after.yaml
@@ -8,7 +8,6 @@ service:
     base_ondemand_fallback_replicas: 1
 
 resources:
-  cloud: gcp
   ports: 8081
   use_spot: true
   cpus: 2+
@@ -22,4 +21,4 @@ run: |
     # blue-green update.
     sleep 120
   fi
-  python3 server.py
+  python3 server.py --port 8081
diff --git a/tests/skyserve/update/new_autoscaler_before.yaml b/tests/skyserve/update/new_autoscaler_before.yaml
index a91c3cd230a..793221080ae 100644
--- a/tests/skyserve/update/new_autoscaler_before.yaml
+++ b/tests/skyserve/update/new_autoscaler_before.yaml
@@ -5,10 +5,9 @@ service:
   replicas: 2
 
 resources:
-  cloud: gcp
   ports: 8081
   cpus: 2+
 
 workdir: examples/serve/http_server
 
-run: python3 server.py
+run: python3 server.py --port 8081
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 33280e1fd4c..874e51e0f9c 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -3952,7 +3952,7 @@ def test_skyserve_update_autoscale(generic_cloud: str):
 @pytest.mark.parametrize('mode', ['rolling', 'blue_green'])
 def test_skyserve_new_autoscaler_update(mode: str, generic_cloud: str):
     """Test skyserve with update that changes autoscaler"""
-    name = _get_service_name() + mode
+    name = f'{_get_service_name()}-{mode}'
 
     wait_until_no_pending = (
         f's=$(sky serve status {name}); echo "$s"; '
@@ -3982,7 +3982,7 @@ def test_skyserve_new_autoscaler_update(mode: str, generic_cloud: str):
             _check_service_version(name, "1"),
         ]
     test = Test(
-        'test-skyserve-new-autoscaler-update',
+        f'test-skyserve-new-autoscaler-update-{mode}',
         [
             f'sky serve up -n {name} --cloud {generic_cloud} -y tests/skyserve/update/new_autoscaler_before.yaml',
             _SERVE_WAIT_UNTIL_READY.format(name=name, replica_num=2) +

From 0f8882e358fbbf3b97dbe2ef17291cd1e708cdd9 Mon Sep 17 00:00:00 2001
From: Tian Xia <cblmemo@gmail.com>
Date: Mon, 19 Aug 2024 19:54:04 +0800
Subject: [PATCH 02/12] [Catalog] Fix Azure catalog duplications (#3842)

fix
---
 sky/clouds/service_catalog/data_fetchers/fetch_azure.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sky/clouds/service_catalog/data_fetchers/fetch_azure.py b/sky/clouds/service_catalog/data_fetchers/fetch_azure.py
index 9a7b2a90bee..bbd337e23aa 100644
--- a/sky/clouds/service_catalog/data_fetchers/fetch_azure.py
+++ b/sky/clouds/service_catalog/data_fetchers/fetch_azure.py
@@ -140,8 +140,12 @@ def get_pricing_df(region: Optional[str] = None) -> 'pd.DataFrame':
     print(f'Done fetching pricing {region}')
     df = pd.DataFrame(all_items)
     assert 'productName' in df.columns, (region, df.columns)
-    return df[(~df['productName'].str.contains(' Windows')) &
-              (df['unitPrice'] > 0)]
+    # Filter out the cloud services and windows products.
+    # Some H100 series use ' Win' instead of ' Windows', e.g.
+    # Virtual Machines NCCadsv5 Srs Win
+    return df[
+        (~df['productName'].str.contains(' Win| Cloud Services| CloudServices'))
+        & (df['unitPrice'] > 0)]
 
 
 def get_sku_df(region_set: Set[str]) -> 'pd.DataFrame':

From fffeacda56a96b71666724ddab13e89f90946943 Mon Sep 17 00:00:00 2001
From: Tian Xia <cblmemo@gmail.com>
Date: Mon, 19 Aug 2024 21:26:13 +0800
Subject: [PATCH 03/12] [Core][TPU] Support TPU V5 (#3814)

* init

* add v5p in europe-west4-b

* reset tpu v5 default runtime version

* resolve comments

* add comment for pricing ingo

* apply suggestions from code review

* refactor comment
---
 .../data_fetchers/fetch_gcp.py                | 151 ++++++++++++++++--
 sky/resources.py                              |  15 +-
 2 files changed, 152 insertions(+), 14 deletions(-)

diff --git a/sky/clouds/service_catalog/data_fetchers/fetch_gcp.py b/sky/clouds/service_catalog/data_fetchers/fetch_gcp.py
index 9578196b4eb..5b680500c75 100644
--- a/sky/clouds/service_catalog/data_fetchers/fetch_gcp.py
+++ b/sky/clouds/service_catalog/data_fetchers/fetch_gcp.py
@@ -54,6 +54,110 @@
  ,tpu-v3-1024,1,,,tpu-v3-1024,1024.0,307.2,us-east1,us-east1-d
  ,tpu-v3-2048,1,,,tpu-v3-2048,2048.0,614.4,us-east1,us-east1-d
  """)))
+
+# TPU V5 is not visible in specific zones. We hardcode the missing zones here.
+# NOTE(dev): Keep the zones and the df in sync.
+TPU_V5_MISSING_ZONES_DF = {
+    'europe-west4-b': pd.read_csv(
+        io.StringIO(
+            textwrap.dedent("""\
+ AcceleratorName,AcceleratorCount,Region,AvailabilityZone
+ tpu-v5p-8,1,europe-west4,europe-west4-b
+ tpu-v5p-16,1,europe-west4,europe-west4-b
+ tpu-v5p-32,1,europe-west4,europe-west4-b
+ tpu-v5p-64,1,europe-west4,europe-west4-b
+ tpu-v5p-128,1,europe-west4,europe-west4-b
+ tpu-v5p-256,1,europe-west4,europe-west4-b
+ tpu-v5p-384,1,europe-west4,europe-west4-b
+ tpu-v5p-512,1,europe-west4,europe-west4-b
+ tpu-v5p-640,1,europe-west4,europe-west4-b
+ tpu-v5p-768,1,europe-west4,europe-west4-b
+ tpu-v5p-896,1,europe-west4,europe-west4-b
+ tpu-v5p-1024,1,europe-west4,europe-west4-b
+ tpu-v5p-1152,1,europe-west4,europe-west4-b
+ tpu-v5p-1280,1,europe-west4,europe-west4-b
+ tpu-v5p-1408,1,europe-west4,europe-west4-b
+ tpu-v5p-1536,1,europe-west4,europe-west4-b
+ tpu-v5p-1664,1,europe-west4,europe-west4-b
+ tpu-v5p-1792,1,europe-west4,europe-west4-b
+ tpu-v5p-1920,1,europe-west4,europe-west4-b
+ tpu-v5p-2048,1,europe-west4,europe-west4-b
+ tpu-v5p-2176,1,europe-west4,europe-west4-b
+ tpu-v5p-2304,1,europe-west4,europe-west4-b
+ tpu-v5p-2432,1,europe-west4,europe-west4-b
+ tpu-v5p-2560,1,europe-west4,europe-west4-b
+ tpu-v5p-2688,1,europe-west4,europe-west4-b
+ tpu-v5p-2816,1,europe-west4,europe-west4-b
+ tpu-v5p-2944,1,europe-west4,europe-west4-b
+ tpu-v5p-3072,1,europe-west4,europe-west4-b
+ tpu-v5p-3200,1,europe-west4,europe-west4-b
+ tpu-v5p-3328,1,europe-west4,europe-west4-b
+ tpu-v5p-3456,1,europe-west4,europe-west4-b
+ tpu-v5p-3584,1,europe-west4,europe-west4-b
+ tpu-v5p-3712,1,europe-west4,europe-west4-b
+ tpu-v5p-3840,1,europe-west4,europe-west4-b
+ tpu-v5p-3968,1,europe-west4,europe-west4-b
+ tpu-v5p-4096,1,europe-west4,europe-west4-b
+ tpu-v5p-4224,1,europe-west4,europe-west4-b
+ tpu-v5p-4352,1,europe-west4,europe-west4-b
+ tpu-v5p-4480,1,europe-west4,europe-west4-b
+ tpu-v5p-4608,1,europe-west4,europe-west4-b
+ tpu-v5p-4736,1,europe-west4,europe-west4-b
+ tpu-v5p-4864,1,europe-west4,europe-west4-b
+ tpu-v5p-4992,1,europe-west4,europe-west4-b
+ tpu-v5p-5120,1,europe-west4,europe-west4-b
+ tpu-v5p-5248,1,europe-west4,europe-west4-b
+ tpu-v5p-5376,1,europe-west4,europe-west4-b
+ tpu-v5p-5504,1,europe-west4,europe-west4-b
+ tpu-v5p-5632,1,europe-west4,europe-west4-b
+ tpu-v5p-5760,1,europe-west4,europe-west4-b
+ tpu-v5p-5888,1,europe-west4,europe-west4-b
+ tpu-v5p-6016,1,europe-west4,europe-west4-b
+ tpu-v5p-6144,1,europe-west4,europe-west4-b
+ tpu-v5p-6272,1,europe-west4,europe-west4-b
+ tpu-v5p-6400,1,europe-west4,europe-west4-b
+ tpu-v5p-6528,1,europe-west4,europe-west4-b
+ tpu-v5p-6656,1,europe-west4,europe-west4-b
+ tpu-v5p-6784,1,europe-west4,europe-west4-b
+ tpu-v5p-6912,1,europe-west4,europe-west4-b
+ tpu-v5p-7040,1,europe-west4,europe-west4-b
+ tpu-v5p-7168,1,europe-west4,europe-west4-b
+ tpu-v5p-7296,1,europe-west4,europe-west4-b
+ tpu-v5p-7424,1,europe-west4,europe-west4-b
+ tpu-v5p-7552,1,europe-west4,europe-west4-b
+ tpu-v5p-7680,1,europe-west4,europe-west4-b
+ tpu-v5p-7808,1,europe-west4,europe-west4-b
+ tpu-v5p-7936,1,europe-west4,europe-west4-b
+ tpu-v5p-8064,1,europe-west4,europe-west4-b
+ tpu-v5p-8192,1,europe-west4,europe-west4-b
+ tpu-v5p-8320,1,europe-west4,europe-west4-b
+ tpu-v5p-8448,1,europe-west4,europe-west4-b
+ tpu-v5p-8704,1,europe-west4,europe-west4-b
+ tpu-v5p-8832,1,europe-west4,europe-west4-b
+ tpu-v5p-8960,1,europe-west4,europe-west4-b
+ tpu-v5p-9216,1,europe-west4,europe-west4-b
+ tpu-v5p-9472,1,europe-west4,europe-west4-b
+ tpu-v5p-9600,1,europe-west4,europe-west4-b
+ tpu-v5p-9728,1,europe-west4,europe-west4-b
+ tpu-v5p-9856,1,europe-west4,europe-west4-b
+ tpu-v5p-9984,1,europe-west4,europe-west4-b
+ tpu-v5p-10240,1,europe-west4,europe-west4-b
+ tpu-v5p-10368,1,europe-west4,europe-west4-b
+ tpu-v5p-10496,1,europe-west4,europe-west4-b
+ tpu-v5p-10752,1,europe-west4,europe-west4-b
+ tpu-v5p-10880,1,europe-west4,europe-west4-b
+ tpu-v5p-11008,1,europe-west4,europe-west4-b
+ tpu-v5p-11136,1,europe-west4,europe-west4-b
+ tpu-v5p-11264,1,europe-west4,europe-west4-b
+ tpu-v5p-11520,1,europe-west4,europe-west4-b
+ tpu-v5p-11648,1,europe-west4,europe-west4-b
+ tpu-v5p-11776,1,europe-west4,europe-west4-b
+ tpu-v5p-11904,1,europe-west4,europe-west4-b
+ tpu-v5p-12032,1,europe-west4,europe-west4-b
+ tpu-v5p-12160,1,europe-west4,europe-west4-b
+ tpu-v5p-12288,1,europe-west4,europe-west4-b
+ """)))
+}
 # FIXME(woosuk): Remove this once the bug is fixed.
 # See https://github.com/skypilot-org/skypilot/issues/1759#issue-1619614345
 TPU_V4_HOST_DF = pd.read_csv(
@@ -415,6 +519,12 @@ def get_gpu_price(row: pd.Series, spot: bool) -> Optional[float]:
 
 
 def _get_tpu_for_zone(zone: str) -> 'pd.DataFrame':
+    # Use hardcoded TPU V5 data as it is invisible in some zones.
+    missing_tpus_df = pd.DataFrame(columns=[
+        'AcceleratorName', 'AcceleratorCount', 'Region', 'AvailabilityZone'
+    ])
+    if zone in TPU_V5_MISSING_ZONES_DF:
+        missing_tpus_df = TPU_V5_MISSING_ZONES_DF[zone]
     tpus = []
     parent = f'projects/{project_id}/locations/{zone}'
     tpus_request = tpu_client.projects().locations().acceleratorTypes().list(
@@ -432,16 +542,14 @@ def _get_tpu_for_zone(zone: str) -> 'pd.DataFrame':
     new_tpus = []
     for tpu in tpus:
         tpu_name = tpu['type']
-        # skip tpu v5 as we currently don't support it
-        if 'v5' in tpu_name:
-            continue
         new_tpus.append({
             'AcceleratorName': f'tpu-{tpu_name}',
             'AcceleratorCount': 1,
             'Region': zone.rpartition('-')[0],
             'AvailabilityZone': zone,
         })
-    return pd.DataFrame(new_tpus).reset_index(drop=True)
+    new_tpu_df = pd.DataFrame(new_tpus).reset_index(drop=True)
+    return pd.concat([new_tpu_df, missing_tpus_df])
 
 
 def _get_tpus() -> 'pd.DataFrame':
@@ -458,11 +566,22 @@ def _get_tpus() -> 'pd.DataFrame':
 
 
 # TODO: the TPUs fetched fails to contain us-east1
-def get_tpu_df(skus: List[Dict[str, Any]]) -> 'pd.DataFrame':
+def get_tpu_df(gce_skus: List[Dict[str, Any]],
+               tpu_skus: List[Dict[str, Any]]) -> 'pd.DataFrame':
     df = _get_tpus()
     if df.empty:
         return df
 
+    def _get_tpu_description_str(tpu_version: str) -> str:
+        # TPU V5 has a different naming convention since it is contained in
+        # the GCE SKUs. v5p -> TpuV5p, v5litepod -> TpuV5e.
+        if tpu_version.startswith('v5'):
+            if tpu_version == 'v5p':
+                return 'TpuV5p'
+            assert tpu_version == 'v5litepod', tpu_version
+            return 'TpuV5e'
+        return f'Tpu-{tpu_version}'
+
     def get_tpu_price(row: pd.Series, spot: bool) -> Optional[float]:
         assert row['AcceleratorCount'] == 1, row
         tpu_price = None
@@ -475,9 +594,12 @@ def get_tpu_price(row: pd.Series, spot: bool) -> Optional[float]:
         # whether the TPU is a single device or a pod.
         # For TPU-v4, the pricing is uniform, and thus the pricing API
         # only provides the price of TPU-v4 pods.
-        is_pod = num_cores > 8 or tpu_version == 'v4'
+        # The price shown for v5 TPU is per chip hour, so there is no 'Pod'
+        # keyword in the description.
+        is_pod = ((num_cores > 8 or tpu_version == 'v4') and
+                  not tpu_version.startswith('v5'))
 
-        for sku in skus:
+        for sku in gce_skus + tpu_skus:
             if tpu_region not in sku['serviceRegions']:
                 continue
             description = sku['description']
@@ -489,7 +611,7 @@ def get_tpu_price(row: pd.Series, spot: bool) -> Optional[float]:
                 if 'Preemptible' in description:
                     continue
 
-            if f'Tpu-{tpu_version}' not in description:
+            if _get_tpu_description_str(tpu_version) not in description:
                 continue
             if is_pod:
                 if 'Pod' not in description:
@@ -500,7 +622,15 @@ def get_tpu_price(row: pd.Series, spot: bool) -> Optional[float]:
 
             unit_price = _get_unit_price(sku)
             tpu_device_price = unit_price
-            tpu_core_price = tpu_device_price / 8
+            # v5p naming convention is v$VERSION_NUMBERp-$CORES_COUNT, while
+            # v5e is v$VERSION_NUMBER-$CHIP_COUNT. In the same time, V5 price
+            # is shown as per chip price, which is 2 cores for v5p and 1 core
+            # for v5e. Reference here:
+            # https://cloud.google.com/tpu/docs/v5p#using-accelerator-type
+            # https://cloud.google.com/tpu/docs/v5e#tpu-v5e-config
+            core_per_sku = (1 if tpu_version == 'v5litepod' else
+                            2 if tpu_version == 'v5p' else 8)
+            tpu_core_price = tpu_device_price / core_per_sku
             tpu_price = num_cores * tpu_core_price
             break
 
@@ -546,7 +676,8 @@ def get_catalog_df(region_prefix: str) -> 'pd.DataFrame':
         region_prefix)] if not gpu_df.empty else gpu_df
 
     gcp_tpu_skus = get_skus(TPU_SERVICE_ID)
-    tpu_df = get_tpu_df(gcp_tpu_skus)
+    # TPU V5 SKU is not included in the TPU SKUs but in the GCE SKUs.
+    tpu_df = get_tpu_df(gcp_skus, gcp_tpu_skus)
 
     # Merge the dataframes.
     df = pd.concat([vm_df, gpu_df, tpu_df, TPU_V4_HOST_DF])
diff --git a/sky/resources.py b/sky/resources.py
index f0cb1abda1e..2f19cd1aa01 100644
--- a/sky/resources.py
+++ b/sky/resources.py
@@ -578,10 +578,17 @@ def _set_accelerators(
                                 'Cannot specify instance type'
                                 f' (got "{self.instance_type}") for TPU VM.')
                 if 'runtime_version' not in accelerator_args:
-                    if use_tpu_vm:
-                        accelerator_args['runtime_version'] = 'tpu-vm-base'
-                    else:
-                        accelerator_args['runtime_version'] = '2.12.0'
+
+                    def _get_default_runtime_version() -> str:
+                        if not use_tpu_vm:
+                            return '2.12.0'
+                        # TPU V5 requires a newer runtime version.
+                        if acc.startswith('tpu-v5'):
+                            return 'v2-alpha-tpuv5'
+                        return 'tpu-vm-base'
+
+                    accelerator_args['runtime_version'] = (
+                        _get_default_runtime_version())
                     logger.info(
                         'Missing runtime_version in accelerator_args, using'
                         f' default ({accelerator_args["runtime_version"]})')

From 90de1b2564d16463a49f198199da4dc3e9540695 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Mon, 19 Aug 2024 16:50:53 -0700
Subject: [PATCH 04/12] [Docs] Fix imgur links (#3846)

* Fix imgur links

* Remove unecessary file

* revert
---
 docs/source/examples/interactive-development.rst | 2 +-
 llm/codellama/README.md                          | 4 ++--
 llm/falcon/README.md                             | 2 +-
 llm/gpt-2/README.md                              | 8 ++++----
 llm/llama-2/README.md                            | 2 +-
 llm/llama-3/README.md                            | 4 ++--
 llm/llama-3_1-finetuning/readme.md               | 6 +++---
 llm/lorax/README.md                              | 2 +-
 llm/vicuna-llama-2/README.md                     | 6 +++---
 llm/vllm/README.md                               | 2 +-
 10 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/docs/source/examples/interactive-development.rst b/docs/source/examples/interactive-development.rst
index cc50f8e6ea8..40920934597 100644
--- a/docs/source/examples/interactive-development.rst
+++ b/docs/source/examples/interactive-development.rst
@@ -110,7 +110,7 @@ This is supported by simply connecting VSCode to the cluster with the cluster na
 
 For more details, please refer to the `VSCode documentation <https://code.visualstudio.com/docs/remote/ssh-tutorial>`__.
 
-.. image:: https://imgur.com/8mKfsET.gif
+.. image:: https://i.imgur.com/8mKfsET.gif
   :align: center
   :alt: Connect to the cluster with VSCode
 
diff --git a/llm/codellama/README.md b/llm/codellama/README.md
index 8e5025d22b5..f145fd062ff 100644
--- a/llm/codellama/README.md
+++ b/llm/codellama/README.md
@@ -10,14 +10,14 @@ The followings are the demos of Code Llama 70B hosted by SkyPilot Serve (aka Sky
 ## Demos
 <figure>
 <center>
-<img src="https://imgur.com/fguAmP0.gif" width="60%" title="Coding Assistant: Connect to hosted Code Llama with Tabby in VScode" />
+<img src="https://i.imgur.com/fguAmP0.gif" width="60%" title="Coding Assistant: Connect to hosted Code Llama with Tabby in VScode" />
 
 <figcaption>Coding Assistant: Connect to hosted Code Llama with Tabby in VScode</figcaption>
 </figure>
 
 <figure>
 <center>
-<img src="https://imgur.com/Dor1MoE.gif" width="60%" title="Chat: Connect to hosted Code Llama with FastChat" />
+<img src="https://i.imgur.com/Dor1MoE.gif" width="60%" title="Chat: Connect to hosted Code Llama with FastChat" />
 
 <figcaption>Chat: Connect to hosted Code Llama with FastChat</figcaption>
 </figure>
diff --git a/llm/falcon/README.md b/llm/falcon/README.md
index 837e93f5558..6eb480d9ea8 100644
--- a/llm/falcon/README.md
+++ b/llm/falcon/README.md
@@ -50,7 +50,7 @@ sky launch -c falcon -s falcon.yaml --no-use-spot
 
 For reference, below is a loss graph you may expect to see, and the amount of time and the approximate cost of fine-tuning each of the models over 500 epochs (assuming a spot instance A100 GPU rate at $1.1 / hour and a A100-80GB rate of $1.61 / hour):
 
-<img width="524" alt="image" src="https://imgur.com/BDlHink.png">
+<img width="524" alt="image" src="https://i.imgur.com/BDlHink.png">
 
 1. `ybelkada/falcon-7b-sharded-bf16`: 2.5 to 3 hours using 1 A100 spot GPU; total cost ≈ $3.3.
 
diff --git a/llm/gpt-2/README.md b/llm/gpt-2/README.md
index bc9893fec5b..10fa2cf6998 100644
--- a/llm/gpt-2/README.md
+++ b/llm/gpt-2/README.md
@@ -28,14 +28,14 @@ Run the following command to start GPT-2 (124M) training on a GPU VM with 8 A100
 sky launch -c gpt2 gpt2.yaml
 ```
 
-![GPT-2 training with 8 A100 GPUs](https://imgur.com/v8SGpsF.png)
+![GPT-2 training with 8 A100 GPUs](https://i.imgur.com/v8SGpsF.png)
 
 Or, you can train the model with a single A100, by adding `--gpus A100`:
 ```bash
 sky launch -c gpt2 gpt2.yaml --gpus A100
 ```
 
-![GPT-2 training with a single A100](https://imgur.com/hN65g4r.png)
+![GPT-2 training with a single A100](https://i.imgur.com/hN65g4r.png)
 
 
 It is also possible to speed up the training of the model on 8 H100 (2.3x more tok/s than 8x A100s):
@@ -43,7 +43,7 @@ It is also possible to speed up the training of the model on 8 H100 (2.3x more t
 sky launch -c gpt2 gpt2.yaml --gpus H100:8
 ```
 
-![GPT-2 training with 8 H100](https://imgur.com/STbi80b.png)
+![GPT-2 training with 8 H100](https://i.imgur.com/STbi80b.png)
 
 ### Download logs and visualizations
 
@@ -54,7 +54,7 @@ scp -r gpt2:~/llm.c/log124M .
 We can visualize the training progress with the notebook provided in [llm.c](https://github.com/karpathy/llm.c/blob/master/dev/vislog.ipynb). (Note: we cut off the training after 10K steps, which already achieve similar validation loss as OpenAI GPT-2 checkpoint.)
 
 <div align="center">
-<img src="https://imgur.com/lskPEAQ.png" width="60%">
+<img src="https://i.imgur.com/lskPEAQ.png" width="60%">
 </div>
 
 > Yes! We are able to reproduce the training of GPT-2 (124M) on any cloud with SkyPilot.
diff --git a/llm/llama-2/README.md b/llm/llama-2/README.md
index d8f8151572e..4f1a8f60cae 100644
--- a/llm/llama-2/README.md
+++ b/llm/llama-2/README.md
@@ -94,6 +94,6 @@ You can also host the official FAIR model without using huggingface and gradio.
     ```
 
 3. Open http://localhost:7681 in your browser and start chatting!
-<img src="https://imgur.com/Ay8sDhG.png" alt="LLaMA chatbot running on the cloud via SkyPilot"/>
+<img src="https://i.imgur.com/Ay8sDhG.png" alt="LLaMA chatbot running on the cloud via SkyPilot"/>
 
 
diff --git a/llm/llama-3/README.md b/llm/llama-3/README.md
index d0c28dc93c6..ef19d94b5c0 100644
--- a/llm/llama-3/README.md
+++ b/llm/llama-3/README.md
@@ -5,7 +5,7 @@
 
 
 <p align="center">
-<img src="https://imgur.com/1NEZs9f.png" alt="Llama-3 x SkyPilot" style="width: 50%;">
+<img src="https://i.imgur.com/1NEZs9f.png" alt="Llama-3 x SkyPilot" style="width: 50%;">
 </p>
 
 [Llama-3](https://github.com/meta-llama/llama3) is the latest top open-source LLM from Meta. It has been released with a license that authorizes commercial use. You can deploy a private Llama-3 chatbot with SkyPilot in your own cloud with just one simple command.
@@ -248,7 +248,7 @@ To use the Gradio UI, open the URL shown in the logs:
 
 
 <p align="center">
-<img src="https://imgur.com/zPpY2Bg.gif" alt="Gradio UI serving Llama-3" style="width: 80%;">
+<img src="https://i.imgur.com/zPpY2Bg.gif" alt="Gradio UI serving Llama-3" style="width: 80%;">
 </p>
 
 To stop the instance:
diff --git a/llm/llama-3_1-finetuning/readme.md b/llm/llama-3_1-finetuning/readme.md
index 836f3bf1b3b..935dccde84e 100644
--- a/llm/llama-3_1-finetuning/readme.md
+++ b/llm/llama-3_1-finetuning/readme.md
@@ -135,7 +135,7 @@ sky launch -c llama31 lora.yaml \
 
 <figure>
 <center>
-<img src="https://imgur.com/B7Ib4Ii.png" width="60%" />
+<img src="https://i.imgur.com/B7Ib4Ii.png" width="60%" />
 
      
 <figcaption>Training Loss of LoRA finetuning Llama 3.1</figcaption>
@@ -218,10 +218,10 @@ run: |
 
 ## Appendix: Preparation
 1. Request the access to [Llama 3.1 weights on huggingface](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) (Click on the blue box and follow the steps):
-![](https://imgur.com/snIQhr9.png)
+![](https://i.imgur.com/snIQhr9.png)
 
 2. Get your [huggingface access token](https://huggingface.co/settings/tokens):
-![](https://imgur.com/3idBgHn.png)
+![](https://i.imgur.com/3idBgHn.png)
 
 
 3. Add huggingface token to your environment variable:
diff --git a/llm/lorax/README.md b/llm/lorax/README.md
index 2fe548c92a8..6cc44cf1134 100644
--- a/llm/lorax/README.md
+++ b/llm/lorax/README.md
@@ -4,7 +4,7 @@
 <!-- $UNCOMMENT# LoRAX: Multi-LoRA Inference Server -->
 
 <p align="center">
-    <img src="https://imgur.com/OUapRYC.png" alt="LoRAX" style="width:200px;" />
+    <img src="https://i.imgur.com/OUapRYC.png" alt="LoRAX" style="width:200px;" />
 </p>
 
 [LoRAX](https://github.com/predibase/lorax) (LoRA eXchange) is a framework that allows users to serve thousands of fine-tuned LLMs on a single GPU, dramatically reducing the cost of serving without compromising on throughput or latency. It works by dynamically loading multiple fine-tuned "adapters" (LoRAs, etc.) on top of a single base model at runtime. Concurrent requests for different adapters can be processed together in a single batch, allowing LoRAX to maintain near linear throughput scaling as the number of adapters increases.
diff --git a/llm/vicuna-llama-2/README.md b/llm/vicuna-llama-2/README.md
index 899792c299d..24caa525a56 100644
--- a/llm/vicuna-llama-2/README.md
+++ b/llm/vicuna-llama-2/README.md
@@ -1,6 +1,6 @@
 # Train Your Own Vicuna on Llama-2
 
-![Vicuna-Llama-2](https://imgur.com/McZWg6z.gif "Result model in action, trained using this guide. From the SkyPilot and Vicuna teams.")
+![Vicuna-Llama-2](https://i.imgur.com/McZWg6z.gif "Result model in action, trained using this guide. From the SkyPilot and Vicuna teams.")
 
 Meta released [Llama 2](https://ai.meta.com/llama/) two weeks ago and has made a big wave in the AI community. In our opinion, its biggest impact is that the model is now released under a [permissive license](https://github.com/facebookresearch/llama/blob/main/LICENSE) that **allows the model weights to be used commercially**[^1]. This differs from Llama 1 which cannot be used commercially.
 
@@ -106,7 +106,7 @@ sky launch --no-use-spot ...
 
 
 <p align="center">
-    <img src="https://imgur.com/yVIXfQo.gif" width="100%" alt="Optimizer"/>
+    <img src="https://i.imgur.com/yVIXfQo.gif" width="100%" alt="Optimizer"/>
 </p>
 
 **Optional**: Try out the training for the 13B model:
@@ -139,7 +139,7 @@ sky launch -c serve serve.yaml --env MODEL_CKPT=<your-model-checkpoint>/chatbot/
 ```
 In [serve.yaml](https://github.com/skypilot-org/skypilot/tree/master/llm/vicuna-llama-2/serve.yaml), we specified launching a Gradio server that serves the model checkpoint at `<your-model-checkpoint>/chatbot/7b`.
 
-![Vicuna-Llama-2](https://imgur.com/McZWg6z.gif "Serving the resulting model with Gradio.")
+![Vicuna-Llama-2](https://i.imgur.com/McZWg6z.gif "Serving the resulting model with Gradio.")
 
 
 > **Tip**: You can also switch to a cheaper accelerator, such as L4, to save costs, by adding `--gpus L4` to the above command.
diff --git a/llm/vllm/README.md b/llm/vllm/README.md
index e3a2befbecc..9fb3c0c1364 100644
--- a/llm/vllm/README.md
+++ b/llm/vllm/README.md
@@ -4,7 +4,7 @@
 <!-- $UNCOMMENT# vLLM: Easy, Fast, and Cheap LLM Inference -->
 
 <p align="center">
-    <img src="https://imgur.com/yxtzPEu.png" alt="vLLM"/>
+    <img src="https://i.imgur.com/yxtzPEu.png" alt="vLLM"/>
 </p>
 
 This README contains instructions to run a demo for vLLM, an open-source library for fast LLM inference and serving, which improves the throughput compared to HuggingFace by **up to 24x**.

From 2691d4b7a7e21dc6624cc43928442d85f2f289ac Mon Sep 17 00:00:00 2001
From: landscapepainter <34902420+landscapepainter@users.noreply.github.com>
Date: Mon, 19 Aug 2024 18:45:36 -0700
Subject: [PATCH 05/12] [Azure][Storage] Update default storage account naming
 with subscription id hash (#3796)

* add subscription id hash on default storage account name

* format

* add comment

* nit

* update default storage account name with hashed region value

* nit

* format

* nit

* comment update

* nit
---
 sky/data/storage.py | 53 ++++++++++++++++++++++++++++++++++++++++-----
 tests/test_smoke.py | 41 +++++++++++++++--------------------
 2 files changed, 64 insertions(+), 30 deletions(-)

diff --git a/sky/data/storage.py b/sky/data/storage.py
index f09d79ea48e..b915d1c6d54 100644
--- a/sky/data/storage.py
+++ b/sky/data/storage.py
@@ -1,5 +1,6 @@
 """Storage and Store Classes for Sky Data."""
 import enum
+import hashlib
 import os
 import re
 import shlex
@@ -1942,8 +1943,15 @@ class AzureBlobStore(AbstractStore):
     """Represents the backend for Azure Blob Storage Container."""
 
     _ACCESS_DENIED_MESSAGE = 'Access Denied'
-    DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}'
     DEFAULT_RESOURCE_GROUP_NAME = 'sky{user_hash}'
+    # Unlike resource group names, which only need to be unique within the
+    # subscription, storage account names must be globally unique across all of
+    # Azure users. Hence, the storage account name includes the subscription
+    # hash as well to ensure its uniqueness.
+    DEFAULT_STORAGE_ACCOUNT_NAME = (
+        'sky{region_hash}{user_hash}{subscription_hash}')
+    _SUBSCRIPTION_HASH_LENGTH = 4
+    _REGION_HASH_LENGTH = 4
 
     class AzureBlobStoreMetadata(AbstractStore.StoreMetadata):
         """A pickle-able representation of Azure Blob Store.
@@ -1977,7 +1985,7 @@ def __init__(self,
                  name: str,
                  source: str,
                  storage_account_name: str = '',
-                 region: Optional[str] = None,
+                 region: Optional[str] = 'eastus',
                  is_sky_managed: Optional[bool] = None,
                  sync_on_reconstruction: bool = True):
         self.storage_client: 'storage.Client'
@@ -2156,6 +2164,41 @@ def initialize(self):
             # If is_sky_managed is specified, then we take no action.
             self.is_sky_managed = is_new_bucket
 
+    @staticmethod
+    def get_default_storage_account_name(region: Optional[str]) -> str:
+        """Generates a unique default storage account name.
+
+        The subscription ID is included to avoid conflicts when user switches
+        subscriptions. The length of region_hash, user_hash, and
+        subscription_hash are adjusted to ensure the storage account name
+        adheres to the 24-character limit, as some region names can be very
+        long. Using a 4-character hash for the region helps keep the name
+        concise and prevents potential conflicts.
+        Reference: https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules#microsoftstorage # pylint: disable=line-too-long
+
+        Args:
+            region: Name of the region to create the storage account/container.
+
+        Returns:
+            Name of the default storage account.
+        """
+        assert region is not None
+        subscription_id = azure.get_subscription_id()
+        subscription_hash_obj = hashlib.md5(subscription_id.encode('utf-8'))
+        subscription_hash = subscription_hash_obj.hexdigest(
+        )[:AzureBlobStore._SUBSCRIPTION_HASH_LENGTH]
+        region_hash_obj = hashlib.md5(region.encode('utf-8'))
+        region_hash = region_hash_obj.hexdigest()[:AzureBlobStore.
+                                                  _REGION_HASH_LENGTH]
+
+        storage_account_name = (
+            AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
+                region_hash=region_hash,
+                user_hash=common_utils.get_user_hash(),
+                subscription_hash=subscription_hash))
+
+        return storage_account_name
+
     def _get_storage_account_and_resource_group(
             self) -> Tuple[str, Optional[str]]:
         """Get storage account and resource group to be used for AzureBlobStore
@@ -2239,10 +2282,8 @@ def _get_storage_account_and_resource_group(
             else:
                 # If storage account name is not provided from config, then
                 # use default resource group and storage account names.
-                storage_account_name = (
-                    self.DEFAULT_STORAGE_ACCOUNT_NAME.format(
-                        region=self.region,
-                        user_hash=common_utils.get_user_hash()))
+                storage_account_name = self.get_default_storage_account_name(
+                    self.region)
                 resource_group_name = (self.DEFAULT_RESOURCE_GROUP_NAME.format(
                     user_hash=common_utils.get_user_hash()))
                 try:
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 874e51e0f9c..28eeeed5190 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -48,6 +48,7 @@
 from sky import jobs
 from sky import serve
 from sky import skypilot_config
+from sky.adaptors import azure
 from sky.adaptors import cloudflare
 from sky.adaptors import ibm
 from sky.clouds import AWS
@@ -1103,9 +1104,8 @@ def test_azure_storage_mounts_with_stop():
     cloud = 'azure'
     storage_name = f'sky-test-{int(time.time())}'
     default_region = 'eastus'
-    storage_account_name = (
-        storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
-            region=default_region, user_hash=common_utils.get_user_hash()))
+    storage_account_name = (storage_lib.AzureBlobStore.
+                            get_default_storage_account_name(default_region))
     storage_account_key = data_utils.get_az_storage_account_key(
         storage_account_name)
     template_str = pathlib.Path(
@@ -2990,8 +2990,7 @@ def test_managed_jobs_storage(generic_cloud: str):
         region = 'westus2'
         region_flag = f' --region {region}'
         storage_account_name = (
-            storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
-                region=region, user_hash=common_utils.get_user_hash()))
+            storage_lib.AzureBlobStore.get_default_storage_account_name(region))
         region_cmd = TestStorageWithCredentials.cli_region_cmd(
             storage_lib.StoreType.AZURE,
             storage_account_name=storage_account_name)
@@ -4287,9 +4286,8 @@ def cli_delete_cmd(store_type,
         if store_type == storage_lib.StoreType.AZURE:
             default_region = 'eastus'
             storage_account_name = (
-                storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
-                    region=default_region,
-                    user_hash=common_utils.get_user_hash()))
+                storage_lib.AzureBlobStore.get_default_storage_account_name(
+                    default_region))
             storage_account_key = data_utils.get_az_storage_account_key(
                 storage_account_name)
             return ('az storage container delete '
@@ -4324,11 +4322,9 @@ def cli_ls_cmd(store_type, bucket_name, suffix=''):
             config_storage_account = skypilot_config.get_nested(
                 ('azure', 'storage_account'), None)
             storage_account_name = config_storage_account if (
-                config_storage_account is not None
-            ) else (
-                storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
-                    region=default_region,
-                    user_hash=common_utils.get_user_hash()))
+                config_storage_account is not None) else (
+                    storage_lib.AzureBlobStore.get_default_storage_account_name(
+                        default_region))
             storage_account_key = data_utils.get_az_storage_account_key(
                 storage_account_name)
             list_cmd = ('az storage blob list '
@@ -4390,9 +4386,8 @@ def cli_count_name_in_bucket(store_type,
             if storage_account_name is None:
                 default_region = 'eastus'
                 storage_account_name = (
-                    storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.
-                    format(region=default_region,
-                           user_hash=common_utils.get_user_hash()))
+                    storage_lib.AzureBlobStore.get_default_storage_account_name(
+                        default_region))
             storage_account_key = data_utils.get_az_storage_account_key(
                 storage_account_name)
             return ('az storage blob list '
@@ -4418,9 +4413,8 @@ def cli_count_file_in_bucket(store_type, bucket_name):
         elif store_type == storage_lib.StoreType.AZURE:
             default_region = 'eastus'
             storage_account_name = (
-                storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
-                    region=default_region,
-                    user_hash=common_utils.get_user_hash()))
+                storage_lib.AzureBlobStore.get_default_storage_account_name(
+                    default_region))
             storage_account_key = data_utils.get_az_storage_account_key(
                 storage_account_name)
             return ('az storage blob list '
@@ -4622,8 +4616,8 @@ def tmp_az_bucket(self, tmp_bucket_name):
         # Creates a temporary bucket using gsutil
         default_region = 'eastus'
         storage_account_name = (
-            storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
-                region=default_region, user_hash=common_utils.get_user_hash()))
+            storage_lib.AzureBlobStore.get_default_storage_account_name(
+                default_region))
         storage_account_key = data_utils.get_az_storage_account_key(
             storage_account_name)
         bucket_uri = data_utils.AZURE_CONTAINER_URL.format(
@@ -4859,9 +4853,8 @@ def test_nonexistent_bucket(self, nonexist_bucket_url):
             elif nonexist_bucket_url.startswith('https'):
                 default_region = 'eastus'
                 storage_account_name = (
-                    storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.
-                    format(region=default_region,
-                           user_hash=common_utils.get_user_hash()))
+                    storage_lib.AzureBlobStore.get_default_storage_account_name(
+                        default_region))
                 storage_account_key = data_utils.get_az_storage_account_key(
                     storage_account_name)
                 command = f'az storage container exists --account-name {storage_account_name} --account-key {storage_account_key} --name {nonexist_bucket_name}'

From 87e55ab961f0d10622006618fc6f65d3ce2d5beb Mon Sep 17 00:00:00 2001
From: Seth Kimmel <seth.kimmel3@gmail.com>
Date: Tue, 20 Aug 2024 12:02:27 -0700
Subject: [PATCH 06/12] Update aws.rst (#3849)

* Update aws.rst

* Update docs/source/cloud-setup/cloud-permissions/aws.rst

Co-authored-by: Zhanghao Wu <zhanghao.wu@outlook.com>

---------

Co-authored-by: Zhanghao Wu <zhanghao.wu@outlook.com>
---
 docs/source/cloud-setup/cloud-permissions/aws.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/cloud-setup/cloud-permissions/aws.rst b/docs/source/cloud-setup/cloud-permissions/aws.rst
index e34499df3b4..89510331988 100644
--- a/docs/source/cloud-setup/cloud-permissions/aws.rst
+++ b/docs/source/cloud-setup/cloud-permissions/aws.rst
@@ -148,7 +148,7 @@ AWS accounts can be attached with a policy that limits the permissions of the ac
     :align: center
     :alt: AWS Add Policy
 
-8. **Optional**: If you would like to have your users access S3 buckets: You can additionally attach S3 access, such as the "AmazonS3FullAccess" policy.
+8. **Optional**: If you would like to have your users access S3 buckets: You can additionally attach S3 access, such as the "AmazonS3FullAccess" policy. Note that enabling S3 access is required to use :ref:`managed-jobs` with `workdir` or `file_mounts` for now.
 
 .. image:: ../../images/screenshots/aws/aws-s3-policy.png
     :width: 80%

From fa60f7ab533190099662c49731d1c0af13abd6b9 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Tue, 20 Aug 2024 17:01:28 -0700
Subject: [PATCH 07/12] [Docker] Add `--gpus all` for docker when GPU is
 available (#3833)

* Add --gpus all for when GPU is available

* Get rid of --gpus all
---
 sky/provision/docker_utils.py       | 2 +-
 sky/templates/aws-ray.yml.j2        | 3 ---
 sky/templates/azure-ray.yml.j2      | 3 ---
 sky/templates/gcp-ray.yml.j2        | 3 ---
 sky/templates/paperspace-ray.yml.j2 | 3 ---
 5 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/sky/provision/docker_utils.py b/sky/provision/docker_utils.py
index aa29a3666a3..e989fbc085a 100644
--- a/sky/provision/docker_utils.py
+++ b/sky/provision/docker_utils.py
@@ -381,7 +381,7 @@ def _configure_runtime(self, run_options: List[str]) -> List[str]:
         if 'nvidia-container-runtime' in runtime_output:
             try:
                 self._run('nvidia-smi', log_err_when_fail=False)
-                return run_options + ['--runtime=nvidia']
+                return run_options + ['--runtime=nvidia', '--gpus all']
             except Exception as e:  # pylint: disable=broad-except
                 logger.debug(
                     'Nvidia Container Runtime is present in the docker image'
diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2
index 0a6b0bcc08c..4cfecfe2b12 100644
--- a/sky/templates/aws-ray.yml.j2
+++ b/sky/templates/aws-ray.yml.j2
@@ -11,9 +11,6 @@ docker:
   container_name: {{docker_container_name}}
   run_options:
     - --ulimit nofile=1048576:1048576
-    {%- if custom_resources is not none %}
-      --gpus all
-    {%- endif %}
     {%- for run_option in docker_run_options %}
     - {{run_option}}
     {%- endfor %}
diff --git a/sky/templates/azure-ray.yml.j2 b/sky/templates/azure-ray.yml.j2
index 39672a976b8..65d500fc677 100644
--- a/sky/templates/azure-ray.yml.j2
+++ b/sky/templates/azure-ray.yml.j2
@@ -11,9 +11,6 @@ docker:
   container_name: {{docker_container_name}}
   run_options:
     - --ulimit nofile=1048576:1048576
-    {%- if custom_resources is not none %}
-      --gpus all
-    {%- endif %}
     {%- for run_option in docker_run_options %}
     - {{run_option}}
     {%- endfor %}
diff --git a/sky/templates/gcp-ray.yml.j2 b/sky/templates/gcp-ray.yml.j2
index d7e787953d9..bcc16bac531 100644
--- a/sky/templates/gcp-ray.yml.j2
+++ b/sky/templates/gcp-ray.yml.j2
@@ -12,9 +12,6 @@ docker:
   container_name: {{docker_container_name}}
   run_options:
     - --ulimit nofile=1048576:1048576
-    {%- if gpu is not none %}
-      --gpus all
-    {%- endif %}
     {%- for run_option in docker_run_options %}
     - {{run_option}}
     {%- endfor %}
diff --git a/sky/templates/paperspace-ray.yml.j2 b/sky/templates/paperspace-ray.yml.j2
index 400714978b9..8eea5ac4f8a 100644
--- a/sky/templates/paperspace-ray.yml.j2
+++ b/sky/templates/paperspace-ray.yml.j2
@@ -11,9 +11,6 @@ docker:
   container_name: {{docker_container_name}}
   run_options:
     - --ulimit nofile=1048576:1048576
-    {%- if custom_resources is not none %}
-      --gpus all
-    {%- endif %}
     {%- for run_option in docker_run_options %}
     - {{run_option}}
     {%- endfor %}

From 2ea613f89e7aee947117c9cb56e5d0ced3f0db94 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Tue, 20 Aug 2024 18:35:06 -0700
Subject: [PATCH 08/12] [Azure] Fix azure failover with
 `RoleAssigmentUpdateNotPermitted` (#3848)

* Fix azure failover

* Add unique id back
---
 sky/provision/azure/config.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/sky/provision/azure/config.py b/sky/provision/azure/config.py
index 146deaa6781..7b50c3d8c0f 100644
--- a/sky/provision/azure/config.py
+++ b/sky/provision/azure/config.py
@@ -3,6 +3,7 @@
 Creates the resource group and deploys the configuration template to Azure for
 a cluster to be launched.
 """
+import hashlib
 import json
 import logging
 from pathlib import Path
@@ -15,6 +16,7 @@
 
 logger = logging.getLogger(__name__)
 
+UNIQUE_ID_LEN = 4
 _DEPLOYMENT_NAME = 'skypilot-config'
 _LEGACY_DEPLOYMENT_NAME = 'ray-config'
 _RESOURCE_GROUP_WAIT_FOR_DELETION_TIMEOUT = 480  # 8 minutes
@@ -103,10 +105,12 @@ def bootstrap_instances(
 
     logger.info(f'Using cluster name: {cluster_name_on_cloud}')
 
+    hasher = hashlib.md5(provider_config['resource_group'].encode('utf-8'))
+    unique_id = hasher.hexdigest()[:UNIQUE_ID_LEN]
     subnet_mask = provider_config.get('subnet_mask')
     if subnet_mask is None:
         # choose a random subnet, skipping most common value of 0
-        random.seed(cluster_name_on_cloud)
+        random.seed(unique_id)
         subnet_mask = f'10.{random.randint(1, 254)}.0.0/16'
     logger.info(f'Using subnet mask: {subnet_mask}')
 
@@ -119,10 +123,10 @@ def bootstrap_instances(
                     'value': subnet_mask
                 },
                 'clusterId': {
-                    # We use the cluster name as the unique ID for the cluster,
-                    # as we have already appended the user hash to the cluster
-                    # name.
-                    'value': cluster_name_on_cloud
+                    # We use the cluster name + resource group hash as the
+                    # unique ID for the cluster, as we need to make sure that
+                    # the deployments have unique names during failover.
+                    'value': f'{cluster_name_on_cloud}-{unique_id}'
                 },
             },
         }

From 6101a04198ad50cc04f68a8ca38ca98bfbd72954 Mon Sep 17 00:00:00 2001
From: mjibril <mjibril@users.noreply.github.com>
Date: Wed, 21 Aug 2024 18:43:14 +0100
Subject: [PATCH 09/12] [FluidStack][API] Update FluidStack to new API (#3799)

* [FluidStack][API] Update FluidStack to new API

    * Update FluidStack to new API

    * Simplify deployment by removing CUDA installation and using pre-built images

    Co-authored-by: Mubarak Jibril  <mubarak.jibril@gmail.com>

* fix format
---
 docs/source/getting-started/installation.rst  |   5 +-
 sky/clouds/fluidstack.py                      |  59 ++---
 .../data_fetchers/fetch_fluidstack.py         | 218 ++++++++++++++----
 sky/provision/fluidstack/fluidstack_utils.py  | 175 +++++---------
 sky/provision/fluidstack/instance.py          |  48 +---
 sky/templates/fluidstack-ray.yml.j2           |   1 -
 tests/test_smoke.py                           |  19 ++
 7 files changed, 281 insertions(+), 244 deletions(-)

diff --git a/docs/source/getting-started/installation.rst b/docs/source/getting-started/installation.rst
index be7ae1ff327..9f251a5aafe 100644
--- a/docs/source/getting-started/installation.rst
+++ b/docs/source/getting-started/installation.rst
@@ -301,13 +301,12 @@ RunPod
 Fluidstack
 ~~~~~~~~~~~~~~~~~~
 
-`Fluidstack <https://fluidstack.io/>`__ is a cloud provider offering low-cost GPUs. To configure Fluidstack access, go to the `Home <https://console.fluidstack.io/>`__ page on your Fluidstack console to generate an API key and then add the :code:`API key` to :code:`~/.fluidstack/api_key` and the :code:`API token` to :code:`~/.fluidstack/api_token`:
-
+`Fluidstack <https://fluidstack.io/>`__ is a cloud provider offering low-cost GPUs. To configure Fluidstack access, go to the `Home <https://dashboard.fluidstack.io/>`__ page on your Fluidstack console to generate an API key and then add the :code:`API key` to :code:`~/.fluidstack/api_key` :
 .. code-block:: shell
 
   mkdir -p ~/.fluidstack
   echo "your_api_key_here" > ~/.fluidstack/api_key
-  echo "your_api_token_here" > ~/.fluidstack/api_token
+
 
 
 Cudo Compute
diff --git a/sky/clouds/fluidstack.py b/sky/clouds/fluidstack.py
index d292ace02f8..ef397d4c55e 100644
--- a/sky/clouds/fluidstack.py
+++ b/sky/clouds/fluidstack.py
@@ -15,8 +15,7 @@
 
 _CREDENTIAL_FILES = [
     # credential files for FluidStack,
-    fluidstack_utils.FLUIDSTACK_API_KEY_PATH,
-    fluidstack_utils.FLUIDSTACK_API_TOKEN_PATH,
+    fluidstack_utils.FLUIDSTACK_API_KEY_PATH
 ]
 if typing.TYPE_CHECKING:
     # Renaming to avoid shadowing variables.
@@ -189,20 +188,12 @@ def make_deploy_resources_variables(
             custom_resources = json.dumps(acc_dict, separators=(',', ':'))
         else:
             custom_resources = None
-        cuda_installation_commands = """
-        sudo wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb -O /usr/local/cuda-keyring_1.1-1_all.deb;
-        sudo dpkg -i /usr/local/cuda-keyring_1.1-1_all.deb;
-        sudo apt-get update;
-        sudo apt-get -y install cuda-toolkit-12-3;
-        sudo apt-get install -y cuda-drivers;
-        sudo apt-get install -y python3-pip;
-        nvidia-smi || sudo reboot;"""
+
         return {
             'instance_type': resources.instance_type,
             'custom_resources': custom_resources,
             'region': region.name,
-            'fluidstack_username': self.default_username(region.name),
-            'cuda_installation_commands': cuda_installation_commands,
+            'fluidstack_username': 'ubuntu',
         }
 
     def _get_feasible_launchable_resources(
@@ -270,17 +261,26 @@ def check_credentials(cls) -> Tuple[bool, Optional[str]]:
         try:
             assert os.path.exists(
                 os.path.expanduser(fluidstack_utils.FLUIDSTACK_API_KEY_PATH))
-            assert os.path.exists(
-                os.path.expanduser(fluidstack_utils.FLUIDSTACK_API_TOKEN_PATH))
+
+            with open(os.path.expanduser(
+                    fluidstack_utils.FLUIDSTACK_API_KEY_PATH),
+                      encoding='UTF-8') as f:
+                api_key = f.read().strip()
+                if not api_key.startswith('api_key'):
+                    return False, ('Invalid FluidStack API key format. '
+                                   'To configure credentials, go to:\n    '
+                                   '  https://dashboard.fluidstack.io \n    '
+                                   'to obtain an API key, '
+                                   'then add save the contents '
+                                   'to ~/.fluidstack/api_key \n')
         except AssertionError:
-            return False, (
-                'Failed to access FluidStack Cloud'
-                ' with credentials. '
-                'To configure credentials, go to:\n    '
-                '  https://console.fluidstack.io \n    '
-                'to obtain an API key and API Token, '
-                'then add save the contents '
-                'to ~/.fluidstack/api_key and ~/.fluidstack/api_token \n')
+            return False, ('Failed to access FluidStack Cloud'
+                           ' with credentials. '
+                           'To configure credentials, go to:\n    '
+                           '  https://dashboard.fluidstack.io \n    '
+                           'to obtain an API key, '
+                           'then add save the contents '
+                           'to ~/.fluidstack/api_key \n')
         except requests.exceptions.ConnectionError:
             return False, ('Failed to verify FluidStack Cloud credentials. '
                            'Check your network connection '
@@ -303,21 +303,6 @@ def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
                                                     zone,
                                                     clouds='fluidstack')
 
-    @classmethod
-    def default_username(cls, region: str) -> str:
-        return {
-            'norway_2_eu': 'ubuntu',
-            'calgary_1_canada': 'ubuntu',
-            'norway_3_eu': 'ubuntu',
-            'norway_4_eu': 'ubuntu',
-            'india_2': 'root',
-            'nevada_1_usa': 'fsuser',
-            'generic_1_canada': 'ubuntu',
-            'iceland_1_eu': 'ubuntu',
-            'new_york_1_usa': 'fsuser',
-            'illinois_1_usa': 'fsuser'
-        }.get(region, 'ubuntu')
-
     @classmethod
     def query_status(
         cls,
diff --git a/sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py b/sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py
index 5d50399ab89..cf943541e08 100644
--- a/sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py
+++ b/sky/clouds/service_catalog/data_fetchers/fetch_fluidstack.py
@@ -11,10 +11,140 @@
 
 import requests
 
-ENDPOINT = 'https://api.fluidstack.io/v1/plans'
+ENDPOINT = 'https://platform.fluidstack.io/list_available_configurations'
 DEFAULT_FLUIDSTACK_API_KEY_PATH = os.path.expanduser('~/.fluidstack/api_key')
-DEFAULT_FLUIDSTACK_API_TOKEN_PATH = os.path.expanduser(
-    '~/.fluidstack/api_token')
+
+plan_vcpus_memory = [{
+    'gpu_type': 'RTX_A6000_48GB',
+    'gpu_count': 2,
+    'min_cpu_count': 12,
+    'min_memory': 110.0
+}, {
+    'gpu_type': 'RTX_A6000_48GB',
+    'gpu_count': 4,
+    'min_cpu_count': 24,
+    'min_memory': 220.0
+}, {
+    'gpu_type': 'A100_NVLINK_80GB',
+    'gpu_count': 8,
+    'min_cpu_count': 252,
+    'min_memory': 960.0
+}, {
+    'gpu_type': 'H100_PCIE_80GB',
+    'gpu_count': 8,
+    'min_cpu_count': 252,
+    'min_memory': 1440.0
+}, {
+    'gpu_type': 'RTX_A4000_16GB',
+    'gpu_count': 2,
+    'min_cpu_count': 12,
+    'min_memory': 48.0
+}, {
+    'gpu_type': 'H100_PCIE_80GB',
+    'gpu_count': 2,
+    'min_cpu_count': 60,
+    'min_memory': 360.0
+}, {
+    'gpu_type': 'RTX_A6000_48GB',
+    'gpu_count': 8,
+    'min_cpu_count': 252,
+    'min_memory': 464.0
+}, {
+    'gpu_type': 'H100_NVLINK_80GB',
+    'gpu_count': 8,
+    'min_cpu_count': 252,
+    'min_memory': 1440.0
+}, {
+    'gpu_type': 'H100_PCIE_80GB',
+    'gpu_count': 1,
+    'min_cpu_count': 28,
+    'min_memory': 180.0
+}, {
+    'gpu_type': 'RTX_A5000_24GB',
+    'gpu_count': 1,
+    'min_cpu_count': 8,
+    'min_memory': 30.0
+}, {
+    'gpu_type': 'RTX_A5000_24GB',
+    'gpu_count': 2,
+    'min_cpu_count': 16,
+    'min_memory': 60.0
+}, {
+    'gpu_type': 'L40_48GB',
+    'gpu_count': 2,
+    'min_cpu_count': 64,
+    'min_memory': 120.0
+}, {
+    'gpu_type': 'RTX_A4000_16GB',
+    'gpu_count': 8,
+    'min_cpu_count': 48,
+    'min_memory': 192.0
+}, {
+    'gpu_type': 'RTX_A4000_16GB',
+    'gpu_count': 1,
+    'min_cpu_count': 6,
+    'min_memory': 24.0
+}, {
+    'gpu_type': 'RTX_A4000_16GB',
+    'gpu_count': 4,
+    'min_cpu_count': 24,
+    'min_memory': 96.0
+}, {
+    'gpu_type': 'A100_PCIE_80GB',
+    'gpu_count': 4,
+    'min_cpu_count': 124,
+    'min_memory': 480.0
+}, {
+    'gpu_type': 'H100_PCIE_80GB',
+    'gpu_count': 4,
+    'min_cpu_count': 124,
+    'min_memory': 720.0
+}, {
+    'gpu_type': 'L40_48GB',
+    'gpu_count': 8,
+    'min_cpu_count': 252,
+    'min_memory': 480.0
+}, {
+    'gpu_type': 'RTX_A5000_24GB',
+    'gpu_count': 8,
+    'min_cpu_count': 64,
+    'min_memory': 240.0
+}, {
+    'gpu_type': 'L40_48GB',
+    'gpu_count': 1,
+    'min_cpu_count': 32,
+    'min_memory': 60.0
+}, {
+    'gpu_type': 'RTX_A6000_48GB',
+    'gpu_count': 1,
+    'min_cpu_count': 6,
+    'min_memory': 55.0
+}, {
+    'gpu_type': 'L40_48GB',
+    'gpu_count': 4,
+    'min_cpu_count': 126,
+    'min_memory': 240.0
+}, {
+    'gpu_type': 'A100_PCIE_80GB',
+    'gpu_count': 1,
+    'min_cpu_count': 28,
+    'min_memory': 120.0
+}, {
+    'gpu_type': 'A100_PCIE_80GB',
+    'gpu_count': 8,
+    'min_cpu_count': 252,
+    'min_memory': 1440.0
+}, {
+    'gpu_type': 'A100_PCIE_80GB',
+    'gpu_count': 2,
+    'min_cpu_count': 60,
+    'min_memory': 240.0
+}, {
+    'gpu_type': 'RTX_A5000_24GB',
+    'gpu_count': 4,
+    'min_cpu_count': 32,
+    'min_memory': 120.0
+}]
 
 GPU_MAP = {
     'H100_PCIE_80GB': 'H100',
@@ -47,19 +177,15 @@ def get_regions(plans: List) -> dict:
     regions = {}
     for plan in plans:
         for region in plan.get('regions', []):
-            regions[region['id']] = region['id']
+            regions[region] = region
     return regions
 
 
 def create_catalog(output_dir: str) -> None:
-    response = requests.get(ENDPOINT)
+    with open(DEFAULT_FLUIDSTACK_API_KEY_PATH, 'r', encoding='UTF-8') as f:
+        api_key = f.read().strip()
+    response = requests.get(ENDPOINT, headers={'api-key': api_key})
     plans = response.json()
-    #plans = [plan for plan in plans if len(plan['regions']) > 0]
-    plans = [
-        plan for plan in plans if plan['minimum_commitment'] == 'hourly' and
-        plan['type'] in ['preconfigured'] and
-        plan['gpu_type'] not in ['NO GPU', 'RTX_3080_10GB', 'RTX_3090_24GB']
-    ]
 
     with open(os.path.join(output_dir, 'vms.csv'), mode='w',
               encoding='utf-8') as f:
@@ -81,39 +207,45 @@ def create_catalog(output_dir: str) -> None:
             except KeyError:
                 #print(f'Could not map {plan["gpu_type"]}')
                 continue
-            gpu_memory = int(
-                str(plan['configuration']['gpu_memory']).replace('GB',
-                                                                 '')) * 1024
-            gpu_cnt = int(plan['configuration']['gpu_count'])
-            vcpus = float(plan['configuration']['core_count'])
-            mem = float(plan['configuration']['ram'])
-            price = float(plan['price']['hourly']) * gpu_cnt
-            gpuinfo = {
-                'Gpus': [{
-                    'Name': gpu,
-                    'Manufacturer': 'NVIDIA',
-                    'Count': gpu_cnt,
-                    'MemoryInfo': {
-                        'SizeInMiB': int(gpu_memory)
-                    },
-                }],
-                'TotalGpuMemoryInMiB': int(gpu_memory * gpu_cnt),
-            }
-            gpuinfo = json.dumps(gpuinfo).replace('"', "'")  # pylint: disable=invalid-string-quote
-            for r in plan.get('regions', []):
-                if r['id'] == 'india_2':
+            for gpu_cnt in plan['gpu_counts']:
+                gpu_memory = float(plan['gpu_type'].split('_')[-1].replace(
+                    'GB', '')) * 1024
+                try:
+                    vcpus_mem = [
+                        x for x in plan_vcpus_memory
+                        if x['gpu_type'] == plan['gpu_type'] and
+                        x['gpu_count'] == gpu_cnt
+                    ][0]
+                    vcpus = vcpus_mem['min_cpu_count']
+                    mem = vcpus_mem['min_memory']
+                except IndexError:
                     continue
-                writer.writerow([
-                    plan['plan_id'],
-                    gpu,
-                    gpu_cnt,
-                    vcpus,
-                    mem,
-                    price,
-                    r['id'],
-                    gpuinfo,
-                    '',
-                ])
+                price = float(plan['price_per_gpu_hr']) * gpu_cnt
+                gpuinfo = {
+                    'Gpus': [{
+                        'Name': gpu,
+                        'Manufacturer': 'NVIDIA',
+                        'Count': gpu_cnt,
+                        'MemoryInfo': {
+                            'SizeInMiB': int(gpu_memory)
+                        },
+                    }],
+                    'TotalGpuMemoryInMiB': int(gpu_memory * gpu_cnt),
+                }
+                gpuinfo = json.dumps(gpuinfo).replace('"', "'")  # pylint: disable=invalid-string-quote
+                instance_type = f'{plan["gpu_type"]}::{gpu_cnt}'
+                for region in plan.get('regions', []):
+                    writer.writerow([
+                        instance_type,
+                        gpu,
+                        gpu_cnt,
+                        vcpus,
+                        mem,
+                        price,
+                        region,
+                        gpuinfo,
+                        '',
+                    ])
 
 
 if __name__ == '__main__':
diff --git a/sky/provision/fluidstack/fluidstack_utils.py b/sky/provision/fluidstack/fluidstack_utils.py
index ebc616c0bfc..a9efb865a3c 100644
--- a/sky/provision/fluidstack/fluidstack_utils.py
+++ b/sky/provision/fluidstack/fluidstack_utils.py
@@ -3,7 +3,8 @@
 import functools
 import json
 import os
-from typing import Any, Dict, List, Optional
+import time
+from typing import Any, Dict, List
 import uuid
 
 import requests
@@ -13,9 +14,8 @@ def get_key_suffix():
     return str(uuid.uuid4()).replace('-', '')[:8]
 
 
-ENDPOINT = 'https://api.fluidstack.io/v1/'
+ENDPOINT = 'https://platform.fluidstack.io/'
 FLUIDSTACK_API_KEY_PATH = '~/.fluidstack/api_key'
-FLUIDSTACK_API_TOKEN_PATH = '~/.fluidstack/api_token'
 
 
 def read_contents(path: str) -> str:
@@ -46,109 +46,76 @@ def raise_fluidstack_error(response: requests.Response) -> None:
     raise FluidstackAPIError(f'{message}', status_code)
 
 
-@functools.lru_cache()
-def with_nvidia_drivers(region: str):
-    if region in ['norway_4_eu', 'generic_1_canada']:
-        return False
-    client = FluidstackClient()
-    plans = client.get_plans()
-    for plan in plans:
-        if region in [r['id'] for r in plan['regions']]:
-            if 'Ubuntu 20.04 LTS (Nvidia)' in plan['os_options']:
-                return True
-    return False
-
-
 class FluidstackClient:
     """FluidStack API Client"""
 
     def __init__(self):
         self.api_key = read_contents(
-            os.path.expanduser(FLUIDSTACK_API_KEY_PATH))
-        self.api_token = read_contents(
-            os.path.expanduser(FLUIDSTACK_API_TOKEN_PATH))
+            os.path.expanduser(FLUIDSTACK_API_KEY_PATH)).strip()
 
     def get_plans(self):
-        response = requests.get(ENDPOINT + 'plans')
+        response = requests.get(ENDPOINT + 'list_available_configurations',
+                                headers={'api-key': self.api_key})
         raise_fluidstack_error(response)
         plans = response.json()
-        plans = [
-            plan for plan in plans
-            if plan['minimum_commitment'] == 'hourly' and plan['type'] in
-            ['preconfigured', 'custom'] and plan['gpu_type'] != 'NO GPU'
-        ]
         return plans
 
-    def list_instances(
-            self,
-            tag_filters: Optional[Dict[str,
-                                       str]] = None) -> List[Dict[str, Any]]:
+    def list_instances(self) -> List[Dict[str, Any]]:
         response = requests.get(
-            ENDPOINT + 'servers',
-            auth=(self.api_key, self.api_token),
+            ENDPOINT + 'instances',
+            headers={'api-key': self.api_key},
         )
         raise_fluidstack_error(response)
         instances = response.json()
-        filtered_instances = []
-
-        for instance in instances:
-            if isinstance(instance['tags'], str):
-                instance['tags'] = json.loads(instance['tags'])
-            if not instance['tags']:
-                instance['tags'] = {}
-            if tag_filters:
-                for key in tag_filters:
-                    if instance['tags'].get(key, None) != tag_filters[key]:
-                        break
-                else:
-                    filtered_instances.append(instance)
-            else:
-                filtered_instances.append(instance)
-
-        return filtered_instances
+        return instances
 
     def create_instance(
         self,
         instance_type: str = '',
-        hostname: str = '',
+        name: str = '',
         region: str = '',
         ssh_pub_key: str = '',
         count: int = 1,
     ) -> List[str]:
         """Launch new instances."""
 
-        config: Dict[str, Any] = {}
         plans = self.get_plans()
         regions = self.list_regions()
+        gpu_type, gpu_count = instance_type.split('::')
+        gpu_count = int(gpu_count)
+
         plans = [
-            plan for plan in plans if plan['plan_id'] == instance_type and
-            region in [r['id'] for r in plan['regions']]
+            plan for plan in plans if plan['gpu_type'] == gpu_type and
+            gpu_count in plan['gpu_counts'] and region in plan['regions']
         ]
         if not plans:
             raise FluidstackAPIError(
                 f'Plan {instance_type} out of stock in region {region}')
 
         ssh_key = self.get_or_add_ssh_key(ssh_pub_key)
-        os_id = 'Ubuntu 20.04 LTS'
-        body = dict(plan=None if config else instance_type,
-                    region=regions[region],
-                    os=os_id,
-                    hostname=hostname,
-                    ssh_keys=[ssh_key['id']],
-                    multiplicity=count,
-                    config=config)
-
-        response = requests.post(ENDPOINT + 'server',
-                                 auth=(self.api_key, self.api_token),
-                                 json=body)
-        raise_fluidstack_error(response)
-        instance_ids = response.json().get('multiple')
-        assert all(id is not None for id in instance_ids), instance_ids
+        default_operating_system = 'ubuntu_22_04_lts_nvidia'
+        instance_ids = []
+        for _ in range(count):
+            body = dict(gpu_type=gpu_type,
+                        gpu_count=gpu_count,
+                        region=regions[region],
+                        operating_system_label=default_operating_system,
+                        name=name,
+                        ssh_key=ssh_key['name'])
+
+            response = requests.post(ENDPOINT + 'instances',
+                                     headers={'api-key': self.api_key},
+                                     json=body)
+            raise_fluidstack_error(response)
+            instance_id = response.json().get('id')
+            instance_ids.append(instance_id)
+            time.sleep(1)
+
         return instance_ids
 
     def list_ssh_keys(self):
-        response = requests.get(ENDPOINT + 'ssh',
-                                auth=(self.api_key, self.api_token))
+        response = requests.get(ENDPOINT + 'ssh_keys',
+                                headers={'api-key': self.api_key})
         raise_fluidstack_error(response)
         return response.json()
 
@@ -156,86 +123,50 @@ def get_or_add_ssh_key(self, ssh_pub_key: str = '') -> Dict[str, str]:
         """Add ssh key if not already added."""
         ssh_keys = self.list_ssh_keys()
         for key in ssh_keys:
-            if key['public_key'].strip() == ssh_pub_key.strip():
-                return {
-                    'id': key['id'],
-                    'name': key['name'],
-                    'ssh_key': ssh_pub_key
-                }
+            if key['public_key'].strip().split()[:2] == ssh_pub_key.strip(
+            ).split()[:2]:
+                return {'name': key['name'], 'ssh_key': ssh_pub_key}
         ssh_key_name = 'skypilot-' + get_key_suffix()
         response = requests.post(
-            ENDPOINT + 'ssh',
-            auth=(self.api_key, self.api_token),
+            ENDPOINT + 'ssh_keys',
+            headers={'api-key': self.api_key},
             json=dict(name=ssh_key_name, public_key=ssh_pub_key),
         )
         raise_fluidstack_error(response)
-        key_id = response.json()['id']
-        return {'id': key_id, 'name': ssh_key_name, 'ssh_key': ssh_pub_key}
+        return {'name': ssh_key_name, 'ssh_key': ssh_pub_key}
 
     @functools.lru_cache()
     def list_regions(self):
-        response = requests.get(ENDPOINT + 'plans')
-        raise_fluidstack_error(response)
-        plans = response.json()
-        plans = [
-            plan for plan in plans
-            if plan['minimum_commitment'] == 'hourly' and plan['type'] in
-            ['preconfigured', 'custom'] and plan['gpu_type'] != 'NO GPU'
-        ]
+        plans = self.get_plans()
 
         def get_regions(plans: List) -> dict:
             """Return a list of regions where the plan is available."""
             regions = {}
             for plan in plans:
                 for region in plan.get('regions', []):
-                    regions[region['id']] = region['id']
+                    regions[region] = region
             return regions
 
         regions = get_regions(plans)
         return regions
 
     def delete(self, instance_id: str):
-        response = requests.delete(ENDPOINT + 'server/' + instance_id,
-                                   auth=(self.api_key, self.api_token))
+        response = requests.delete(ENDPOINT + 'instances/' + instance_id,
+                                   headers={'api-key': self.api_key})
         raise_fluidstack_error(response)
         return response.json()
 
     def stop(self, instance_id: str):
-        response = requests.put(ENDPOINT + 'server/' + instance_id + '/stop',
-                                auth=(self.api_key, self.api_token))
-        raise_fluidstack_error(response)
-        return response.json()
-
-    def restart(self, instance_id: str):
-        response = requests.post(ENDPOINT + 'server/' + instance_id + '/reboot',
-                                 auth=(self.api_key, self.api_token))
-        raise_fluidstack_error(response)
-        return response.json()
-
-    def info(self, instance_id: str):
-        response = requests.get(ENDPOINT + f'server/{instance_id}',
-                                auth=(self.api_key, self.api_token))
-        raise_fluidstack_error(response)
-        return response.json()
-
-    def status(self, instance_id: str):
-        response = self.info(instance_id)
-        return response['status']
-
-    def add_tags(self, instance_id: str, tags: Dict[str, str]) -> str:
-        response = requests.patch(
-            ENDPOINT + f'server/{instance_id}/tag',
-            auth=(self.api_key, self.api_token),
-            json=dict(tags=json.dumps(tags)),
-        )
+        response = requests.put(ENDPOINT + 'instances/' + instance_id + '/stop',
+                                headers={'api-key': self.api_key})
         raise_fluidstack_error(response)
         return response.json()
 
-    def rename(self, instance_id: str, hostname: str) -> str:
-        response = requests.patch(
-            ENDPOINT + f'server/{instance_id}/rename',
-            auth=(self.api_key, self.api_token),
-            json=dict(name=hostname),
+    def rename(self, instance_id: str, name: str) -> str:
+        response = requests.put(
+            ENDPOINT + f'instances/{instance_id}/rename',
+            headers={'api-key': self.api_key},
+            json=dict(new_instance_name=name),
         )
         raise_fluidstack_error(response)
         return response.json()
diff --git a/sky/provision/fluidstack/instance.py b/sky/provision/fluidstack/instance.py
index e870ff15e0c..538aafc8887 100644
--- a/sky/provision/fluidstack/instance.py
+++ b/sky/provision/fluidstack/instance.py
@@ -27,7 +27,7 @@ def get_internal_ip(node_info: Dict[str, Any]) -> None:
     node_info['internal_ip'] = node_info['ip_address']
     runner = command_runner.SSHCommandRunner(
         (node_info['ip_address'], 22),
-        ssh_user=node_info['capabilities']['default_user_name'],
+        ssh_user='ubuntu',
         ssh_private_key=auth.PRIVATE_SSH_KEY_PATH)
     result = runner.run(_GET_INTERNAL_IP_CMD,
                         require_outputs=True,
@@ -61,7 +61,7 @@ def _filter_instances(
         if (include_instances is not None and
                 instance['id'] not in include_instances):
             continue
-        if instance.get('hostname') in possible_names:
+        if instance.get('name') in possible_names:
             filtered_instances[instance['id']] = instance
     return filtered_instances
 
@@ -69,7 +69,7 @@ def _filter_instances(
 def _get_head_instance_id(instances: Dict[str, Any]) -> Optional[str]:
     head_instance_id = None
     for inst_id, inst in instances.items():
-        if inst['hostname'].endswith('-head'):
+        if inst['name'].endswith('-head'):
             head_instance_id = inst_id
             break
     return head_instance_id
@@ -80,16 +80,7 @@ def run_instances(region: str, cluster_name_on_cloud: str,
     """Runs instances for the given cluster."""
 
     pending_status = [
-        'create',
-        'requesting',
-        'provisioning',
-        'customizing',
-        'starting',
-        'stopping',
-        'start',
-        'stop',
-        'reboot',
-        'rebooting',
+        'pending',
     ]
     while True:
         instances = _filter_instances(cluster_name_on_cloud, pending_status)
@@ -127,7 +118,7 @@ def rename(instance_id: str, new_name: str) -> None:
                         f'{instance_name}')
             rename(instance_id, instance_name)
         if (instance_id != head_instance_id and
-                instance['hostname'].endswith('-head')):
+                instance['name'].endswith('-head')):
             # Multiple head instances exist.
             # This is a rare case when the instance name was manually modified
             # on the cloud or some unexpected behavior happened.
@@ -167,7 +158,7 @@ def rename(instance_id: str, new_name: str) -> None:
         node_type = 'head' if head_instance_id is None else 'worker'
         try:
             instance_ids = utils.FluidstackClient().create_instance(
-                hostname=f'{cluster_name_on_cloud}-{node_type}',
+                name=f'{cluster_name_on_cloud}-{node_type}',
                 instance_type=config.node_config['InstanceType'],
                 ssh_pub_key=config.node_config['AuthorizedKey'],
                 region=region)
@@ -184,9 +175,6 @@ def rename(instance_id: str, new_name: str) -> None:
         instances = _filter_instances(cluster_name_on_cloud,
                                       pending_status + ['running'])
         if len(instances) < config.count:
-            # Some of pending instances have been convert to a state that will
-            # not convert to `running` status. This can be due to resource
-            # availability issue.
             all_instances = _filter_instances(
                 cluster_name_on_cloud,
                 status_filters=None,
@@ -253,15 +241,11 @@ def terminate_instances(
     instances = _filter_instances(cluster_name_on_cloud, None)
     for inst_id, inst in instances.items():
         logger.debug(f'Terminating instance {inst_id}: {inst}')
-        if worker_only and inst['hostname'].endswith('-head'):
+        if worker_only and inst['name'].endswith('-head'):
             continue
         try:
             utils.FluidstackClient().delete(inst_id)
         except Exception as e:  # pylint: disable=broad-except
-            if (isinstance(e, utils.FluidstackAPIError) and
-                    'Machine is already terminated' in str(e)):
-                logger.debug(f'Instance {inst_id} is already terminated.')
-                continue
             with ux_utils.print_exception_no_traceback():
                 raise RuntimeError(
                     f'Failed to terminate instance {inst_id}: '
@@ -291,7 +275,7 @@ def get_cluster_info(
                 tags={},
             )
         ]
-        if instance_info['hostname'].endswith('-head'):
+        if instance_info['name'].endswith('-head'):
             head_instance_id = instance_id
 
     return common.ClusterInfo(instances=instances,
@@ -311,22 +295,10 @@ def query_instances(
     instances = _filter_instances(cluster_name_on_cloud, None)
     instances = _filter_instances(cluster_name_on_cloud, None)
     status_map = {
-        'provisioning': status_lib.ClusterStatus.INIT,
-        'requesting': status_lib.ClusterStatus.INIT,
-        'create': status_lib.ClusterStatus.INIT,
-        'customizing': status_lib.ClusterStatus.INIT,
-        'stopping': status_lib.ClusterStatus.STOPPED,
-        'stop': status_lib.ClusterStatus.STOPPED,
-        'start': status_lib.ClusterStatus.INIT,
-        'reboot': status_lib.ClusterStatus.STOPPED,
-        'rebooting': status_lib.ClusterStatus.STOPPED,
+        'pending': status_lib.ClusterStatus.INIT,
         'stopped': status_lib.ClusterStatus.STOPPED,
-        'starting': status_lib.ClusterStatus.INIT,
         'running': status_lib.ClusterStatus.UP,
-        'failed to create': status_lib.ClusterStatus.INIT,
-        'timeout error': status_lib.ClusterStatus.INIT,
-        'out of stock': status_lib.ClusterStatus.INIT,
-        'terminating': None,
+        'unhealthy': status_lib.ClusterStatus.INIT,
         'terminated': None,
     }
     statuses: Dict[str, Optional[status_lib.ClusterStatus]] = {}
diff --git a/sky/templates/fluidstack-ray.yml.j2 b/sky/templates/fluidstack-ray.yml.j2
index 309a5393828..3eb277ec6d9 100644
--- a/sky/templates/fluidstack-ray.yml.j2
+++ b/sky/templates/fluidstack-ray.yml.j2
@@ -65,7 +65,6 @@ setup_commands:
     sudo pkill -9 apt-get;
     sudo pkill -9 dpkg;
     sudo dpkg --configure -a;
-    {{ cuda_installation_commands }}
     mkdir -p ~/.ssh; touch ~/.ssh/config;
     {{ conda_installation_commands }}
     {{ ray_skypilot_installation_commands }}
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 28eeeed5190..c9b4c6e0816 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -840,6 +840,7 @@ def test_image_no_conda():
     run_one_test(test)
 
 
+@pytest.mark.no_fluidstack  # FluidStack does not support stopping instances in SkyPilot implementation
 @pytest.mark.no_kubernetes  # Kubernetes does not support stopping instances
 def test_custom_default_conda_env(generic_cloud: str):
     name = _get_cluster_name()
@@ -1549,6 +1550,7 @@ def test_job_queue_multinode(generic_cloud: str):
     run_one_test(test)
 
 
+@pytest.mark.no_fluidstack  # No FluidStack VM has 8 CPUs
 @pytest.mark.no_lambda_cloud  # No Lambda Cloud VM has 8 CPUs
 def test_large_job_queue(generic_cloud: str):
     name = _get_cluster_name()
@@ -1592,6 +1594,7 @@ def test_large_job_queue(generic_cloud: str):
     run_one_test(test)
 
 
+@pytest.mark.no_fluidstack  # No FluidStack VM has 8 CPUs
 @pytest.mark.no_lambda_cloud  # No Lambda Cloud VM has 8 CPUs
 def test_fast_large_job_queue(generic_cloud: str):
     # This is to test the jobs can be scheduled quickly when there are many jobs in the queue.
@@ -1699,6 +1702,7 @@ def test_multi_echo(generic_cloud: str):
 
 
 # ---------- Task: 1 node training. ----------
+@pytest.mark.no_fluidstack  # Fluidstack does not have T4 gpus for now
 @pytest.mark.no_lambda_cloud  # Lambda Cloud does not have V100 gpus
 @pytest.mark.no_ibm  # IBM cloud currently doesn't provide public image with CUDA
 @pytest.mark.no_scp  # SCP does not have V100 (16GB) GPUs. Run test_scp_huggingface instead.
@@ -2325,6 +2329,7 @@ def test_cancel_azure():
     run_one_test(test)
 
 
+@pytest.mark.no_fluidstack  # Fluidstack does not support V100 gpus for now
 @pytest.mark.no_lambda_cloud  # Lambda Cloud does not have V100 gpus
 @pytest.mark.no_ibm  # IBM cloud currently doesn't provide public image with CUDA
 @pytest.mark.no_paperspace  # Paperspace has `gnome-shell` on nvidia-smi
@@ -3500,6 +3505,7 @@ def test_skyserve_kubernetes_http():
     run_one_test(test)
 
 
+@pytest.mark.no_fluidstack  # Fluidstack does not support T4 gpus for now
 @pytest.mark.serve
 def test_skyserve_llm(generic_cloud: str):
     """Test skyserve with real LLM usecase"""
@@ -3557,6 +3563,7 @@ def test_skyserve_spot_recovery():
     run_one_test(test)
 
 
+@pytest.mark.no_fluidstack  # Fluidstack does not support spot instances
 @pytest.mark.serve
 @pytest.mark.no_kubernetes
 def test_skyserve_base_ondemand_fallback(generic_cloud: str):
@@ -3621,6 +3628,8 @@ def test_skyserve_dynamic_ondemand_fallback():
     run_one_test(test)
 
 
+# TODO: fluidstack does not support `--cpus 2`, but the check for services in this test is based on CPUs
+@pytest.mark.no_fluidstack
 @pytest.mark.serve
 def test_skyserve_user_bug_restart(generic_cloud: str):
     """Tests that we restart the service after user bug."""
@@ -3805,6 +3814,8 @@ def test_skyserve_large_readiness_timeout(generic_cloud: str):
     run_one_test(test)
 
 
+# TODO: fluidstack does not support `--cpus 2`, but the check for services in this test is based on CPUs
+@pytest.mark.no_fluidstack
 @pytest.mark.serve
 def test_skyserve_update(generic_cloud: str):
     """Test skyserve with update"""
@@ -3833,6 +3844,10 @@ def test_skyserve_update(generic_cloud: str):
     run_one_test(test)
 
 
+# TODO: fluidstack does not support `--cpus 2`, but the check for services in this test is based on CPUs
+pytest.mark.no_fluidstack
+
+
 @pytest.mark.serve
 def test_skyserve_rolling_update(generic_cloud: str):
     """Test skyserve with rolling update"""
@@ -3869,6 +3884,7 @@ def test_skyserve_rolling_update(generic_cloud: str):
     run_one_test(test)
 
 
+@pytest.mark.no_fluidstack
 @pytest.mark.serve
 def test_skyserve_fast_update(generic_cloud: str):
     """Test skyserve with fast update (Increment version of old replicas)"""
@@ -3946,6 +3962,7 @@ def test_skyserve_update_autoscale(generic_cloud: str):
     run_one_test(test)
 
 
+@pytest.mark.no_fluidstack  # Spot instances are note supported by Fluidstack
 @pytest.mark.serve
 @pytest.mark.no_kubernetes  # Spot instances are not supported in Kubernetes
 @pytest.mark.parametrize('mode', ['rolling', 'blue_green'])
@@ -4009,6 +4026,8 @@ def test_skyserve_new_autoscaler_update(mode: str, generic_cloud: str):
     run_one_test(test)
 
 
+# TODO: fluidstack does not support `--cpus 2`, but the check for services in this test is based on CPUs
+@pytest.mark.no_fluidstack
 @pytest.mark.serve
 def test_skyserve_failures(generic_cloud: str):
     """Test replica failure statuses"""

From 6c563e0b7b315107714f907e50415a73fd8f6102 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Wed, 21 Aug 2024 12:13:42 -0700
Subject: [PATCH 10/12] [Test] minor fix for a typo in fluidstack test (#3855)

minor fix for a typo
---
 tests/test_smoke.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index c9b4c6e0816..4d2c26103a7 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -3845,9 +3845,7 @@ def test_skyserve_update(generic_cloud: str):
 
 
 # TODO: fluidstack does not support `--cpus 2`, but the check for services in this test is based on CPUs
-pytest.mark.no_fluidstack
-
-
+@pytest.mark.no_fluidstack
 @pytest.mark.serve
 def test_skyserve_rolling_update(generic_cloud: str):
     """Test skyserve with rolling update"""

From 40749e36e89c0b68420095083dacce7f3f56f774 Mon Sep 17 00:00:00 2001
From: Colin Campbell <colinjc@users.noreply.github.com>
Date: Thu, 22 Aug 2024 00:16:04 -0400
Subject: [PATCH 11/12] Allow smarter device manager to schedule on all nodes
 (#3857)

---
 .../kubernetes/manifests/smarter-device-manager-daemonset.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml b/sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml
index 664fd69a8c8..2f8abf00550 100644
--- a/sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml
+++ b/sky/provision/kubernetes/manifests/smarter-device-manager-daemonset.yaml
@@ -26,6 +26,9 @@ spec:
       hostname: smarter-device-management
       hostNetwork: true
       dnsPolicy: ClusterFirstWithHostNet
+      tolerations:
+      - effect: NoSchedule
+        operator: Exists
       containers:
       - name: smarter-device-manager
         image: us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/smarter-device-manager:v1.1.2

From 1cd244401bfd21702f5a97878f013c060f5d9115 Mon Sep 17 00:00:00 2001
From: Zhanghao Wu <zhanghao.wu@outlook.com>
Date: Thu, 22 Aug 2024 00:21:32 -0700
Subject: [PATCH 12/12] [AWS] Support targeted on-demand capacity reservations
 (#3852)

* wip

Allow prioritize reservations

format

Allow open capacity reservations

Add check reserved resources

format

Remove specific reservations

* parent fcf1f60e2903edf8af7cdddf98189dc0c79e34d1
author Zhanghao Wu <zhanghao.wu@outlook.com> 1724175607 +0000
committer Zhanghao Wu <zhanghao.wu@outlook.com> 1724210666 +0000

wip

Allow prioritize reservations

format

Add check reserved resources

format

* Support target capacity reservation provisioning

* Fix comments

* Add doc

* format
---
 docs/source/reference/config.rst | 36 ++++++++++++++-
 sky/clouds/aws.py                | 42 ++++++++++++++++-
 sky/clouds/cloud.py              |  5 ++
 sky/clouds/gcp.py                |  4 ++
 sky/clouds/utils/aws_utils.py    | 57 +++++++++++++++++++++++
 sky/optimizer.py                 | 35 ++++++++++++--
 sky/provision/aws/instance.py    | 79 ++++++++++++++++++++++++++++----
 sky/templates/aws-ray.yml.j2     |  6 +++
 sky/utils/schemas.py             |  9 ++++
 9 files changed, 257 insertions(+), 16 deletions(-)
 create mode 100644 sky/clouds/utils/aws_utils.py

diff --git a/docs/source/reference/config.rst b/docs/source/reference/config.rst
index fc5eddd6a47..2669aef0a1b 100644
--- a/docs/source/reference/config.rst
+++ b/docs/source/reference/config.rst
@@ -193,6 +193,35 @@ Available fields and semantics:
     # Default: false.
     disk_encrypted: false
 
+    # Reserved capacity (optional).
+    #
+    # Whether to prioritize capacity reservations (considered as 0 cost) in the
+    # optimizer.
+    #
+    # If you have capacity reservations in your AWS project:
+    # Setting this to true guarantees the optimizer will pick any matching
+    # reservation within all regions and AWS will auto consume your reservations
+    # with instance match criteria to "open", and setting to false means
+    # optimizer uses regular, non-zero pricing in optimization (if by chance any
+    # matching reservation exists, AWS will still consume the reservation).
+    #
+    # Note: this setting is default to false for performance reasons, as it can
+    # take half a minute to retrieve the reservations from AWS when set to true.
+    #
+    # Default: false.
+    prioritize_reservations: false
+    #
+    # The targeted capacity reservations (CapacityReservationId) to be
+    # considered when provisioning clusters on AWS. SkyPilot will automatically
+    # prioritize this reserved capacity (considered as zero cost) if the
+    # requested resources matches the reservation.
+    #
+    # Ref: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/capacity-reservations-launch.html
+    specific_reservations:
+      - cr-a1234567
+      - cr-b2345678
+
+
     # Identity to use for AWS instances (optional).
     #
     # LOCAL_CREDENTIALS: The user's local credential files will be uploaded to
@@ -307,13 +336,16 @@ Available fields and semantics:
     # Setting this to true guarantees the optimizer will pick any matching
     # reservation and GCP will auto consume your reservation, and setting to
     # false means optimizer uses regular, non-zero pricing in optimization (if
-    # by chance any matching reservation is selected, GCP still auto consumes
-    # the reservation).
+    # by chance any matching reservation exists, GCP still auto consumes the
+    # reservation).
     #
     # If you have "specifically targeted" reservations (set by the
     # `specific_reservations` field below): This field will automatically be set
     # to true.
     #
+    # Note: this setting is default to false for performance reasons, as it can
+    # take half a minute to retrieve the reservations from GCP when set to true.
+    #
     # Default: false.
     prioritize_reservations: false
     #
diff --git a/sky/clouds/aws.py b/sky/clouds/aws.py
index 021f243da70..3a05223574d 100644
--- a/sky/clouds/aws.py
+++ b/sky/clouds/aws.py
@@ -8,7 +8,7 @@
 import subprocess
 import time
 import typing
-from typing import Any, Dict, Iterator, List, Optional, Tuple
+from typing import Any, Dict, Iterator, List, Optional, Set, Tuple
 
 from sky import clouds
 from sky import exceptions
@@ -17,6 +17,7 @@
 from sky import skypilot_config
 from sky.adaptors import aws
 from sky.clouds import service_catalog
+from sky.clouds.utils import aws_utils
 from sky.skylet import constants
 from sky.utils import common_utils
 from sky.utils import resources_utils
@@ -173,6 +174,10 @@ def regions_with_offering(cls, instance_type: str,
             regions = [r for r in regions if r.zones]
         return regions
 
+    @classmethod
+    def optimize_by_zone(cls) -> bool:
+        return aws_utils.use_reservations()
+
     @classmethod
     def zones_provision_loop(
         cls,
@@ -197,11 +202,13 @@ def zones_provision_loop(
                                             zone=None)
         for r in regions:
             assert r.zones is not None, r
-            if num_nodes > 1:
+            if num_nodes > 1 or aws_utils.use_reservations():
                 # When num_nodes > 1, we shouldn't pass a list of zones to the
                 # AWS NodeProvider to try, because it may then place the nodes of
                 # the same cluster in different zones. This is an artifact of the
                 # current AWS NodeProvider implementation.
+                # Also, when using reservations, they are zone-specific, so we
+                # should return one zone at a time.
                 for z in r.zones:
                     yield [z]
             else:
@@ -856,6 +863,37 @@ def check_quota_available(cls,
         # Quota found to be greater than zero, try provisioning
         return True
 
+    def get_reservations_available_resources(
+        self,
+        instance_type: str,
+        region: str,
+        zone: Optional[str],
+        specific_reservations: Set[str],
+    ) -> Dict[str, int]:
+        if zone is None:
+            # For backward compatibility, the cluster in INIT state launched
+            # before #2352 may not have zone information. In this case, we
+            # return 0 for all reservations.
+            return {reservation: 0 for reservation in specific_reservations}
+        reservations = aws_utils.list_reservations_for_instance_type(
+            instance_type, region)
+
+        filtered_reservations = []
+        for r in reservations:
+            if zone != r.zone:
+                continue
+            if r.targeted:
+                if r.name in specific_reservations:
+                    filtered_reservations.append(r)
+            else:
+                filtered_reservations.append(r)
+        reservation_available_resources = {
+            r.name: r.available_resources for r in filtered_reservations
+        }
+        logger.debug('Get AWS reservations available resources:'
+                     f'{region}-{zone}: {reservation_available_resources}')
+        return reservation_available_resources
+
     @classmethod
     def query_status(cls, name: str, tag_filters: Dict[str, str],
                      region: Optional[str], zone: Optional[str],
diff --git a/sky/clouds/cloud.py b/sky/clouds/cloud.py
index 854cb467c5f..9775109ac80 100644
--- a/sky/clouds/cloud.py
+++ b/sky/clouds/cloud.py
@@ -177,6 +177,11 @@ def regions_with_offering(cls, instance_type: str,
         """
         raise NotImplementedError
 
+    @classmethod
+    def optimize_by_zone(cls) -> bool:
+        """Returns whether to optimize this cloud by zone (default: region)."""
+        return False
+
     @classmethod
     def zones_provision_loop(
         cls,
diff --git a/sky/clouds/gcp.py b/sky/clouds/gcp.py
index e24e67b2486..643d55d7037 100644
--- a/sky/clouds/gcp.py
+++ b/sky/clouds/gcp.py
@@ -260,6 +260,10 @@ def regions_with_offering(cls, instance_type: str,
             regions = [r for r in regions if r.zones]
         return regions
 
+    @classmethod
+    def optimize_by_zone(cls) -> bool:
+        return True
+
     @classmethod
     def zones_provision_loop(
         cls,
diff --git a/sky/clouds/utils/aws_utils.py b/sky/clouds/utils/aws_utils.py
new file mode 100644
index 00000000000..a6c21a15d5c
--- /dev/null
+++ b/sky/clouds/utils/aws_utils.py
@@ -0,0 +1,57 @@
+"""Utilities for AWS."""
+import dataclasses
+import time
+from typing import List
+
+import cachetools
+
+from sky import skypilot_config
+from sky.adaptors import aws
+
+
+@dataclasses.dataclass
+class AWSReservation:
+    name: str
+    instance_type: str
+    zone: str
+    available_resources: int
+    # Whether the reservation is targeted, i.e. can only be consumed when
+    # the reservation name is specified.
+    targeted: bool
+
+
+def use_reservations() -> bool:
+    prioritize_reservations = skypilot_config.get_nested(
+        ('aws', 'prioritize_reservations'), False)
+    specific_reservations = skypilot_config.get_nested(
+        ('aws', 'specific_reservations'), set())
+    return prioritize_reservations or specific_reservations
+
+
+@cachetools.cached(cache=cachetools.TTLCache(maxsize=100,
+                                             ttl=300,
+                                             timer=time.time))
+def list_reservations_for_instance_type(
+    instance_type: str,
+    region: str,
+) -> List[AWSReservation]:
+    if not use_reservations():
+        return []
+    ec2 = aws.client('ec2', region_name=region)
+    response = ec2.describe_capacity_reservations(Filters=[{
+        'Name': 'instance-type',
+        'Values': [instance_type]
+    }, {
+        'Name': 'state',
+        'Values': ['active']
+    }])
+    reservations = response['CapacityReservations']
+    return [
+        AWSReservation(
+            name=r['CapacityReservationId'],
+            instance_type=r['InstanceType'],
+            zone=r['AvailabilityZone'],
+            available_resources=r['AvailableInstanceCount'],
+            targeted=r['InstanceMatchCriteria'] == 'targeted',
+        ) for r in reservations
+    ]
diff --git a/sky/optimizer.py b/sky/optimizer.py
index 7b4b29e3bce..10aa697258b 100644
--- a/sky/optimizer.py
+++ b/sky/optimizer.py
@@ -19,6 +19,8 @@
 from sky.adaptors import common as adaptors_common
 from sky.utils import env_options
 from sky.utils import log_utils
+from sky.utils import rich_utils
+from sky.utils import subprocess_utils
 from sky.utils import ux_utils
 
 if typing.TYPE_CHECKING:
@@ -252,6 +254,26 @@ def _estimate_nodes_cost_or_time(
         # node -> cloud -> list of resources that satisfy user's requirements.
         node_to_candidate_map: _TaskToPerCloudCandidates = {}
 
+        def get_available_reservations(
+            launchable_resources: Dict[resources_lib.Resources,
+                                       List[resources_lib.Resources]]
+        ) -> Dict[resources_lib.Resources, int]:
+            num_available_reserved_nodes_per_resource = {}
+
+            def get_reservations_available_resources(
+                    resources: resources_lib.Resources):
+                num_available_reserved_nodes_per_resource[resources] = sum(
+                    resources.get_reservations_available_resources().values())
+
+            launchable_resources_list: List[resources_lib.Resources] = sum(
+                launchable_resources.values(), [])
+            with rich_utils.safe_status(
+                    '[cyan]Checking reserved resources...[/]'):
+                subprocess_utils.run_in_parallel(
+                    get_reservations_available_resources,
+                    launchable_resources_list)
+            return num_available_reserved_nodes_per_resource
+
         # Compute the estimated cost/time for each node.
         for node_i, node in enumerate(topo_order):
             if node_i == 0:
@@ -279,7 +301,11 @@ def _estimate_nodes_cost_or_time(
                     list(node.resources)[0]: list(node.resources)
                 }
 
+            # Fetch reservations in advance and in parallel to speed up the
+            # reservation info fetching.
             num_resources = len(list(node.resources))
+            num_available_reserved_nodes_per_resource = (
+                get_available_reservations(launchable_resources))
 
             for orig_resources, launchable_list in launchable_resources.items():
                 if num_resources == 1 and node.time_estimator_func is None:
@@ -302,15 +328,16 @@ def _estimate_nodes_cost_or_time(
                     else:
                         estimated_runtime = node.estimate_runtime(
                             orig_resources)
+
                 for resources in launchable_list:
                     if do_print:
                         logger.debug(f'resources: {resources}')
 
                     if minimize_cost:
                         cost_per_node = resources.get_cost(estimated_runtime)
-                        num_available_reserved_nodes = sum(
-                            resources.get_reservations_available_resources(
-                            ).values())
+                        num_available_reserved_nodes = (
+                            num_available_reserved_nodes_per_resource[resources]
+                        )
 
                         # We consider the cost of the unused reservation
                         # resources to be 0 since we are already paying for
@@ -1116,7 +1143,7 @@ def _make_launchables_for_valid_region_zones(
     regions = launchable_resources.get_valid_regions_for_launchable()
     for region in regions:
         if (launchable_resources.use_spot and region.zones is not None or
-                isinstance(launchable_resources.cloud, clouds.GCP)):
+                launchable_resources.cloud.optimize_by_zone()):
             # Spot instances.
             # Do not batch the per-zone requests.
             for zone in region.zones:
diff --git a/sky/provision/aws/instance.py b/sky/provision/aws/instance.py
index 0161992bffc..24173482f34 100644
--- a/sky/provision/aws/instance.py
+++ b/sky/provision/aws/instance.py
@@ -15,6 +15,7 @@
 from sky import status_lib
 from sky.adaptors import aws
 from sky.clouds import aws as aws_cloud
+from sky.clouds.utils import aws_utils
 from sky.provision import common
 from sky.provision import constants
 from sky.provision.aws import utils
@@ -429,19 +430,81 @@ def _create_node_tag(target_instance, is_head: bool = True) -> str:
             head_instance_id = _create_node_tag(resumed_instances[0])
 
     if to_start_count > 0:
+        target_reservations = (config.node_config.get(
+            'CapacityReservationSpecification',
+            {}).get('CapacityReservationTarget',
+                    {}).get('CapacityReservationId', []))
+        created_instances = []
+        if target_reservations:
+            node_config = copy.deepcopy(config.node_config)
+            # Clear the capacity reservation specification settings in the
+            # original node config, as we will create instances with
+            # reservations with specific settings for each reservation.
+            node_config['CapacityReservationSpecification'] = {
+                'CapacityReservationTarget': {}
+            }
+
+            reservations = aws_utils.list_reservations_for_instance_type(
+                node_config['InstanceType'], region=region)
+            # Filter the reservations by the user-specified ones, because
+            # reservations contain 'open' reservations as well, which do not
+            # need to explicitly specify in the config for creating instances.
+            target_reservations_to_count = {}
+            for reservation in reservations:
+                if (reservation.targeted and
+                        reservation.name in target_reservations):
+                    target_reservations_to_count[
+                        reservation.name] = reservation.available_resources
+
+            target_reservations_list = sorted(
+                target_reservations_to_count.items(),
+                key=lambda x: x[1],
+                reverse=True)
+            for reservation, reservation_count in target_reservations_list:
+                if reservation_count <= 0:
+                    # We have sorted the reservations by the available
+                    # resources, so if the reservation is not available, the
+                    # following reservations are not available either.
+                    break
+                reservation_count = min(reservation_count, to_start_count)
+                logger.debug(f'Creating {reservation_count} instances '
+                             f'with reservation {reservation}')
+                node_config['CapacityReservationSpecification'][
+                    'CapacityReservationTarget'] = {
+                        'CapacityReservationId': reservation
+                    }
+                created_reserved_instances = _create_instances(
+                    ec2_fail_fast,
+                    cluster_name_on_cloud,
+                    node_config,
+                    tags,
+                    reservation_count,
+                    associate_public_ip_address=(
+                        not config.provider_config['use_internal_ips']))
+                created_instances.extend(created_reserved_instances)
+                to_start_count -= reservation_count
+                if to_start_count <= 0:
+                    break
+
         # TODO(suquark): If there are existing instances (already running or
         #  resumed), then we cannot guarantee that they will be in the same
         #  availability zone (when there are multiple zones specified).
         #  This is a known issue before.
 
-        created_instances = _create_instances(
-            ec2_fail_fast,
-            cluster_name_on_cloud,
-            config.node_config,
-            tags,
-            to_start_count,
-            associate_public_ip_address=(
-                not config.provider_config['use_internal_ips']))
+        if to_start_count > 0:
+            # Remove the capacity reservation specification from the node config
+            # as we have already created the instances with the reservations.
+            config.node_config.get('CapacityReservationSpecification',
+                                   {}).pop('CapacityReservationTarget', None)
+            created_remaining_instances = _create_instances(
+                ec2_fail_fast,
+                cluster_name_on_cloud,
+                config.node_config,
+                tags,
+                to_start_count,
+                associate_public_ip_address=(
+                    not config.provider_config['use_internal_ips']))
+            created_instances.extend(created_remaining_instances)
         created_instances.sort(key=lambda x: x.id)
 
         created_instance_ids = [n.id for n in created_instances]
diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2
index 4cfecfe2b12..7e9dfccdaf1 100644
--- a/sky/templates/aws-ray.yml.j2
+++ b/sky/templates/aws-ray.yml.j2
@@ -84,6 +84,12 @@ available_node_types:
           #   SpotOptions:
           #       MaxPrice: MAX_HOURLY_PRICE
       {% endif %}
+      CapacityReservationSpecification:
+        CapacityReservationPreference: open
+        {% if specific_reservations %}
+        CapacityReservationTarget:
+          CapacityReservationId: {{specific_reservations}}
+        {% endif %}
       # Use cloud init in UserData to set up the authorized_keys to get
       # around the number of keys limit and permission issues with
       # ec2.describe_key_pairs.
diff --git a/sky/utils/schemas.py b/sky/utils/schemas.py
index 0fa1e8d34ce..01dc14f617c 100644
--- a/sky/utils/schemas.py
+++ b/sky/utils/schemas.py
@@ -706,6 +706,15 @@ def get_config_schema():
             'required': [],
             'additionalProperties': False,
             'properties': {
+                'prioritize_reservations': {
+                    'type': 'boolean',
+                },
+                'specific_reservations': {
+                    'type': 'array',
+                    'items': {
+                        'type': 'string',
+                    },
+                },
                 'disk_encrypted': {
                     'type': 'boolean',
                 },