diff --git a/.github/workflows/server_tests.yaml b/.github/workflows/server_tests.yaml
index 5fdc2b1b2..78498bc01 100644
--- a/.github/workflows/server_tests.yaml
+++ b/.github/workflows/server_tests.yaml
@@ -31,12 +31,13 @@ jobs:
           echo "files=$(git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -E '*.py$' | tr '\n' ' ')"
           echo "files=$(git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -E '*.py$' | tr '\n' ' ')" >> $GITHUB_OUTPUT
 
-      - name: Run flake8 on changed files
-        if: steps.changed_files.outputs.files != ''
-        run: |
-          pip install flake8
-          echo running linter on: ${{ steps.changed_files.outputs.files }}
-          flake8 ${{ steps.changed_files.outputs.files }}
+      # TODO(travis): reenable after running this on the entire codebase
+      # - name: Run flake8 on changed files
+      #   if: steps.changed_files.outputs.files != ''
+      #   run: |
+      #     pip install flake8
+      #     echo running linter on: ${{ steps.changed_files.outputs.files }}
+      #     flake8 ${{ steps.changed_files.outputs.files }}
 
       - name: Install Protoc
         uses: arduino/setup-protoc@v1
diff --git a/router/src/infer.rs b/router/src/infer.rs
index c950e311f..6bfd6295f 100644
--- a/router/src/infer.rs
+++ b/router/src/infer.rs
@@ -1,5 +1,5 @@
 /// Batching and inference logic
-use crate::adapter::{extract_adapter_params, Adapter};
+use crate::adapter::{extract_adapter_params, Adapter, BASE_MODEL_ADAPTER_ID};
 use crate::queue::AdapterEvent;
 use crate::scheduler::AdapterScheduler;
 use crate::validation::{Validation, ValidationError};
@@ -71,7 +71,7 @@ impl Infer {
         // Initialize with base model adapter (empty) mapping to index 0
         let adapter_to_index = Arc::new(Mutex::new(HashMap::from([(
             AdapterParameters {
-                adapter_ids: vec!["".to_string()],
+                adapter_ids: vec![BASE_MODEL_ADAPTER_ID.to_string()],
                 ..Default::default()
             },
             0,
diff --git a/server/lorax_server/adapters/utils.py b/server/lorax_server/adapters/utils.py
new file mode 100644
index 000000000..88aea4a17
--- /dev/null
+++ b/server/lorax_server/adapters/utils.py
@@ -0,0 +1,31 @@
+from typing import Optional
+
+from huggingface_hub import HfApi
+
+from lorax_server.utils.sources import HUB, PBASE, S3, get_model_source, map_pbase_model_id_to_s3
+from lorax_server.utils.weights import download_weights
+
+
+def download_adapter(
+    adapter_id: str,
+    adapter_source: str,
+    api_token: Optional[str] = None,
+) -> int:
+    if adapter_source == PBASE:
+        adapter_id = map_pbase_model_id_to_s3(adapter_id, api_token)
+        adapter_source = S3
+
+    if adapter_source == HUB:
+        # Quick auth check on the repo against the token
+        HfApi(token=api_token).model_info(adapter_id, revision=None)
+
+    # fail fast if ID is not an adapter (i.e. it is a full model)
+    source = get_model_source(adapter_source, adapter_id, extension=".safetensors", api_token=api_token)
+    source.load_config()
+
+    download_weights(
+        adapter_id, source=adapter_source, api_token=api_token
+    )
+
+    # Calculate size of adapter to be loaded
+    return source.get_weight_bytes()
diff --git a/server/lorax_server/cli.py b/server/lorax_server/cli.py
index 7ad076bbc..3a7ba4652 100644
--- a/server/lorax_server/cli.py
+++ b/server/lorax_server/cli.py
@@ -7,6 +7,8 @@
 from typing import Optional
 from enum import Enum
 
+from lorax_server.utils.weights import download_weights as _download_weights
+
 app = typer.Typer()
 
 
@@ -91,96 +93,6 @@ def serve(
     )
 
 
-def _download_weights(
-    model_id: str,
-    revision: Optional[str] = None,
-    extension: str = ".safetensors",
-    auto_convert: bool = True,
-    source: str = "hub",
-    api_token: Optional[str] = None,
-):
-    # Import here after the logger is added to log potential import exceptions
-    from lorax_server import utils
-    from lorax_server.utils import sources
-    model_source = sources.get_model_source(source, model_id, revision, extension, api_token)
-
-    # Test if files were already download
-    try:
-        model_source.weight_files()
-        logger.info("Files are already present on the host. " "Skipping download.")
-        return
-    # Local files not found
-    except (utils.LocalEntryNotFoundError, FileNotFoundError):
-        pass
-
-    is_local_model = (Path(model_id).exists() and Path(model_id).is_dir()) or os.getenv(
-        "WEIGHTS_CACHE_OVERRIDE", None
-    ) is not None
-
-    if not is_local_model:
-        # TODO: Combine into class that takes the source as input
-        # Try to download weights from the hub
-        try:
-            model_source.download_model_assets()
-            return
-        # No weights found on the hub with this extension
-        except utils.EntryNotFoundError as e:
-            # Check if we want to automatically convert to safetensors or if we can use .bin weights instead
-            if not extension == ".safetensors" or not auto_convert:
-                raise e
-
-    # Try to see if there are local pytorch weights
-    try:
-        # Get weights for a local model, a hub cached model and inside the WEIGHTS_CACHE_OVERRIDE
-        local_pt_files = model_source.weight_files(extension=".bin")
-
-    # No local pytorch weights
-    except utils.LocalEntryNotFoundError:
-        if extension == ".safetensors":
-            logger.warning(
-                f"No safetensors weights found for model {model_id} at revision {revision}. "
-                f"Downloading PyTorch weights."
-            )
-
-        # Try to see if there are pytorch weights on the hub
-        pt_filenames = model_source.remote_weight_files(extension=".bin")
-        # Download pytorch weights
-        local_pt_files = model_source.download_weights(pt_filenames)
-
-    if auto_convert:
-        logger.warning(
-            f"No safetensors weights found for model {model_id} at revision {revision}. "
-            f"Converting PyTorch weights to safetensors."
-        )
-
-        # Safetensors final filenames
-        local_st_files = [
-            p.parent / f"{p.stem.lstrip('pytorch_')}.safetensors"
-            for p in local_pt_files
-        ]
-        try:
-            from transformers import AutoConfig
-            import transformers
-
-            config_path = sources.get_config_path(model_id, source)
-            config = AutoConfig.from_pretrained(
-                config_path,
-                revision=revision,
-            )
-            architecture = config.architectures[0]
-
-            class_ = getattr(transformers, architecture)
-
-            # Name for this varible depends on transformers version.
-            discard_names = getattr(class_, "_tied_weights_keys", [])
-            discard_names.extend(getattr(class_, "_keys_to_ignore_on_load_missing", []))
-
-        except Exception as e:
-            discard_names = []
-        # Convert pytorch weights to safetensors
-        utils.convert_files(local_pt_files, local_st_files, discard_names)
-
-
 @app.command()
 def download_weights(
     model_id: str,
diff --git a/server/lorax_server/models/flash_causal_lm.py b/server/lorax_server/models/flash_causal_lm.py
index 2f53ffd6c..4ec97c5d6 100644
--- a/server/lorax_server/models/flash_causal_lm.py
+++ b/server/lorax_server/models/flash_causal_lm.py
@@ -32,6 +32,7 @@
 from lorax_server.utils.graph import GraphCache
 from lorax_server.adapters import AdapterBatchData, AdapterBatchMetadata
 from lorax_server.utils.segments import SegmentConcatBuilder, find_segments
+from lorax_server.utils.sources import HUB
 from lorax_server.utils.state import warmup_mode
 from lorax_server.utils.tokenizer import TokenizerManager
 
@@ -731,7 +732,7 @@ def __init__(
         sliding_window: Optional[int] = None,
         compile: bool = False,
         adapter_id: str = BASE_MODEL_ADAPTER_ID,
-        dynamic_adapter_loading_enabled: bool = True,
+        adapter_source: str = HUB,
     ):
         global SLIDING_WINDOW
         global SLIDING_WINDOW_BLOCKS
@@ -751,7 +752,8 @@ def __init__(
             world_size=world_size,
             sliding_window=sliding_window,
             adapter_id=adapter_id,
-            dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled,
+            adapter_source=adapter_source,
+            dynamic_adapter_loading_enabled=True,
         )
 
         if sliding_window is not None:
diff --git a/server/lorax_server/models/flash_gemma.py b/server/lorax_server/models/flash_gemma.py
index c1e17aafd..0030907d9 100644
--- a/server/lorax_server/models/flash_gemma.py
+++ b/server/lorax_server/models/flash_gemma.py
@@ -12,7 +12,6 @@
     GemmaConfig,
 )
 from lorax_server.utils import (
-    create_merged_weight_files,
     initialize_torch_distributed,
     weight_files,
     Weights,
@@ -63,29 +62,11 @@ def __init__(
         torch.distributed.barrier(group=self.process_group)
 
         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
-
-        # if adapter_id passed in as part of model instantiation, then we merge
-        # the adapter weights with the model weights. This also disables dynamic
-        # adapter loading, since the model is now itself initialized with an adapter.
- merged_weight_filenames = None - dynamic_adapter_loading_enabled = True - if len(adapter_id) > 0: - logger.info(f"Merging adapter weights from adapter_id {adapter_id} into model weights.") - # Need to pass the adapter source here - merged_weight_filenames = create_merged_weight_files( - adapter_id, model_id, model_weight_filenames=filenames, adapter_source=adapter_source - ) - dynamic_adapter_loading_enabled = False - adapter_id = adapter_id - else: - adapter_id = BASE_MODEL_ADAPTER_ID - weights = Weights( filenames, device, dtype, process_group=self.process_group, - merged_weight_filenames=merged_weight_filenames ) if config.quantize in ["gptq", "awq", "eetq"]: @@ -107,7 +88,7 @@ def __init__( world_size=world_size, compile=compile, adapter_id=adapter_id, - dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled, + adapter_source=adapter_source, ) @property diff --git a/server/lorax_server/models/flash_gpt2.py b/server/lorax_server/models/flash_gpt2.py index 388452b18..426e18fd8 100644 --- a/server/lorax_server/models/flash_gpt2.py +++ b/server/lorax_server/models/flash_gpt2.py @@ -19,11 +19,7 @@ LM_HEAD, ) from lorax_server.utils import ( - compute_delta_weight, - create_merged_weight_files, - get_start_stop_idxs_for_rank, initialize_torch_distributed, - load_module_map, weight_files, Weights, ) @@ -70,23 +66,6 @@ def __init__( torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - - # if adapter_id passed in as part of model instantiation, then we merge - # the adapter weights with the model weights. This also disables dynamic - # adapter loading, since the model is now itself initialized with an adapter. - merged_weight_filenames = None - dynamic_adapter_loading_enabled = True - if len(adapter_id) > 0: - logger.info(f"Merging adapter weights from adapter_id {adapter_id} into model weights.") - # Need to pass the adapter source here - merged_weight_filenames = create_merged_weight_files( - adapter_id, model_id, model_weight_filenames=filenames, adapter_source=adapter_source - ) - dynamic_adapter_loading_enabled = False - adapter_id = adapter_id - else: - adapter_id = BASE_MODEL_ADAPTER_ID - weights = Weights( filenames, device, @@ -114,7 +93,7 @@ def __init__( world_size=world_size, compile=compile, adapter_id=adapter_id, - dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled, + adapter_source=adapter_source, ) @property diff --git a/server/lorax_server/models/flash_llama.py b/server/lorax_server/models/flash_llama.py index 07632613b..82dde9199 100644 --- a/server/lorax_server/models/flash_llama.py +++ b/server/lorax_server/models/flash_llama.py @@ -13,7 +13,6 @@ LlamaConfig, ) from lorax_server.utils import ( - create_merged_weight_files, initialize_torch_distributed, weight_files, Weights, @@ -64,29 +63,11 @@ def __init__( torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - - # if adapter_id passed in as part of model instantiation, then we merge - # the adapter weights with the model weights. This also disables dynamic - # adapter loading, since the model is now itself initialized with an adapter. 
- merged_weight_filenames = None - dynamic_adapter_loading_enabled = True - if len(adapter_id) > 0: - logger.info(f"Merging adapter weights from adapter_id {adapter_id} into model weights.") - # Need to pass the adapter source here - merged_weight_filenames = create_merged_weight_files( - adapter_id, model_id, model_weight_filenames=filenames, adapter_source=adapter_source - ) - dynamic_adapter_loading_enabled = False - adapter_id = adapter_id - else: - adapter_id = BASE_MODEL_ADAPTER_ID - weights = Weights( filenames, device, dtype, process_group=self.process_group, - merged_weight_filenames=merged_weight_filenames ) if config.quantize in ["gptq", "awq", "eetq"]: @@ -108,7 +89,7 @@ def __init__( world_size=world_size, compile=compile, adapter_id=adapter_id, - dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled, + adapter_source=adapter_source, ) @property diff --git a/server/lorax_server/models/flash_mistral.py b/server/lorax_server/models/flash_mistral.py index 97df03804..b4fa228b5 100644 --- a/server/lorax_server/models/flash_mistral.py +++ b/server/lorax_server/models/flash_mistral.py @@ -12,7 +12,6 @@ MistralConfig, ) from lorax_server.utils import ( - create_merged_weight_files, initialize_torch_distributed, weight_files, Weights, @@ -61,29 +60,11 @@ def __init__( torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - - # if adapter_id passed in as part of model instantiation, then we merge - # the adapter weights with the model weights. This also disables dynamic - # adapter loading, since the model is now itself initialized with an adapter. - merged_weight_filenames = None - dynamic_adapter_loading_enabled = True - if len(adapter_id) > 0: - logger.info(f"Merging adapter weights from adapter_id {adapter_id} into model weights.") - # Need to pass the adapter source here - merged_weight_filenames = create_merged_weight_files( - adapter_id, model_id, model_weight_filenames=filenames, adapter_source=adapter_source - ) - dynamic_adapter_loading_enabled = False - adapter_id = adapter_id - else: - adapter_id = BASE_MODEL_ADAPTER_ID - weights = Weights( filenames, device, dtype, process_group=self.process_group, - merged_weight_filenames=merged_weight_filenames ) if config.quantize in ["gptq", "awq", "eetq"]: @@ -106,7 +87,7 @@ def __init__( sliding_window=config.sliding_window, compile=compile, adapter_id=adapter_id, - dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled, + adapter_source=adapter_source, ) @property diff --git a/server/lorax_server/models/flash_mixtral.py b/server/lorax_server/models/flash_mixtral.py index 76772ce90..6c89c2b81 100644 --- a/server/lorax_server/models/flash_mixtral.py +++ b/server/lorax_server/models/flash_mixtral.py @@ -31,7 +31,6 @@ MixtralConfig, ) from lorax_server.utils import ( - create_merged_weight_files, initialize_torch_distributed, weight_files, Weights, @@ -212,6 +211,7 @@ def from_pb( max_length = max(max_length, input_length + max_new_tokens) adapter_indices = torch.cat(adapter_indices_list).to(dtype=torch.int64, device=device) + print("!!! ADAPTER INDICES", adapter_indices) request_tokenizers = [ tokenizers.get_tokenizer(r.adapter_index, tokenizer) @@ -361,29 +361,11 @@ def __init__( torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - - # if adapter_id passed in as part of model instantiation, then we merge - # the adapter weights with the model weights. 
This also disables dynamic - # adapter loading, since the model is now itself initialized with an adapter. - merged_weight_filenames = None - dynamic_adapter_loading_enabled = True - if len(adapter_id) > 0: - logger.info(f"Merging adapter weights from adapter_id {adapter_id} into model weights.") - # Need to pass the adapter source here - merged_weight_filenames = create_merged_weight_files( - adapter_id, model_id, model_weight_filenames=filenames, adapter_source=adapter_source - ) - dynamic_adapter_loading_enabled = False - adapter_id = adapter_id - else: - adapter_id = BASE_MODEL_ADAPTER_ID - weights = Weights( filenames, device, dtype, process_group=self.process_group, - merged_weight_filenames=merged_weight_filenames ) if config.quantize in ["gptq", "awq", "eetq"]: @@ -406,7 +388,7 @@ def __init__( sliding_window=config.sliding_window, compile=compile, adapter_id=adapter_id, - dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled, + adapter_source=adapter_source, ) @property diff --git a/server/lorax_server/models/flash_phi.py b/server/lorax_server/models/flash_phi.py index ac0be9435..f7d0156d2 100644 --- a/server/lorax_server/models/flash_phi.py +++ b/server/lorax_server/models/flash_phi.py @@ -18,7 +18,6 @@ PhiConfig, ) from lorax_server.utils import ( - create_merged_weight_files, initialize_torch_distributed, weight_files, Weights, @@ -69,29 +68,11 @@ def __init__( torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - - # if adapter_id passed in as part of model instantiation, then we merge - # the adapter weights with the model weights. This also disables dynamic - # adapter loading, since the model is now itself initialized with an adapter. - merged_weight_filenames = None - dynamic_adapter_loading_enabled = True - if len(adapter_id) > 0: - logger.info(f"Merging adapter weights from adapter_id {adapter_id} into model weights.") - # Need to pass the adapter source here - merged_weight_filenames = create_merged_weight_files( - adapter_id, model_id, model_weight_filenames=filenames, adapter_source=adapter_source - ) - dynamic_adapter_loading_enabled = False - adapter_id = adapter_id - else: - adapter_id = BASE_MODEL_ADAPTER_ID - weights = Weights( filenames, device, dtype, process_group=self.process_group, - merged_weight_filenames=merged_weight_filenames ) if config.quantize in ["gptq", "awq", "eetq"]: @@ -114,7 +95,7 @@ def __init__( world_size=world_size, compile=compile, adapter_id=adapter_id, - dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled, + adapter_source=adapter_source, ) @property diff --git a/server/lorax_server/models/flash_qwen.py b/server/lorax_server/models/flash_qwen.py index 55e439fc6..01013b0eb 100644 --- a/server/lorax_server/models/flash_qwen.py +++ b/server/lorax_server/models/flash_qwen.py @@ -17,7 +17,6 @@ QwenConfig, ) from lorax_server.utils import ( - create_merged_weight_files, initialize_torch_distributed, weight_files, Weights, @@ -68,29 +67,11 @@ def __init__( torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - - # if adapter_id passed in as part of model instantiation, then we merge - # the adapter weights with the model weights. This also disables dynamic - # adapter loading, since the model is now itself initialized with an adapter. 
- merged_weight_filenames = None - dynamic_adapter_loading_enabled = True - if len(adapter_id) > 0: - logger.info(f"Merging adapter weights from adapter_id {adapter_id} into model weights.") - # Need to pass the adapter source here - merged_weight_filenames = create_merged_weight_files( - adapter_id, model_id, model_weight_filenames=filenames, adapter_source=adapter_source - ) - dynamic_adapter_loading_enabled = False - adapter_id = adapter_id - else: - adapter_id = BASE_MODEL_ADAPTER_ID - weights = Weights( filenames, device, dtype, process_group=self.process_group, - merged_weight_filenames=merged_weight_filenames ) if config.quantize in ["gptq", "awq", "eetq"]: @@ -113,7 +94,7 @@ def __init__( world_size=world_size, compile=compile, adapter_id=adapter_id, - dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled, + adapter_source=adapter_source, ) @property diff --git a/server/lorax_server/models/flash_qwen2.py b/server/lorax_server/models/flash_qwen2.py index 693b1e381..1b3161c41 100644 --- a/server/lorax_server/models/flash_qwen2.py +++ b/server/lorax_server/models/flash_qwen2.py @@ -19,7 +19,6 @@ FlashQwen2ForCausalLM, ) from lorax_server.utils import ( - create_merged_weight_files, initialize_torch_distributed, weight_files, Weights, @@ -69,29 +68,11 @@ def __init__( torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - - # if adapter_id passed in as part of model instantiation, then we merge - # the adapter weights with the model weights. This also disables dynamic - # adapter loading, since the model is now itself initialized with an adapter. - merged_weight_filenames = None - dynamic_adapter_loading_enabled = True - if len(adapter_id) > 0: - logger.info(f"Merging adapter weights from adapter_id {adapter_id} into model weights.") - # Need to pass the adapter source here - merged_weight_filenames = create_merged_weight_files( - adapter_id, model_id, model_weight_filenames=filenames, adapter_source=adapter_source - ) - dynamic_adapter_loading_enabled = False - adapter_id = adapter_id - else: - adapter_id = BASE_MODEL_ADAPTER_ID - weights = Weights( filenames, device, dtype, process_group=self.process_group, - merged_weight_filenames=merged_weight_filenames ) if config.quantize in ["gptq", "awq", "eetq"]: @@ -116,7 +97,7 @@ def __init__( sliding_window=config.sliding_window, compile=compile, adapter_id=adapter_id, - dynamic_adapter_loading_enabled=dynamic_adapter_loading_enabled, + adapter_source=adapter_source, ) @property diff --git a/server/lorax_server/models/model.py b/server/lorax_server/models/model.py index 047bf8cd4..228b15fa8 100644 --- a/server/lorax_server/models/model.py +++ b/server/lorax_server/models/model.py @@ -7,12 +7,14 @@ from typing import Dict, List, Tuple, Optional, TypeVar, Type from transformers import PreTrainedTokenizerBase +from lorax_server.adapters.utils import download_adapter from lorax_server.models.types import Batch, GeneratedText from lorax_server.pb.generate_pb2 import AdapterParameters, AdapterSource, InfoResponse from lorax_server.utils.adapter import ( BASE_MODEL_ADAPTER_ID, load_and_merge_adapters, ) +from lorax_server.utils.sources import HUB from lorax_server.utils.tokenizer import TokenizerManager from lorax_server.adapters.weights import LayerAdapterWeights from lorax_server.utils.weights import shard_on_dim @@ -33,6 +35,7 @@ def __init__( world_size: int = 1, sliding_window: Optional[int] = None, adapter_id: str = BASE_MODEL_ADAPTER_ID, + 
adapter_source: str = HUB, dynamic_adapter_loading_enabled: bool = True, ): self.model_id = model_id @@ -59,6 +62,15 @@ def __init__( is not None ) + if adapter_id and adapter_id != BASE_MODEL_ADAPTER_ID: + download_adapter(adapter_id, adapter_source, api_token=None) + self.load_adapter( + AdapterParameters(adapter_ids=[adapter_id]), + adapter_source, + adapter_index=0, + api_token=None, + ) + self.check_initialized() @property diff --git a/server/lorax_server/server.py b/server/lorax_server/server.py index 6aff3f372..5a66fcc14 100644 --- a/server/lorax_server/server.py +++ b/server/lorax_server/server.py @@ -12,8 +12,8 @@ from pathlib import Path from typing import List, Optional +from lorax_server.adapters.utils import download_adapter from lorax_server.cache import Cache -from lorax_server.cli import _download_weights from lorax_server.interceptor import ExceptionInterceptor from lorax_server.models import Model, get_model from lorax_server.pb import generate_pb2_grpc, generate_pb2 @@ -142,24 +142,7 @@ async def DownloadAdapter(self, request: generate_pb2.DownloadAdapterRequest, co logger.info("No adapter to download for base model. Skipping.") continue - if adapter_source == PBASE: - adapter_id = map_pbase_model_id_to_s3(adapter_id, api_token) - adapter_source = S3 - - if adapter_source == HUB: - # Quick auth check on the repo against the token - HfApi(token=api_token).model_info(adapter_id, revision=None) - - # fail fast if ID is not an adapter (i.e. it is a full model) - source = get_model_source(adapter_source, adapter_id, extension=".safetensors", api_token=api_token) - source.load_config() - - _download_weights( - adapter_id, source=adapter_source, api_token=api_token - ) - - # Calculate size of adapter to be loaded - adapter_bytes += source.get_weight_bytes() + adapter_bytes += download_adapter(adapter_id, adapter_source, api_token) adapter_memory_size = self.model.adapter_memory_size() if adapter_memory_size > 0: diff --git a/server/lorax_server/utils/__init__.py b/server/lorax_server/utils/__init__.py index 910ae613f..15f41a0ff 100644 --- a/server/lorax_server/utils/__init__.py +++ b/server/lorax_server/utils/__init__.py @@ -1,6 +1,4 @@ from lorax_server.utils.adapter import ( - compute_delta_weight, - create_merged_weight_files, load_module_map, ) from lorax_server.utils.convert import convert_file, convert_files @@ -33,8 +31,6 @@ ) __all__ = [ - "compute_delta_weight", - "create_merged_weight_files", "load_module_map", "convert_file", "convert_files", diff --git a/server/lorax_server/utils/adapter.py b/server/lorax_server/utils/adapter.py index 3c3b7b4c6..f73ab992f 100644 --- a/server/lorax_server/utils/adapter.py +++ b/server/lorax_server/utils/adapter.py @@ -1,24 +1,14 @@ from dataclasses import dataclass -import os -from collections import defaultdict from functools import lru_cache -from pathlib import Path -from typing import TYPE_CHECKING, List, Dict, Set, Tuple +from typing import TYPE_CHECKING, Set, Tuple import warnings -import torch -from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE -from loguru import logger -from peft.utils import transpose -from safetensors.torch import load_file, save_file +from safetensors.torch import load_file from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizer -from tqdm import tqdm -from filelock import FileLock from lorax_server.pb import generate_pb2 -from lorax_server.utils.sources import get_model_source, get_config_path, weight_files +from lorax_server.utils.sources import get_model_source, get_config_path 
from lorax_server.utils.merges.strategies import merge_adapters -from lorax_server.adapters.lora import get_scaling_factor if TYPE_CHECKING: from lorax_server.adapters.config import AdapterConfig, ModuleMap @@ -145,157 +135,3 @@ def load_module_map( # map the model weights to the relevant adapter weights (LoRA A and B matrices) module_map, adapter_weight_names = adapter_config.map_weights_for_model(adapter_weights, weight_names) return module_map, adapter_config, adapter_weight_names, adapter_tokenizer - - -def compute_delta_weight( - lora_A: torch.Tensor, - lora_B: torch.Tensor, - fan_in_fan_out: bool, - alpha: float, - r: float, - uses_rslora: bool = False -) -> torch.Tensor: - """Computes the delta weight for a Linear layer given A and B LoRA matrices. - - TODO: add logic for other module types beyond Linear layers. - - Reference: https://github.com/huggingface/peft/blob/v0.4.0/src/peft/tuners/lora.py#L799-L806 - """ - scaling = get_scaling_factor(alpha, r, uses_rslora=uses_rslora) - delta_weight = transpose(lora_B @ lora_A, fan_in_fan_out) * scaling - return delta_weight - - -def merge_adapter_weights( - model_weights: Dict[str, torch.Tensor], - adapter_weights: Dict[str, torch.Tensor], - adapter_config: "AdapterConfig" -) -> Tuple[Dict[str, torch.Tensor], Set[str]]: - """ - Merges the adapter weights into the model weights. - - Args: - model_weights (Dict[str, torch.Tensor]): The weights of the base model. - adapter_weights (Dict[str, torch.Tensor]): The weights of the adapters. - adapter_config (AdapterConfig): The configuration for the adapter. - - Returns: - Tuple[Dict[str, torch.Tensor], Set[str]]: A tuple containing the merged weights and the set of processed adapter weight names. - """ - from lorax_server.adapters.lora import LoraConfig - - if not isinstance(adapter_config, LoraConfig): - raise ValueError(f"Unsupported adapter config type: {type(adapter_config)}") - - module_mapping = defaultdict(dict) - processed_adapter_weight_names = set() - - # map the original tensor names to their adapter counterparts - for weight_name in model_weights: - end_idx = weight_name.rfind(".weight") - key = weight_name[:end_idx] - for adapter_weight_name in adapter_weights: - if key in adapter_weight_name: - # example value: 'base_model.model.model.layers.10.self_attn.v_proj.lora_B.weight' - # matrix_type gets the second to last element in the module name, i.e. 'lora_B' - matrix_type = adapter_weight_name.split(".")[-2] - module_mapping[weight_name][matrix_type] = adapter_weight_name - processed_adapter_weight_names.add(adapter_weight_name) - - # merge adapter weights into model weights - merged_weights = {} - for weight_name, adapter_weight_names in tqdm( - module_mapping.items(), desc="Merging adapter weights", total=len(module_mapping)): - - # TODO: support adapter types beyond LoRA - # TODO: put this on GPU if it is available. This should greatly speedup compute_delta_weight - lora_A = adapter_weights[adapter_weight_names["lora_A"]] - lora_B = adapter_weights[adapter_weight_names["lora_B"]] - delta_weight = compute_delta_weight( - lora_A, - lora_B, - adapter_config.fan_in_fan_out, - adapter_config.lora_alpha, - adapter_config.r, - uses_rslora=adapter_config.use_rslora, - ) - - # transpose delta weight if necessary - # TODO(geoffrey): I believe this is required when using Conv1D layers (gpt2). - # We can likely take this out once we've switched to using Linear layers. 
- if (delta_weight.shape != model_weights[weight_name].shape and - delta_weight.T.shape == model_weights[weight_name].shape): - delta_weight = delta_weight.T - merged_weights[weight_name] = model_weights[weight_name] + delta_weight - return merged_weights, processed_adapter_weight_names - - -def create_merged_weight_files( - adapter_id: str, - model_id: str, - model_weight_filenames: List[Path], - adapter_source: str = "hub", -) -> List[Path]: - """Creates merged weight files for the given adapter ID and filenames.""" - api_token = None # TODO(travis): add support for API token - source = get_model_source(adapter_source, adapter_id, api_token=api_token) - adapter_filenames = source.weight_files() - - adapter_config = source.load_config() - if adapter_config.base_model_name_or_path != model_id: - expected_config = AutoConfig.from_pretrained(model_id) - model_config = AutoConfig.from_pretrained(adapter_config.base_model_name_or_path) - if model_config.architectures == expected_config.architectures: - warnings.warn( - f"Adapter '{adapter_id}' was not trained on base model '{model_id}'. " - f"If you encounter issues, use --model-id '{adapter_config.base_model_name_or_path}' instead." - ) - else: - # TODO(travis): revisit this when we support clasification heads which will not use CausalLM - raise ValueError(f"Adapter '{adapter_id}' is not compatible with model '{model_id}'. " - f"Architectures differ: {model_config.architectures} != {expected_config.architectures}. " - f"Use --model-id '{adapter_config.base_model_name_or_path}' instead.") - - # load adapter weights from all shards (should have relatively small memory footprint) - adapter_weights = {} - for filename in adapter_filenames: - adapter_weights.update(load_file(filename)) - remaining_adapter_weight_names = set(adapter_weights.keys()) - - merged_weight_directory = Path(HUGGINGFACE_HUB_CACHE) / f"models--{adapter_id.replace('/', '--')}-merged" - # just grab the existing files if they already exist and return immediately - lock = FileLock(str(merged_weight_directory)+ ".lock") - with lock: - if merged_weight_directory.is_dir(): - logger.info(f"Merged weight directory {merged_weight_directory} exist, skipping merge computation.") - return weight_files(merged_weight_directory) - else: - logger.info("Merged weight files do not exist, computing merge.") - os.makedirs(merged_weight_directory) - - merged_weight_filenames = [] - for i, filename in enumerate(model_weight_filenames): - logger.info( - f"Merging adapter weights into model weights in " - f"{filename} ({i+1} / {len(model_weight_filenames)})..." - ) - model_weights = load_file(filename) - merged_weights, processed_adapter_weight_names = merge_adapter_weights( - model_weights, adapter_weights, adapter_config) - - merged_adapter_filename = Path(merged_weight_directory, os.path.basename(filename)) - save_file(merged_weights, merged_adapter_filename) - logger.debug(f"Saved merged weights into {merged_adapter_filename}") - - merged_weight_filenames.append(merged_adapter_filename) - remaining_adapter_weight_names = remaining_adapter_weight_names.difference( - processed_adapter_weight_names) - - if len(remaining_adapter_weight_names) > 0: - logger.warning("WARNING: The following lora weights were not merged into the model weights:") - for lora_name in remaining_adapter_weight_names: - logger.warning("\t" + lora_name) - - logger.info( - f"Finished merging adapter weights. 
Merged weight files saved to: {merged_weight_directory}")
-    return merged_weight_filenames
diff --git a/server/lorax_server/utils/sources/source.py b/server/lorax_server/utils/sources/source.py
index 14867d97f..4ce1081dc 100644
--- a/server/lorax_server/utils/sources/source.py
+++ b/server/lorax_server/utils/sources/source.py
@@ -4,7 +4,6 @@
 from typing import Optional, List
 from pathlib import Path
 
-from lorax_server.adapters import load_adapter_config
 from lorax_server.adapters.config import AdapterConfig
 
 
@@ -132,7 +131,8 @@ def get_weight_bytes(self) -> int:
         return total_size
 
     def load_config(self) -> AdapterConfig:
+        from lorax_server.adapters import load_adapter_config
+
         config_path = self.download_file("config.json", ignore_errors=True)
         adapter_config_path = self.download_file("adapter_config.json", ignore_errors=True)
         return load_adapter_config(config_path, adapter_config_path, self.api_token)
-
diff --git a/server/lorax_server/utils/weights.py b/server/lorax_server/utils/weights.py
index b786751b8..fa6ab19a5 100644
--- a/server/lorax_server/utils/weights.py
+++ b/server/lorax_server/utils/weights.py
@@ -365,3 +365,93 @@ def shard_on_dim(t: torch.Tensor, dim: int, process_group: torch.distributed.ProcessGroup):
         raise NotImplementedError("Let's make that generic when needed")
 
     return tensor
+
+
+def download_weights(
+    model_id: str,
+    revision: Optional[str] = None,
+    extension: str = ".safetensors",
+    auto_convert: bool = True,
+    source: str = "hub",
+    api_token: Optional[str] = None,
+):
+    # Import here after the logger is added to log potential import exceptions
+    from lorax_server import utils
+    from lorax_server.utils import sources
+    model_source = sources.get_model_source(source, model_id, revision, extension, api_token)
+
+    # Test if files were already download
+    try:
+        model_source.weight_files()
+        logger.info("Files are already present on the host. " "Skipping download.")
+        return
+    # Local files not found
+    except (utils.LocalEntryNotFoundError, FileNotFoundError):
+        pass
+
+    is_local_model = (Path(model_id).exists() and Path(model_id).is_dir()) or os.getenv(
+        "WEIGHTS_CACHE_OVERRIDE", None
+    ) is not None
+
+    if not is_local_model:
+        # TODO: Combine into class that takes the source as input
+        # Try to download weights from the hub
+        try:
+            model_source.download_model_assets()
+            return
+        # No weights found on the hub with this extension
+        except utils.EntryNotFoundError as e:
+            # Check if we want to automatically convert to safetensors or if we can use .bin weights instead
+            if not extension == ".safetensors" or not auto_convert:
+                raise e
+
+    # Try to see if there are local pytorch weights
+    try:
+        # Get weights for a local model, a hub cached model and inside the WEIGHTS_CACHE_OVERRIDE
+        local_pt_files = model_source.weight_files(extension=".bin")
+
+    # No local pytorch weights
+    except utils.LocalEntryNotFoundError:
+        if extension == ".safetensors":
+            logger.warning(
+                f"No safetensors weights found for model {model_id} at revision {revision}. "
+                f"Downloading PyTorch weights."
+            )
+
+        # Try to see if there are pytorch weights on the hub
+        pt_filenames = model_source.remote_weight_files(extension=".bin")
+        # Download pytorch weights
+        local_pt_files = model_source.download_weights(pt_filenames)
+
+    if auto_convert:
+        logger.warning(
+            f"No safetensors weights found for model {model_id} at revision {revision}. "
+            f"Converting PyTorch weights to safetensors."
+        )
+
+        # Safetensors final filenames
+        local_st_files = [
+            p.parent / f"{p.stem.lstrip('pytorch_')}.safetensors"
+            for p in local_pt_files
+        ]
+        try:
+            from transformers import AutoConfig
+            import transformers
+
+            config_path = sources.get_config_path(model_id, source)
+            config = AutoConfig.from_pretrained(
+                config_path,
+                revision=revision,
+            )
+            architecture = config.architectures[0]
+
+            class_ = getattr(transformers, architecture)
+
+            # Name for this varible depends on transformers version.
+            discard_names = getattr(class_, "_tied_weights_keys", [])
+            discard_names.extend(getattr(class_, "_keys_to_ignore_on_load_missing", []))
+
+        except Exception as e:
+            discard_names = []
+        # Convert pytorch weights to safetensors
+        utils.convert_files(local_pt_files, local_st_files, discard_names)
diff --git a/server/tests/utils/test_adapter.py b/server/tests/utils/test_adapter.py
deleted file mode 100644
index c7b2f48b9..000000000
--- a/server/tests/utils/test_adapter.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import torch
-
-from lorax_server.adapters.lora import LoraConfig
-from lorax_server.utils.adapter import merge_adapter_weights
-
-
-def test_merge_adapter_weights():
-    W_0 = torch.tensor([
-        [1, 2, 3],
-        [4, 5, 6],
-        [7, 8, 9]
-    ])
-    model_weights = {
-        "model.layers.10.self_attn.q_proj.weight": W_0
-    }
-
-    A = torch.tensor([
-        [1, 2, 3],
-        [4, 5, 6]
-    ])
-    B = torch.tensor([
-        [1, 2],
-        [3, 4],
-        [5, 6]
-    ])
-    adapter_weights = {
-        "base_model.model.model.layers.10.self_attn.q_proj.lora_A.weight": A,
-        "base_model.model.model.layers.10.self_attn.q_proj.lora_B.weight": B
-    }
-
-    W_expected = torch.tensor([
-        [ 5.5000, 8.0000, 10.5000],
-        [13.5000, 18.0000, 22.5000],
-        [21.5000, 28.0000, 34.5000]
-    ])
-    adapter_config = LoraConfig(base_model_name_or_path="", r=2, target_modules=None, lora_alpha=1, fan_in_fan_out=False, use_rslora=False)
-    merged_weights, processed_adapter_weight_names = merge_adapter_weights(model_weights, adapter_weights, adapter_config)
-
-    assert len(merged_weights) == 1
-    assert merged_weights["model.layers.10.self_attn.q_proj.weight"].equal(W_expected)
-
-    assert len(processed_adapter_weight_names) == 2
-    assert "base_model.model.model.layers.10.self_attn.q_proj.lora_A.weight" in processed_adapter_weight_names
-    assert "base_model.model.model.layers.10.self_attn.q_proj.lora_B.weight" in processed_adapter_weight_names
\ No newline at end of file
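The new server/lorax_server/adapters/utils.py consolidates the adapter download flow that server.py previously inlined in its DownloadAdapter handler: it maps Predibase adapter IDs to S3, auth-checks Hub repos against the supplied token, fails fast when the ID points to a full model rather than an adapter, downloads the weights, and returns their size in bytes. A minimal sketch of the intended call pattern, assuming a placeholder adapter ID (only the ID and the loop are illustrative; the call itself mirrors the diff):

    from lorax_server.adapters.utils import download_adapter
    from lorax_server.utils.sources import HUB

    # Accumulate the total size of the adapters to be loaded, as the updated
    # DownloadAdapter handler does with the helper's return value.
    adapter_bytes = 0
    for adapter_id in ["example-org/example-lora"]:  # hypothetical adapter ID
        adapter_bytes += download_adapter(adapter_id, HUB, api_token=None)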
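Across the flash_* model classes, the dynamic_adapter_loading_enabled flag and the create_merged_weight_files merge path are dropped in favor of forwarding adapter_source to the base Model, which now downloads the startup adapter and registers it at adapter index 0. A rough sketch of that flow as a standalone helper (the name _load_startup_adapter is hypothetical; in the diff this logic lives directly in Model.__init__):

    from lorax_server.adapters.utils import download_adapter
    from lorax_server.pb.generate_pb2 import AdapterParameters
    from lorax_server.utils.adapter import BASE_MODEL_ADAPTER_ID

    def _load_startup_adapter(model, adapter_id: str, adapter_source: str) -> None:
        # Eagerly fetch and load the adapter instead of merging it into the
        # base model weights at startup.
        if adapter_id and adapter_id != BASE_MODEL_ADAPTER_ID:
            download_adapter(adapter_id, adapter_source, api_token=None)
            model.load_adapter(
                AdapterParameters(adapter_ids=[adapter_id]),
                adapter_source,
                adapter_index=0,
                api_token=None,
            )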
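The deleted server/tests/utils/test_adapter.py exercised the merge path removed earlier in this diff (compute_delta_weight / merge_adapter_weights), which folded a LoRA adapter into a base weight as W + (lora_B @ lora_A) * scaling, where for a plain (non-rsLoRA) adapter the scaling factor works out to lora_alpha / r. A small worked example reproducing the expected values from that test (lora_alpha = 1, r = 2, so the scaling is 0.5):

    import torch

    # Toy tensors from the deleted test_merge_adapter_weights.
    W_0 = torch.tensor([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]])
    A = torch.tensor([[1., 2., 3.], [4., 5., 6.]])    # lora_A: r x in_features
    B = torch.tensor([[1., 2.], [3., 4.], [5., 6.]])  # lora_B: out_features x r

    delta = (B @ A) * (1.0 / 2.0)  # scaling = lora_alpha / r
    merged = W_0 + delta
    # merged == [[ 5.5,  8.0, 10.5],
    #            [13.5, 18.0, 22.5],
    #            [21.5, 28.0, 34.5]]  (the W_expected tensor in the removed test)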