diff --git a/README.md b/README.md index 6adc4ef..10c8c5f 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,12 @@ GGUF Parser helps in reviewing and estimating the usage of a GGUF format model w * [Specific Context Size](#specific-context-size) * [Enable Flash Attention](#enable-flash-attention) * [Disable MMap](#disable-mmap) + * [With Adapter](#with-adapter) * [Get Proper Offload Layers](#get-proper-offload-layers) ## Notes +- Since v0.8.1, GGUF Parser supports to estimate the usage with LoRA/ControlVector adapters. - Since v0.8.0, GGUF Parser distinguishes the remote devices from `--tensor-split` via `--rpc`. + For one host multiple GPU devices, you can use `--tensor-split` to get the estimated memory usage of each GPU. + For multiple hosts multiple GPU devices, you can use `--tensor-split` and `--rpc` to get the estimated memory @@ -756,6 +758,36 @@ $ gguf-parser --hf-repo="etemiz/Llama-3.1-405B-Inst-GGUF" --hf-file="llama-3.1-4 ``` +#### With Adapter + +Use `--lora`/`--control-vector` to estimate the usage when loading a model with adapters. 
+ +```shell +$ gguf-parser --hf-repo="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" --hf-file="Meta-Llama-3-8B-Instruct.Q5_K_M.gguf" --skip-metadata --skip-architecture --skip-tokenizer ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ESTIMATE | ++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+-------------------------+-------------------+ +| ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED | RAM | VRAM 0 | +| | | | | | | | | +------------+------------+--------+----------+ +| | | | | | | | | | UMA | NONUMA | UMA | NONUMA | ++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------+------------+--------+----------+ +| llama | 8192 | 2048 / 512 | Disabled | Enabled | No | Supported | 33 (32 + 1) | Yes | 171.62 MiB | 321.62 MiB | 1 GiB | 6.82 GiB | ++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------+------------+--------+----------+ + +$ # With a LoRA adapter. 
+$ gguf-parser --hf-repo="QuantFactory/Meta-Llama-3-8B-Instruct-GGUF" --hf-file="Meta-Llama-3-8B-Instruct.Q5_K_M.gguf" --lora-url="https://huggingface.co/ngxson/test_gguf_lora_adapter/resolve/main/lora-Llama-3-Instruct-abliteration-LoRA-8B-f16.gguf" --skip-metadata --skip-architecture --skip-tokenizer ++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| ESTIMATE | ++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+-------------------------+-------------------+ +| ARCH | CONTEXT SIZE | BATCH SIZE (L / P) | FLASH ATTENTION | MMAP LOAD | EMBEDDING ONLY | DISTRIBUTABLE | OFFLOAD LAYERS | FULL OFFLOADED | RAM | VRAM 0 | +| | | | | | | | | +------------+------------+--------+----------+ +| | | | | | | | | | UMA | NONUMA | UMA | NONUMA | ++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------+------------+--------+----------+ +| llama | 8192 | 2048 / 512 | Disabled | Enabled | No | Supported | 33 (32 + 1) | Yes | 184.30 MiB | 334.30 MiB | 1 GiB | 6.98 GiB | ++-------+--------------+--------------------+-----------------+-----------+----------------+---------------+----------------+----------------+------------+------------+--------+----------+ + +``` + #### Get Proper Offload Layers Use `--gpu-layers-step` to get the proper offload layers number when the model is too large to fit into the GPUs memory. diff --git a/cmd/gguf-parser/README.md b/cmd/gguf-parser/README.md index 80b2fa2..aef2178 100644 --- a/cmd/gguf-parser/README.md +++ b/cmd/gguf-parser/README.md @@ -23,24 +23,24 @@ GLOBAL OPTIONS: Estimate - --batch-size value, -b value Specify the logical batch size, which is used to estimate the usage. 
(default: 2048) - --cache-type-k value, --ctk value Specify the type of Key cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16") - --cache-type-v value, --ctv value Specify the type of Value cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16") - --ctx-size value, -c value Specify the size of prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default: -1) - --flash-attention, --flash-attn, --fa Specify enabling Flash Attention, which is used to estimate the usage. Flash Attention can reduce the usage of RAM/VRAM. (default: false) - --gpu-layers value, --ngl value Specify how many layers of the main model to offload, which is used to estimate the usage, default is full offloaded. (default: -1) - --gpu-layers-draft value, --ngld value Specify how many layers of the draft model to offload, which is used to estimate the usage, default is full offloaded. (default: -1) - --gpu-layers-step value Specify the step of layers to offload, works with --gpu-layers. (default: 0) - --in-max-ctx-size Limit the context size to the maximum context size of the model, if the context size is larger than the maximum context size. (default: false) - --main-gpu value, --mg value Specify the GPU to use for the model (with --split-mode = none) or for intermediate results and KV (with --split-mode = row), which is used to estimate the usage. Since gguf-parser cannot recognize the host GPU devices or RPC servers, --main-gpu only works when --tensor-split is set. (default: 0) - --no-kv-offload, --nkvo Specify disabling Key-Value offloading, which is used to estimate the usage. Disable Key-Value offloading can reduce the usage of VRAM. (default: false) - --no-mmap Specify disabling Memory-Mapped using, which is used to estimate the usage. 
Memory-Mapped can avoid loading the entire model weights into RAM. (default: false) - --parallel-size value, --parallel value, --np value Specify the number of parallel sequences to decode, which is used to estimate the usage. (default: 1) - --platform-footprint value Specify the platform footprint(RAM,VRAM) of running host in MiB, which is used to estimate the NonUMA usage, default is 150,250. Different platform always gets different RAM and VRAM footprints, for example, within CUDA, 'cudaMemGetInfo' would occupy some RAM and VRAM, see https://stackoverflow.com/questions/64854862/free-memory-occupied-by-cudamemgetinfo. (default: "150,250") - --rpc value Specify the RPC servers, which is used to estimate the usage, it is a comma-separated list of host:port. Woks with --tensor-split. - --split-mode value, --sm value Specify how to split the model across multiple devices, which is used to estimate the usage, select from [layer, row, none]. Since gguf-parser always estimates the usage of VRAM, "none" is meaningless here, keep for compatibility. (default: "layer") - --tensor-split value, --ts value Specify the fraction of the model to offload to each device, which is used to estimate the usage, it is a comma-separated list of integer. Since gguf-parser cannot recognize the host GPU devices or RPC servers, must explicitly set --tensor-split to indicate how many devices are used. To declare the devices belong to RPC servers, set --rpc please. - --ubatch-size value, --ub value Specify the physical maximum batch size, which is used to estimate the usage. (default: 512) + --batch-size value, -b value Specify the logical batch size, which is used to estimate the usage. (default: 2048) + --cache-type-k value, --ctk value Specify the type of Key cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. 
(default: "f16") + --cache-type-v value, --ctv value Specify the type of Value cache, which is used to estimate the usage, select from [f32, f16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1]. (default: "f16") + --ctx-size value, -c value Specify the size of prompt context, which is used to estimate the usage, default is equal to the model's maximum context size. (default: -1) + --flash-attention, --flash-attn, --fa Specify enabling Flash Attention, which is used to estimate the usage. Flash Attention can reduce the usage of RAM/VRAM. (default: false) + --gpu-layers value, --ngl value, --n-gpu-layers value Specify how many layers of the main model to offload, which is used to estimate the usage, default is full offloaded. (default: -1) + --gpu-layers-draft value, --ngld value, --n-gpu-layers-draft value Specify how many layers of the draft model to offload, which is used to estimate the usage, default is full offloaded. (default: -1) + --gpu-layers-step value Specify the step of layers to offload, works with --gpu-layers. (default: 0) + --in-max-ctx-size Limit the context size to the maximum context size of the model, if the context size is larger than the maximum context size. (default: false) + --main-gpu value, --mg value Specify the GPU to use for the model (with --split-mode = none) or for intermediate results and KV (with --split-mode = row), which is used to estimate the usage. Since gguf-parser cannot recognize the host GPU devices or RPC servers, --main-gpu only works when --tensor-split is set. (default: 0) + --no-kv-offload, --nkvo Specify disabling Key-Value offloading, which is used to estimate the usage. Disable Key-Value offloading can reduce the usage of VRAM. (default: false) + --no-mmap Specify disabling Memory-Mapped using, which is used to estimate the usage. Memory-Mapped can avoid loading the entire model weights into RAM. 
(default: false) + --parallel-size value, --parallel value, --np value Specify the number of parallel sequences to decode, which is used to estimate the usage. (default: 1) + --platform-footprint value Specify the platform footprint(RAM,VRAM) of running host in MiB, which is used to estimate the NonUMA usage, default is 150,250. Different platform always gets different RAM and VRAM footprints, for example, within CUDA, 'cudaMemGetInfo' would occupy some RAM and VRAM, see https://stackoverflow.com/questions/64854862/free-memory-occupied-by-cudamemgetinfo. (default: "150,250") + --rpc value Specify the RPC servers, which is used to estimate the usage, it is a comma-separated list of host:port. Works with --tensor-split. + --split-mode value, --sm value Specify how to split the model across multiple devices, which is used to estimate the usage, select from [layer, row, none]. Since gguf-parser always estimates the usage of VRAM, "none" is meaningless here, keep for compatibility. (default: "layer") + --tensor-split value, --ts value Specify the fraction of the model to offload to each device, which is used to estimate the usage, it is a comma-separated list of integer. Since gguf-parser cannot recognize the host GPU devices or RPC servers, must explicitly set --tensor-split to indicate how many devices are used. To declare the devices belong to RPC servers, set --rpc please. + --ubatch-size value, --ub value Specify the physical maximum batch size, which is used to estimate the usage. (default: 512) Load @@ -54,34 +54,42 @@ GLOBAL OPTIONS: Model/Local - --draft-path value, --model-draft value, --md value Path where the GGUF file to load for the draft model, optional, e.g. ~/.cache/lm-studio/models/QuantFactory/Qwen2-1.5B-Instruct-GGUF/Qwen2-1.5B-Instruct.Q5_K_M.gguf - --mmproj-path value, --mmproj value Path where the GGUF file to load for the multimodal projector, optional. 
- --path value, --model value, -m value Path where the GGUF file to load for the main model, e.g. ~/.cache/lm-studio/models/QuantFactory/Qwen2-7B-Instruct-GGUF/Qwen2-7B-Instruct.Q5_K_M.gguf. + --control-vector-path value, --control-vector value [ --control-vector-path value, --control-vector value ] Path where the GGUF file to load for the Control Vector adapter, optional. + --draft-path value, --model-draft value, --md value Path where the GGUF file to load for the draft model, optional, e.g. ~/.cache/lm-studio/models/QuantFactory/Qwen2-1.5B-Instruct-GGUF/Qwen2-1.5B-Instruct.Q5_K_M.gguf + --lora-path value, --lora value [ --lora-path value, --lora value ] Path where the GGUF file to load for the LoRA adapter, optional. + --mmproj-path value, --mmproj value Path where the GGUF file to load for the multimodal projector, optional. + --path value, --model value, -m value Path where the GGUF file to load for the main model, e.g. ~/.cache/lm-studio/models/QuantFactory/Qwen2-7B-Instruct-GGUF/Qwen2-7B-Instruct.Q5_K_M.gguf. Model/Remote - --draft-url value Url where the GGUF file to load for the draft model, optional, e.g. https://huggingface.co/QuantFactory/Qwen2-1.5B-Instruct-GGUF/resolve/main/Qwen2-1.5B-Instruct.Q5_K_M.gguf. Note that gguf-parser does not need to download the entire GGUF file. - --mmproj-url value Url where the GGUF file to load for the multimodal projector, optional. - --token value Bearer auth token to load GGUF file, optional, works with --url/--draft-url. - --url value, --model-url value, --mu value Url where the GGUF file to load for the main model, e.g. https://huggingface.co/QuantFactory/Qwen2-7B-Instruct-GGUF/resolve/main/Qwen2-7B-Instruct.Q5_K_M.gguf. Note that gguf-parser does not need to download the entire GGUF file. + --control-vector-url value [ --control-vector-url value ] Url where the GGUF file to load for the Control Vector adapter, optional. + --draft-url value Url where the GGUF file to load for the draft model, optional, e.g. 
https://huggingface.co/QuantFactory/Qwen2-1.5B-Instruct-GGUF/resolve/main/Qwen2-1.5B-Instruct.Q5_K_M.gguf. Note that gguf-parser does not need to download the entire GGUF file. + --lora-url value [ --lora-url value ] Url where the GGUF file to load for the LoRA adapter, optional. + --mmproj-url value Url where the GGUF file to load for the multimodal projector, optional. + --token value Bearer auth token to load GGUF file, optional, works with --url/--draft-url. + --url value, --model-url value, --mu value Url where the GGUF file to load for the main model, e.g. https://huggingface.co/QuantFactory/Qwen2-7B-Instruct-GGUF/resolve/main/Qwen2-7B-Instruct.Q5_K_M.gguf. Note that gguf-parser does not need to download the entire GGUF file. Model/Remote/HuggingFace - --hf-draft-file value Model file below the --hf-draft-repo, optional, e.g. Qwen2-1.5B-Instruct.Q5_K_M.gguf. - --hf-draft-repo value Repository of HuggingFace which the GGUF file store for the draft model, optional, e.g. QuantFactory/Qwen2-1.5B-Instruct-GGUF, works with --hf-draft-file. - --hf-file value, --hff value Model file below the --hf-repo, e.g. Qwen2-7B-Instruct.Q5_K_M.gguf. - --hf-mmproj-file value Multimodal projector file below the --hf-repo. - --hf-repo value, --hfr value Repository of HuggingFace which the GGUF file store for the main model, e.g. QuantFactory/Qwen2-7B-Instruct-GGUF, works with --hf-file. - --hf-token value, --hft value User access token of HuggingFace, optional, works with --hf-repo/--hf-file pair or --hf-draft-repo/--hf-draft-file pair. See https://huggingface.co/settings/tokens. + --hf-control-vector-file value [ --hf-control-vector-file value ] Control Vector adapter file below the --hf-repo. + --hf-draft-file value Model file below the --hf-draft-repo, optional, e.g. Qwen2-1.5B-Instruct.Q5_K_M.gguf. + --hf-draft-repo value Repository of HuggingFace which the GGUF file store for the draft model, optional, e.g. QuantFactory/Qwen2-1.5B-Instruct-GGUF, works with --hf-draft-file. 
+ --hf-file value, --hff value Model file below the --hf-repo, e.g. Qwen2-7B-Instruct.Q5_K_M.gguf. + --hf-lora-file value [ --hf-lora-file value ] LoRA adapter file below the --hf-repo. + --hf-mmproj-file value Multimodal projector file below the --hf-repo. + --hf-repo value, --hfr value Repository of HuggingFace which the GGUF file store for the main model, e.g. QuantFactory/Qwen2-7B-Instruct-GGUF, works with --hf-file. + --hf-token value, --hft value User access token of HuggingFace, optional, works with --hf-repo/--hf-file pair or --hf-draft-repo/--hf-draft-file pair. See https://huggingface.co/settings/tokens. Model/Remote/ModelScope - --ms-draft-file value Model file below the --ms-draft-repo, optional, e.g. qwen1_5-1_8b-chat-q5_k_m.gguf. - --ms-draft-repo value Repository of ModelScope which the GGUF file store for the draft model, optional, e.g. qwen/Qwen1.5-1.8B-Chat-GGUF, works with --ms-draft-file. - --ms-file value Model file below the --ms-repo, e.g. qwen1_5-7b-chat-q5_k_m.gguf. - --ms-mmproj-file value Multimodal projector file below the --ms-repo. - --ms-repo value Repository of ModelScope which the GGUF file store for the main model, e.g. qwen/Qwen1.5-7B-Chat-GGUF, works with --ms-file. - --ms-token value Git access token of ModelScope, optional, works with --ms-repo/--ms-file pair or --ms-draft-repo/--ms-draft-file pair. See https://modelscope.cn/my/myaccesstoken. + --ms-control-vector-file value [ --ms-control-vector-file value ] Control Vector adapter file below the --ms-repo. + --ms-draft-file value Model file below the --ms-draft-repo, optional, e.g. qwen1_5-1_8b-chat-q5_k_m.gguf. + --ms-draft-repo value Repository of ModelScope which the GGUF file store for the draft model, optional, e.g. qwen/Qwen1.5-1.8B-Chat-GGUF, works with --ms-draft-file. + --ms-file value Model file below the --ms-repo, e.g. qwen1_5-7b-chat-q5_k_m.gguf. + --ms-lora-file value [ --ms-lora-file value ] LoRA adapter file below the --ms-repo. 
+ --ms-mmproj-file value Multimodal projector file below the --ms-repo. + --ms-repo value Repository of ModelScope which the GGUF file store for the main model, e.g. qwen/Qwen1.5-7B-Chat-GGUF, works with --ms-file. + --ms-token value Git access token of ModelScope, optional, works with --ms-repo/--ms-file pair or --ms-draft-repo/--ms-draft-file pair. See https://modelscope.cn/my/myaccesstoken. Model/Remote/Ollama diff --git a/cmd/gguf-parser/main.go b/cmd/gguf-parser/main.go index 5d83305..a11c14d 100644 --- a/cmd/gguf-parser/main.go +++ b/cmd/gguf-parser/main.go @@ -3,6 +3,7 @@ package main import ( "errors" "fmt" + "net" "os" "path/filepath" "regexp" @@ -20,7 +21,6 @@ import ( "github.com/urfave/cli/v2" . "github.com/gpustack/gguf-parser-go" // nolint: stylecheck - "net" ) var Version = "v0.0.0" @@ -89,6 +89,20 @@ func main() { Aliases: []string{"mmproj"}, Usage: "Path where the GGUF file to load for the multimodal projector, optional.", }, + &cli.StringSliceFlag{ + Destination: &loraPaths, + Category: "Model/Local", + Name: "lora-path", + Aliases: []string{"lora"}, + Usage: "Path where the GGUF file to load for the LoRA adapter, optional.", + }, + &cli.StringSliceFlag{ + Destination: &controlVectorPaths, + Category: "Model/Local", + Name: "control-vector-path", + Aliases: []string{"control-vector"}, + Usage: "Path where the GGUF file to load for the Control Vector adapter, optional.", + }, &cli.StringFlag{ Destination: &url, Value: url, @@ -117,6 +131,18 @@ func main() { Name: "mmproj-url", Usage: "Url where the GGUF file to load for the multimodal projector, optional.", }, + &cli.StringSliceFlag{ + Destination: &loraUrls, + Category: "Model/Remote", + Name: "lora-url", + Usage: "Url where the GGUF file to load for the LoRA adapter, optional.", + }, + &cli.StringSliceFlag{ + Destination: &controlVectorUrls, + Category: "Model/Remote", + Name: "control-vector-url", + Usage: "Url where the GGUF file to load for the Control Vector adapter, optional.", + }, 
&cli.StringFlag{ Destination: &token, Value: token, @@ -143,13 +169,6 @@ func main() { Usage: "Model file below the --hf-repo, e.g. " + "Qwen2-7B-Instruct.Q5_K_M.gguf.", }, - &cli.StringFlag{ - Destination: &hfMMProjFile, - Value: hfMMProjFile, - Category: "Model/Remote/HuggingFace", - Name: "hf-mmproj-file", - Usage: "Multimodal projector file below the --hf-repo.", - }, &cli.StringFlag{ Destination: &hfDraftRepo, Value: hfDraftRepo, @@ -166,6 +185,25 @@ func main() { Usage: "Model file below the --hf-draft-repo, optional, e.g. " + "Qwen2-1.5B-Instruct.Q5_K_M.gguf.", }, + &cli.StringFlag{ + Destination: &hfMMProjFile, + Value: hfMMProjFile, + Category: "Model/Remote/HuggingFace", + Name: "hf-mmproj-file", + Usage: "Multimodal projector file below the --hf-repo.", + }, + &cli.StringSliceFlag{ + Destination: &hfLoRAFiles, + Category: "Model/Remote/HuggingFace", + Name: "hf-lora-file", + Usage: "LoRA adapter file below the --hf-repo.", + }, + &cli.StringSliceFlag{ + Destination: &hfControlVectorFiles, + Category: "Model/Remote/HuggingFace", + Name: "hf-control-vector-file", + Usage: "Control Vector adapter file below the --hf-repo.", + }, &cli.StringFlag{ Destination: &hfToken, Value: hfToken, @@ -192,13 +230,6 @@ func main() { Usage: "Model file below the --ms-repo, e.g. " + "qwen1_5-7b-chat-q5_k_m.gguf.", }, - &cli.StringFlag{ - Destination: &msMMProjFile, - Value: msMMProjFile, - Category: "Model/Remote/ModelScope", - Name: "ms-mmproj-file", - Usage: "Multimodal projector file below the --ms-repo.", - }, &cli.StringFlag{ Destination: &msDraftRepo, Value: msDraftRepo, @@ -215,6 +246,25 @@ func main() { Usage: "Model file below the --ms-draft-repo, optional, e.g. 
" + "qwen1_5-1_8b-chat-q5_k_m.gguf.", }, + &cli.StringFlag{ + Destination: &msMMProjFile, + Value: msMMProjFile, + Category: "Model/Remote/ModelScope", + Name: "ms-mmproj-file", + Usage: "Multimodal projector file below the --ms-repo.", + }, + &cli.StringSliceFlag{ + Destination: &msLoRAFiles, + Category: "Model/Remote/ModelScope", + Name: "ms-lora-file", + Usage: "LoRA adapter file below the --ms-repo.", + }, + &cli.StringSliceFlag{ + Destination: &msControlVectorFiles, + Category: "Model/Remote/ModelScope", + Name: "ms-control-vector-file", + Usage: "Control Vector adapter file below the --ms-repo.", + }, &cli.StringFlag{ Destination: &msToken, Value: msToken, @@ -464,7 +514,7 @@ func main() { Value: offloadLayers, Category: "Estimate", Name: "gpu-layers", - Aliases: []string{"ngl"}, + Aliases: []string{"ngl", "n-gpu-layers"}, Usage: "Specify how many layers of the main model to offload, " + "which is used to estimate the usage, " + "default is full offloaded.", @@ -474,7 +524,7 @@ func main() { Value: offloadLayersDraft, Category: "Estimate", Name: "gpu-layers-draft", - Aliases: []string{"ngld"}, + Aliases: []string{"ngld", "n-gpu-layers-draft"}, Usage: "Specify how many layers of the draft model to offload, " + "which is used to estimate the usage, " + "default is full offloaded.", @@ -564,28 +614,36 @@ func main() { var ( // model options - path string - mmprojPath string // for estimate - draftPath string // for estimate - url string - mmprojUrl string // for estimate - draftUrl string // for estimate - token string - hfRepo string - hfFile string - hfMMProjFile string // for estimate - hfDraftRepo string // for estimate - hfDraftFile string // for estimate - hfToken string - msRepo string - msFile string - msMMProjFile string // for estimate - msDraftRepo string // for estimate - msDraftFile string // for estimate - msToken string - olBaseURL = "https://registry.ollama.ai" - olModel string - olUsage bool + path string + mmprojPath string // for estimate + 
draftPath string // for estimate + loraPaths cli.StringSlice // for estimate + controlVectorPaths cli.StringSlice // for estimate + url string + mmprojUrl string // for estimate + draftUrl string // for estimate + loraUrls cli.StringSlice // for estimate + controlVectorUrls cli.StringSlice // for estimate + token string + hfRepo string + hfFile string + hfDraftRepo string // for estimate + hfDraftFile string // for estimate + hfMMProjFile string // for estimate + hfLoRAFiles cli.StringSlice // for estimate + hfControlVectorFiles cli.StringSlice // for estimate + hfToken string + msRepo string + msFile string + msDraftRepo string // for estimate + msDraftFile string // for estimate + msMMProjFile string // for estimate + msLoRAFiles cli.StringSlice // for estimate + msControlVectorFiles cli.StringSlice // for estimate + msToken string + olBaseURL = "https://registry.ollama.ai" + olModel string + olUsage bool // load options debug bool skipProxy bool @@ -745,7 +803,12 @@ func mainAction(c *cli.Context) error { // Parse GGUF file. - var gf, projgf, dftgf *GGUFFile + var ( + gf *GGUFFile + projgf *GGUFFile + dftgf *GGUFFile + adpgfs []*GGUFFile + ) { var err error @@ -795,13 +858,42 @@ func mainAction(c *cli.Context) error { projgf, err = ParseGGUFFileRemote(ctx, mls[len(mls)-1].BlobURL().String(), ropts...) } } + // Adapter overlap. + { + als := om.SearchLayers(regexp.MustCompile(`^application/vnd\.ollama\.image\.adapter$`)) + if len(als) > 0 { + var adpgf *GGUFFile + for i := range als { + adpgf, err = ParseGGUFFileRemote(ctx, als[i].BlobURL().String(), ropts...) + if err != nil { + break + } + adpgfs = append(adpgfs, adpgf) + } + } + } } } if err != nil { return fmt.Errorf("failed to parse GGUF file: %w", err) } - // Projector model. + // Drafter. + switch { + case draftPath != "": + dftgf, err = ParseGGUFFile(draftPath, ropts...) + case draftUrl != "": + dftgf, err = ParseGGUFFileRemote(ctx, draftUrl, ropts...) 
+ case hfDraftRepo != "" && hfDraftFile != "": + dftgf, err = ParseGGUFFileFromHuggingFace(ctx, hfDraftRepo, hfDraftFile, ropts...) + case msDraftRepo != "" && msDraftFile != "": + dftgf, err = ParseGGUFFileFromModelScope(ctx, msDraftRepo, msDraftFile, ropts...) + } + if err != nil { + return fmt.Errorf("failed to parse draft GGUF file: %w", err) + } + + // Projector. switch { case mmprojPath != "": projgf, err = ParseGGUFFile(mmprojPath, ropts...) @@ -816,19 +908,75 @@ func mainAction(c *cli.Context) error { return fmt.Errorf("failed to parse multimodal projector GGUF file: %w", err) } - // Drafter model. - switch { - case draftPath != "": - dftgf, err = ParseGGUFFile(draftPath, ropts...) - case draftUrl != "": - dftgf, err = ParseGGUFFileRemote(ctx, draftUrl, ropts...) - case hfDraftRepo != "" && hfDraftFile != "": - dftgf, err = ParseGGUFFileFromHuggingFace(ctx, hfDraftRepo, hfDraftFile, ropts...) - case msDraftRepo != "" && msDraftFile != "": - dftgf, err = ParseGGUFFileFromModelScope(ctx, msDraftRepo, msDraftFile, ropts...) - } - if err != nil { - return fmt.Errorf("failed to parse draft GGUF file: %w", err) + // Adapter. + { + // LoRA. + for _, loraPath := range loraPaths.Value() { + adpgf, err := ParseGGUFFile(loraPath, ropts...) + if err != nil { + return fmt.Errorf("failed to parse LoRA adapter GGUF file: %w", err) + } + adpgfs = append(adpgfs, adpgf) + } + for _, loraUrl := range loraUrls.Value() { + adpgf, err := ParseGGUFFileRemote(ctx, loraUrl, ropts...) + if err != nil { + return fmt.Errorf("failed to parse LoRA adapter GGUF file: %w", err) + } + adpgfs = append(adpgfs, adpgf) + } + if hfRepo != "" { + for _, hfLoRAFile := range hfLoRAFiles.Value() { + adpgf, err := ParseGGUFFileFromHuggingFace(ctx, hfRepo, hfLoRAFile, ropts...) 
+ if err != nil { + return fmt.Errorf("failed to parse LoRA adapter GGUF file: %w", err) + } + adpgfs = append(adpgfs, adpgf) + } + } + if msRepo != "" { + for _, msLoRAFile := range msLoRAFiles.Value() { + adpgf, err := ParseGGUFFileFromModelScope(ctx, msRepo, msLoRAFile, ropts...) + if err != nil { + return fmt.Errorf("failed to parse LoRA adapter GGUF file: %w", err) + } + adpgfs = append(adpgfs, adpgf) + } + } + + // Control Vector. + for _, cvPath := range controlVectorPaths.Value() { + adpgf, err := ParseGGUFFile(cvPath, ropts...) + if err != nil { + return fmt.Errorf("failed to parse Control Vector adapter GGUF file: %w", err) + } + adpgfs = append(adpgfs, adpgf) + } + for _, cvUrl := range controlVectorUrls.Value() { + adpgf, err := ParseGGUFFileRemote(ctx, cvUrl, ropts...) + if err != nil { + return fmt.Errorf("failed to parse Control Vector adapter GGUF file: %w", err) + } + adpgfs = append(adpgfs, adpgf) + } + if hfRepo != "" { + for _, hfCvFile := range hfControlVectorFiles.Value() { + adpgf, err := ParseGGUFFileFromHuggingFace(ctx, hfRepo, hfCvFile, ropts...) + if err != nil { + return fmt.Errorf("failed to parse Control Vector adapter GGUF file: %w", err) + } + adpgfs = append(adpgfs, adpgf) + } + } + if msRepo != "" { + for _, msCvFile := range msControlVectorFiles.Value() { + adpgf, err := ParseGGUFFileFromModelScope(ctx, msRepo, msCvFile, ropts...) + if err != nil { + return fmt.Errorf("failed to parse Control Vector adapter GGUF file: %w", err) + } + adpgfs = append(adpgfs, adpgf) + } + } } } @@ -883,6 +1031,16 @@ func mainAction(c *cli.Context) error { eopts = append(eopts, WithProjector(&me)) } + if len(adpgfs) > 0 { + adps := make([]LLaMACppUsageEstimate, len(adpgfs)) + aeopts := eopts[:len(eopts):len(eopts)] + for i, adpgf := range adpgfs { + ae := adpgf.EstimateLLaMACppUsage(aeopts...) 
+ adps[i] = ae + } + eopts = append(eopts, WithAdapters(adps)) + } + deopts := eopts[:len(eopts):len(eopts)] if offloadLayers >= 0 { deopts = append(deopts, WithOffloadLayers(uint64(offloadLayers)))