
Commit 8ab098e

Re-order Llama class params
1 parent: e4f9db3

File tree

1 file changed: 7 additions & 6 deletions


llama_cpp/llama.py

Lines changed: 7 additions & 6 deletions
@@ -205,8 +205,6 @@ def __init__(
         model_path: str,
         # NOTE: These parameters are likely to change in the future.
         n_ctx: int = 512,
-        rope_freq_base: float = 10000.0,
-        rope_freq_scale: float = 1.0,
         n_parts: int = -1,
         n_gpu_layers: int = 0,
         seed: int = 1337,
@@ -223,15 +221,15 @@ def __init__(
         lora_path: Optional[str] = None,
         low_vram: bool = False,
         tensor_split: Optional[List[float]] = None,
+        rope_freq_base: float = 10000.0,
+        rope_freq_scale: float = 1.0,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.

         Args:
             model_path: Path to the model.
             n_ctx: Maximum context size.
-            rope_freq_base: RoPE base frequency.
-            rope_freq_scale: RoPE frequency scale.
             n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
             seed: Random seed. -1 for random.
             f16_kv: Use half-precision for key/value cache.
@@ -246,6 +244,8 @@ def __init__(
             lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
             lora_path: Path to a LoRA file to apply to the model.
             tensor_split: List of floats to split the model across multiple GPUs. If None, the model is not split.
+            rope_freq_base: Base frequency for rope sampling.
+            rope_freq_scale: Scale factor for rope sampling.
             verbose: Print verbose output to stderr.

         Raises:
@@ -260,8 +260,6 @@ def __init__(

         self.params = llama_cpp.llama_context_default_params()
         self.params.n_ctx = n_ctx
-        self.params.rope_freq_base = rope_freq_base
-        self.params.rope_freq_scale = rope_freq_scale
         self.params.n_gpu_layers = n_gpu_layers
         self.params.seed = seed
         self.params.f16_kv = f16_kv
@@ -281,6 +279,9 @@ def __init__(
             self._c_tensor_split = FloatArray(*tensor_split) # keep a reference to the array so it is not gc'd
             self.params.tensor_split = self._c_tensor_split

+        self.params.rope_freq_base = rope_freq_base
+        self.params.rope_freq_scale = rope_freq_scale
+
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
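Note that this commit moves rope_freq_base and rope_freq_scale after tensor_split in the __init__ signature, so any caller that passed them positionally would now bind the wrong parameters; calls that use keyword arguments are unaffected. A minimal usage sketch under that assumption (the model path and the chosen context/scale values below are illustrative, not part of this commit):

from llama_cpp import Llama

# Keyword arguments are robust to the parameter re-ordering in this commit.
llm = Llama(
    model_path="./models/7B/ggml-model.bin",  # hypothetical local model path
    n_ctx=4096,
    rope_freq_base=10000.0,  # default RoPE base frequency
    rope_freq_scale=0.5,     # scale factor < 1.0 stretches RoPE to cover a longer context
)

# High-level completion call; returns an OpenAI-style completion dict.
output = llm("Q: Name the planets in the solar system. A:", max_tokens=32)
print(output["choices"][0]["text"])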