@@ -205,8 +205,6 @@ def __init__(
         model_path: str,
         # NOTE: These parameters are likely to change in the future.
         n_ctx: int = 512,
-        rope_freq_base: float = 10000.0,
-        rope_freq_scale: float = 1.0,
         n_parts: int = -1,
         n_gpu_layers: int = 0,
         seed: int = 1337,
@@ -223,15 +221,15 @@ def __init__(
         lora_path: Optional[str] = None,
         low_vram: bool = False,
         tensor_split: Optional[List[float]] = None,
+        rope_freq_base: float = 10000.0,
+        rope_freq_scale: float = 1.0,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.

         Args:
             model_path: Path to the model.
             n_ctx: Maximum context size.
-            rope_freq_base: RoPE base frequency.
-            rope_freq_scale: RoPE frequency scale.
             n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
             seed: Random seed. -1 for random.
             f16_kv: Use half-precision for key/value cache.
@@ -246,6 +244,8 @@ def __init__(
             lora_base: Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.
             lora_path: Path to a LoRA file to apply to the model.
             tensor_split: List of floats to split the model across multiple GPUs. If None, the model is not split.
+            rope_freq_base: Base frequency for rope sampling.
+            rope_freq_scale: Scale factor for rope sampling.
             verbose: Print verbose output to stderr.

         Raises:
@@ -260,8 +260,6 @@ def __init__(

         self.params = llama_cpp.llama_context_default_params()
         self.params.n_ctx = n_ctx
-        self.params.rope_freq_base = rope_freq_base
-        self.params.rope_freq_scale = rope_freq_scale
         self.params.n_gpu_layers = n_gpu_layers
         self.params.seed = seed
         self.params.f16_kv = f16_kv
@@ -281,6 +279,9 @@ def __init__(
             self._c_tensor_split = FloatArray(*tensor_split)  # keep a reference to the array so it is not gc'd
             self.params.tensor_split = self._c_tensor_split

+        self.params.rope_freq_base = rope_freq_base
+        self.params.rope_freq_scale = rope_freq_scale
+
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)

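Because `rope_freq_base` and `rope_freq_scale` keep their defaults and are passed by keyword, moving them after `tensor_split` leaves keyword call sites unchanged (only positional callers past `n_ctx` would need updating). A minimal usage sketch, assuming the `llama_cpp` package from this repo and a placeholder model path:

```python
from llama_cpp import Llama

llm = Llama(
    model_path="./models/7B/ggml-model.bin",  # hypothetical path; use a real model file
    n_ctx=2048,
    # Keyword arguments are position-independent, so the reordering in this
    # diff does not affect a call like this one.
    rope_freq_base=10000.0,
    rope_freq_scale=0.5,  # e.g. linear RoPE scaling toward a longer effective context
)
```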
0 commit comments
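On the `# keep a reference to the array so it is not gc'd` comment in the last hunk: `tensor_split` is handed to C code as a ctypes array, and the C side only borrows the pointer. A standalone sketch of that pattern, assuming a plain `ctypes.c_float` array (the library may size the array differently):

```python
import ctypes

tensor_split = [0.6, 0.4]  # hypothetical per-GPU split proportions
FloatArray = ctypes.c_float * len(tensor_split)  # array type of the right length

c_tensor_split = FloatArray(*tensor_split)
# Storing c_tensor_split on a long-lived object (here, `self` in __init__)
# keeps the buffer alive; if Python garbage-collected it, the C struct
# field pointing at it would dangle.
```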