Commit 3342c6f

Update web-rwkv to v0.9.3

cryscan committed Dec 6, 2024
1 parent 2660177
Showing 3 changed files with 14 additions and 14 deletions.
Cargo.lock: 8 changes (4 additions & 4 deletions)

Cargo.lock is a generated file, so its diff is not rendered by default.

Cargo.toml: 2 changes (1 addition & 1 deletion)
@@ -35,7 +35,7 @@ path = "crates/ai00-core"
 # path = "../web-rwkv"
 default-features = false
 features = ["native"]
-version = "0.9.0"
+version = "0.9.3"

 [patch.crates-io]
 hf-hub = { git = "https://github.com/cgisky1980/hf-hub.git", branch = "main" }
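For orientation, the dependency entry being edited plausibly expands to the sketch below. Only the lines also visible in the hunk are confirmed by the diff; the [dependencies.web-rwkv] table header and the comment text are assumptions.

# Hypothetical reconstruction of the edited Cargo.toml entry; only the
# lines also shown in the hunk above are confirmed by this commit.
[dependencies.web-rwkv]
# path = "../web-rwkv"     # local checkout, kept commented out
default-features = false   # opt out of default features
features = ["native"]      # enable only the native (non-web) backend
version = "0.9.3"          # the version bumped by this commit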
assets/configs/Config.toml: 18 changes (9 additions & 9 deletions)
@@ -1,13 +1,13 @@
 [model]
-embed_device = "Cpu" # Device to put the embed tensor ("Cpu" or "Gpu").
-max_batch = 8 # The maximum batches that are cached on GPU.
-name = "RWKV-x060-World-3B-v2.1-20240417-ctx4096.st" # Name of the model.
-path = "assets/models" # Path to the folder containing all models.
-precision = "Fp16" # Precision for intermediate tensors ("Fp16" or "Fp32"). "Fp32" yields better outputs but slower.
-quant = 16 # Layers to be quantized.
-quant_type = "Int8" # Quantization type ("Int8" or "NF4").
-stop = ["\n\n"] # Additional stop words in generation.
-token_chunk_size = 128 # Size of token chunk that is inferred at once. For high end GPUs, this could be 64 or 128 (faster).
+embed_device = "Cpu" # Device to put the embed tensor ("Cpu" or "Gpu").
+max_batch = 8 # The maximum batches that are cached on GPU.
+name = "RWKV-x060-World-7B-v3-20241112-ctx4096.st" # Name of the model.
+path = "assets/models" # Path to the folder containing all models.
+precision = "Fp16" # Precision for intermediate tensors ("Fp16" or "Fp32"). "Fp32" yields better outputs but slower.
+quant = 0 # Layers to be quantized.
+quant_type = "Int8" # Quantization type ("Int8" or "NF4").
+stop = ["\n\n"] # Additional stop words in generation.
+token_chunk_size = 128 # Size of token chunk that is inferred at once. For high end GPUs, this could be 64 or 128 (faster).

 # [[state]] # State-tuned initial state.
 # id = "fd7a60ed-7807-449f-8256-bccae3246222" # UUID for this state, which is used to specify which one to use in the APIs.
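Beyond swapping the 3B v2.1 checkpoint for the 7B v3 one, note that the commit also changes quant from 16 to 0, disabling quantization entirely. If the 7B weights do not fit in VRAM, quantization can be re-enabled; the values below are an illustrative assumption, not part of this commit.

# Hypothetical [model] override for tighter VRAM budgets (values assumed, not from this commit):
quant = 32          # number of layers to quantize; the committed value 0 disables quantization
quant_type = "NF4"  # NF4 yields a smaller footprint than Int8, at some cost in output quality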

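The trailing comments show the optional [[state]] tables for state-tuned initial states. A minimal active entry might look like the sketch below; only id appears in the snippet, while name and path are assumed keys modeled on typical ai00 configs.

# Hypothetical active state entry (name and path are assumed keys):
[[state]]
id = "fd7a60ed-7807-449f-8256-bccae3246222"  # UUID used to select this state via the APIs
name = "example-state"                       # display name (assumed key)
path = "assets/states/example.state"         # location of the state file (assumed key)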