File tree Expand file tree Collapse file tree 1 file changed +7
-0
lines changed
fastdeploy/model_executor/layers/attention Expand file tree Collapse file tree 1 file changed +7
-0
lines changed Original file line number Diff line number Diff line change 2525
2626from fastdeploy .config import FDConfig
2727from fastdeploy .model_executor .layers .quantization .quant_base import QuantMethodBase
28+ from fastdeploy .model_executor .layers .quantization .kv_cache import KvCacheQuantzationTypes
2829
2930if TYPE_CHECKING :
3031 from fastdeploy .model_executor .forward_meta import ForwardMeta
@@ -102,6 +103,12 @@ def __init__(
102103
103104 if fd_config .quant_config and hasattr (fd_config .quant_config , "kv_cache_quant_type" ):
104105 self .kvcache_quant_method : QuantMethodBase = fd_config .quant_config .get_quant_method (self )
106+
107+ # Set here for RL models, because RL does not need to load a state dict
108+ if fd_config .quant_config .kv_cache_quant_type == KvCacheQuantzationTypes .BLOCK_WISE_FP8 :
109+ self .cache_quant_type_str = "block_wise_fp8"
110+ self .quant_max_bound = 448.0
111+ self .quant_min_bound = - 448.0
105112 else :
106113 self .kvcache_quant_method = None
107114
You can’t perform that action at this time.
0 commit comments