
Commit 6e80f8a

update version; clean-up condition
dsikka committed Oct 16, 2024
1 parent 26b4f1e commit 6e80f8a
Showing 2 changed files with 22 additions and 12 deletions.
2 changes: 1 addition & 1 deletion requirements-common.txt
@@ -31,4 +31,4 @@ pyyaml
 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
 setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
 einops # Required for Qwen2-VL.
-compressed-tensors == 0.7.0 # required for compressed-tensors
+compressed-tensors == 0.7.1 # required for compressed-tensors
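
The only change to requirements-common.txt is the pin bump from 0.7.0 to 0.7.1. As a minimal sketch (standard library only; the version string comes from this diff, everything else is illustrative), the pin can be checked at runtime:

    # Sketch: confirm the installed compressed-tensors matches the new pin.
    from importlib.metadata import version

    assert version("compressed-tensors") == "0.7.1"
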
32 changes: 21 additions & 11 deletions vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -101,12 +101,20 @@ def from_config(cls, config: Dict[str, Any]) -> "CompressedTensorsConfig":
                     "weights"] = QuantizationArgs.parse_obj(
                         quant_config.get("weights"))
 
+                target_scheme_map[target]["input_activations"] = None
                 if is_activation_quantization_format(quant_format):
-                    target_scheme_map[target][
-                        "input_activations"] = QuantizationArgs.parse_obj(
-                            quant_config.get("input_activations"))
-                else:
-                    target_scheme_map[target]["input_activations"] = None
+                    input_activations = quant_config.get("input_activations")
+                    # The only case where we have activation quant supported
+                    # but no input_activations provided in the config
+                    # should be w8a16fp8; w8a16fp8 can also run for cases
+                    # where there is an input_quant but it is ignored
+                    if not input_activations:
+                        assert target_scheme_map[target][
+                            "weights"].type == QuantizationType.FLOAT
+                    else:
+                        target_scheme_map[target][
+                            "input_activations"] = QuantizationArgs.parse_obj(
+                                quant_config.get("input_activations"))
 
         return cls(target_scheme_map=target_scheme_map,
                    ignore=ignore,
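
To make the new from_config branch concrete, here is a hedged sketch of the config shape it now tolerates: a config group with fp8 (FLOAT) weights and no "input_activations" entry, the w8a16fp8 case named in the comment. Field values are illustrative, not taken from a real checkpoint:

    # Hypothetical config group for the w8a16fp8 case: FLOAT weights,
    # "input_activations" absent, so it stays None and only the weight
    # type is asserted. Field values are illustrative.
    from compressed_tensors.quantization import (QuantizationArgs,
                                                 QuantizationType)

    quant_config = {"weights": {"num_bits": 8, "type": "float"}}

    weights = QuantizationArgs.parse_obj(quant_config.get("weights"))
    assert weights.type == QuantizationType.FLOAT
    assert quant_config.get("input_activations") is None
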
@@ -245,8 +253,7 @@ def _get_scheme_from_parts(
                 group_size=weight_quant.group_size,
                 actorder=weight_quant.actorder)
 
-        # Will only be not None if is_activation_quantization_format is True
-        if input_quant:
+        if is_activation_quantization_format(self.quant_format):
             if self._is_fp8_w8a8(weight_quant, input_quant):
                 is_fp8_w8a8_supported = self._check_scheme_supported(
                     CompressedTensorsW8A8Fp8.get_min_capability(), error=False)
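
Why the gate changed: after the from_config change above, input_quant can legitimately be None even when the checkpoint uses an activation-quantization format, so the truthiness of input_quant no longer identifies that branch. A toy illustration, not the vLLM source; the format strings stand in for what is_activation_quantization_format checks and are assumptions:

    # Toy model of the revised control flow.
    ACTIVATION_QUANT_FORMATS = {"naive-quantized", "int-quantized",
                                "float-quantized"}  # assumed values

    def choose_path(quant_format: str, input_quant) -> str:
        if quant_format in ACTIVATION_QUANT_FORMATS:  # new: format-driven
            # input_quant may still be None here (w8a16fp8); the old
            # `if input_quant:` gate would have skipped this branch.
            return "activation-quant schemes (w8a8 / w8a16fp8)"
        return "weight-only schemes"

    print(choose_path("float-quantized", None))  # reaches the fp8 branch
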
@@ -256,16 +263,19 @@
                     is_static_input_scheme=(input_quant
                                             and not input_quant.dynamic))
             else:
+                # note: input_quant will be present for converted models;
+                # will be ignored during inference post loading
                 return CompressedTensorsW8A16Fp8(
                     strategy=weight_quant.strategy,
-                    is_static_input_scheme=(input_quant
-                                            and not input_quant.dynamic))
+                    is_static_input_scheme=not input_quant.dynamic)
 
+        # note: input_quant can be None
         if self._is_fp8_w8a16(weight_quant, input_quant):
+            is_static_input_scheme = (input_quant is None
+                                      or not input_quant.dynamic)
             return CompressedTensorsW8A16Fp8(
                 strategy=weight_quant.strategy,
-                is_static_input_scheme=(input_quant
-                                        and not input_quant.dynamic))
+                is_static_input_scheme=is_static_input_scheme)
 
         if self._is_static_tensor_w8a8(weight_quant, input_quant):
             return CompressedTensorsW8A8Int8(
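The is_static_input_scheme rewrite in this hunk is a behavior fix, not just a cleanup: with input_quant=None, the old expression (input_quant and not input_quant.dynamic) evaluates to None (falsy), whereas a missing input_quant should count as a static scheme. A small self-contained check (IQ is an illustrative stand-in for QuantizationArgs):

    class IQ:
        def __init__(self, dynamic: bool):
            self.dynamic = dynamic

    def old_style(input_quant) -> bool:
        return bool(input_quant and not input_quant.dynamic)

    def new_style(input_quant) -> bool:
        return input_quant is None or not input_quant.dynamic

    assert old_style(None) is False and new_style(None) is True  # the fix
    assert old_style(IQ(False)) == new_style(IQ(False)) == True  # static
    assert old_style(IQ(True)) == new_style(IQ(True)) == False   # dynamic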
