Fix f8f8bf16_lite quantize op input in quantize_and_compute (#3667)
#758
fbgemm_gpu_ci_cuda.yml
on: push
Matrix: build_artifact
Matrix: test_and_publish_artifact