diff --git a/.github/workflows/build_wheels_linux_aarch64.yml b/.github/workflows/build_wheels_linux_aarch64.yml
index bd2f6b3de9..c5e4320196 100644
--- a/.github/workflows/build_wheels_linux_aarch64.yml
+++ b/.github/workflows/build_wheels_linux_aarch64.yml
@@ -24,6 +24,7 @@ permissions:
 
 jobs:
   generate-matrix:
+    if: ${{ github.repository_owner == 'pytorch' }}
     uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
       package-type: wheel
@@ -32,6 +33,7 @@ jobs:
       test-infra-ref: main
       with-cuda: disable
   build:
+    if: ${{ github.repository_owner == 'pytorch' }}
     needs: generate-matrix
     strategy:
       fail-fast: false
diff --git a/.github/workflows/build_wheels_linux_x86.yml b/.github/workflows/build_wheels_linux_x86.yml
index e489f0c3cb..3cd06573b0 100644
--- a/.github/workflows/build_wheels_linux_x86.yml
+++ b/.github/workflows/build_wheels_linux_x86.yml
@@ -24,6 +24,7 @@ permissions:
 
 jobs:
   generate-matrix:
+    if: ${{ github.repository_owner == 'pytorch' }}
     uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
     with:
       package-type: wheel
@@ -34,6 +35,7 @@ jobs:
       with-rocm: enable
       with-cpu: enable
   build:
+    if: ${{ github.repository_owner == 'pytorch' }}
     needs: generate-matrix
     name: pytorch/FBGEMM
     uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
diff --git a/.github/workflows/fbgemm_ci.yml b/.github/workflows/fbgemm_ci.yml
index 3bde37d4f0..220654c3e6 100644
--- a/.github/workflows/fbgemm_ci.yml
+++ b/.github/workflows/fbgemm_ci.yml
@@ -20,7 +20,7 @@ concurrency:
 
 jobs:
   build-linux:
-    runs-on: ${{ matrix.host-machine.instance }}
+    runs-on: ${{ github.repository_owner == 'pytorch' && matrix.host-machine.instance || 'ubuntu-latest' }}
     container:
       image: amazonlinux:2023
       options: --user root
@@ -105,7 +105,7 @@ jobs:
 
 
   build-bazel:
-    runs-on: linux.12xlarge
+    runs-on: ${{ github.repository_owner == 'pytorch' && matrix.host-machine.instance || 'ubuntu-latest' }}
     container:
       image: amazonlinux:2023
       options: --user root
diff --git a/.github/workflows/fbgemm_gpu_ci_cpu.yml b/.github/workflows/fbgemm_gpu_ci_cpu.yml
index 1867410cc4..c73939a425 100644
--- a/.github/workflows/fbgemm_gpu_ci_cpu.yml
+++ b/.github/workflows/fbgemm_gpu_ci_cpu.yml
@@ -47,6 +47,7 @@ concurrency:
 jobs:
   # Build on CPU hosts, run tests, and upload to GHA
   build_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
@@ -118,6 +119,7 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_and_publish_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
diff --git a/.github/workflows/fbgemm_gpu_ci_cuda.yml b/.github/workflows/fbgemm_gpu_ci_cuda.yml
index c4e4990890..6a8fc53671 100644
--- a/.github/workflows/fbgemm_gpu_ci_cuda.yml
+++ b/.github/workflows/fbgemm_gpu_ci_cuda.yml
@@ -46,6 +46,7 @@ concurrency:
 jobs:
   # Build on CPU hosts and upload to GHA
   build_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
@@ -127,6 +128,7 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_and_publish_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     # runs-on: linux.4xlarge.nvidia.gpu
     # Use available instance types - https://github.com/pytorch/test-infra/blob/main/.github/scale-config.yml
     runs-on: ${{ matrix.host-machine.instance }}
diff --git a/.github/workflows/fbgemm_gpu_ci_genai.yml b/.github/workflows/fbgemm_gpu_ci_genai.yml
index 3ffdf45c0a..7661508217 100644
--- a/.github/workflows/fbgemm_gpu_ci_genai.yml
+++ b/.github/workflows/fbgemm_gpu_ci_genai.yml
@@ -46,6 +46,7 @@ concurrency:
 jobs:
   # Build on CPU hosts and upload to GHA
   build_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
@@ -127,6 +128,7 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_and_publish_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     # Use available instance types - https://github.com/pytorch/test-infra/blob/main/.github/scale-config.yml
     runs-on: ${{ matrix.host-machine.instance }}
     defaults:
diff --git a/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml b/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
index ef9f78ed6d..7c3c699b9a 100644
--- a/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
+++ b/.github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
@@ -32,6 +32,7 @@ concurrency:
 jobs:
   # Build on CPU hosts and upload to GHA
   build_artifact:
+    if: ${{ github.repository_owner != 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
@@ -116,6 +117,7 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_artifact:
+    if: ${{ github.repository_owner != 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
diff --git a/.github/workflows/fbgemm_gpu_ci_rocm.yml b/.github/workflows/fbgemm_gpu_ci_rocm.yml
index 14e6c41cd9..2de77e4d1b 100644
--- a/.github/workflows/fbgemm_gpu_ci_rocm.yml
+++ b/.github/workflows/fbgemm_gpu_ci_rocm.yml
@@ -46,6 +46,7 @@ concurrency:
 jobs:
   # Build on CPU hosts and upload to GHA
   build_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: ${{ matrix.container-image }}
@@ -125,6 +126,7 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_and_publish_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: "rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}-complete"
diff --git a/.github/workflows/fbgemm_gpu_docs.yml b/.github/workflows/fbgemm_gpu_docs.yml
index 5654cae455..126269fac9 100644
--- a/.github/workflows/fbgemm_gpu_docs.yml
+++ b/.github/workflows/fbgemm_gpu_docs.yml
@@ -24,6 +24,7 @@ on:
 
 jobs:
   build-docs:
+    if: ${{ github.repository_owner == 'pytorch' }}
     permissions:
       # Grant write permission here so that the generated docs can be pushed to `gh-pages` branch
       contents: write
diff --git a/.github/workflows/fbgemm_gpu_lint.yml b/.github/workflows/fbgemm_gpu_lint.yml
index 3dccceacca..75e06355b5 100644
--- a/.github/workflows/fbgemm_gpu_lint.yml
+++ b/.github/workflows/fbgemm_gpu_lint.yml
@@ -35,7 +35,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ "3.11" ]
+        python-version: [ "3.12" ]
 
     steps:
       - name: Checkout the Repository
diff --git a/.github/workflows/fbgemm_gpu_pip.yml b/.github/workflows/fbgemm_gpu_pip.yml
index 33125145fd..ae193db157 100644
--- a/.github/workflows/fbgemm_gpu_pip.yml
+++ b/.github/workflows/fbgemm_gpu_pip.yml
@@ -45,7 +45,7 @@ on:
 
 jobs:
   test_pypi_install_cpu:
-    if: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cpu') }}
+    if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cpu')) }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
@@ -104,7 +104,7 @@ jobs:
 
 
   test_pypi_install_cuda:
-    if: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cuda') }}
+    if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cuda')) }}
     runs-on: ${{ matrix.host-machine.instance }}
     defaults:
       run:
@@ -165,7 +165,7 @@ jobs:
 
 
   test_pypi_install_rocm:
-    if: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'rocm') }}
+    if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'rocm')) }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: "rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}-complete"
diff --git a/.github/workflows/fbgemm_gpu_release_cpu.yml b/.github/workflows/fbgemm_gpu_release_cpu.yml
index 51e4a9bec1..f7d211601b 100644
--- a/.github/workflows/fbgemm_gpu_release_cpu.yml
+++ b/.github/workflows/fbgemm_gpu_release_cpu.yml
@@ -44,6 +44,7 @@ concurrency:
 jobs:
   # Build on CPU hosts, run tests, and upload to GHA
   build_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
@@ -114,6 +115,7 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_and_publish_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
diff --git a/.github/workflows/fbgemm_gpu_release_cuda.yml b/.github/workflows/fbgemm_gpu_release_cuda.yml
index 9b242950e8..602664fe6d 100644
--- a/.github/workflows/fbgemm_gpu_release_cuda.yml
+++ b/.github/workflows/fbgemm_gpu_release_cuda.yml
@@ -50,6 +50,7 @@ concurrency:
 jobs:
   # Build on CPU hosts and upload to GHA
   build_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
@@ -126,6 +127,7 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_and_publish_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     defaults:
       run:
diff --git a/.github/workflows/fbgemm_gpu_release_genai.yml b/.github/workflows/fbgemm_gpu_release_genai.yml
index 33fca1f640..fd27896037 100644
--- a/.github/workflows/fbgemm_gpu_release_genai.yml
+++ b/.github/workflows/fbgemm_gpu_release_genai.yml
@@ -50,6 +50,7 @@ concurrency:
 jobs:
   # Build on CPU hosts and upload to GHA
   build_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     container:
       image: amazonlinux:2023
@@ -126,6 +127,7 @@ jobs:
 
   # Download the built artifact from GHA, test on GPU, and push to PyPI
   test_and_publish_artifact:
+    if: ${{ github.repository_owner == 'pytorch' }}
     runs-on: ${{ matrix.host-machine.instance }}
     defaults:
       run:
diff --git a/fbgemm_gpu/bench/quantize_ops_benchmark.py b/fbgemm_gpu/bench/quantize_ops_benchmark.py
index 9ffbd99114..81eb07bead 100644
--- a/fbgemm_gpu/bench/quantize_ops_benchmark.py
+++ b/fbgemm_gpu/bench/quantize_ops_benchmark.py
@@ -469,7 +469,7 @@ def mixdim(
     )  # output is FP32
 
     print(
-        f"Input tensor batch_size: {batch_size}, num_tables: {num_tables}, tensor_size: {input_data.numel() / (1 << 30)} GB, average table dimension: {sum(table_dims) * 1.0/num_tables}."
+        f"Input tensor batch_size: {batch_size}, num_tables: {num_tables}, tensor_size: {input_data.numel() / (1 << 30)} GB, average table dimension: {sum(table_dims) * 1.0 / num_tables}."
     )
     print(
         f"Mixed dim dequantize average time per iter FP32: {average_time_mixed_dim_fp32} s, bandwidth : {input_data.numel() / (1 << 30) / average_time_mixed_dim_fp32} GB/s."
diff --git a/fbgemm_gpu/bench/split_table_batched_embeddings_benchmark.py b/fbgemm_gpu/bench/split_table_batched_embeddings_benchmark.py
index 177c79508d..207fa350b6 100644
--- a/fbgemm_gpu/bench/split_table_batched_embeddings_benchmark.py
+++ b/fbgemm_gpu/bench/split_table_batched_embeddings_benchmark.py
@@ -835,7 +835,7 @@ def cache(  # noqa C901
     param_size_multiplier = weights_precision.bit_rate() / 8.0
     logging.info(
         f"Embedding tables: {E * T} rows, {nparams / 1.0e9: .2f} GParam, "
-        f"{nparams * param_size_multiplier/1.0e9: .2f} GB"
+        f"{nparams * param_size_multiplier / 1.0e9: .2f} GB"
     )
     logging.info(
         f"Accessed weights per batch: {B * T * L} rows, "
@@ -889,11 +889,11 @@ def cache(  # noqa C901
         cache_misses.append((emb.lxu_cache_locations_list[0] == NOT_FOUND).sum().item())
         emb.forward(indices.long(), offsets.long())
     logging.info(
-        f"Exchanged cache lines -- mean: {sum(exchanged_cache_lines)/len(requests): .2f}, "
+        f"Exchanged cache lines -- mean: {sum(exchanged_cache_lines) / len(requests): .2f}, "
         f"max: {max(exchanged_cache_lines)}, min: {min(exchanged_cache_lines)}"
     )
     logging.info(
-        f"Cache miss -- mean: {sum(cache_misses)/len(requests)}, "
+        f"Cache miss -- mean: {sum(cache_misses) / len(requests)}, "
        f"max: {max(cache_misses)}, min: {min(cache_misses)}"
     )
 
@@ -2386,24 +2386,24 @@ def nbit_cache(  # noqa C901
             input_indices.append(len(indices))
 
     logging.info(
-        f"Exchanged cache lines -- mean: {sum(exchanged_cache_lines)/len(requests): .2f}, "
+        f"Exchanged cache lines -- mean: {sum(exchanged_cache_lines) / len(requests): .2f}, "
         f"max: {max(exchanged_cache_lines)}, min: {min(exchanged_cache_lines)}"
     )
     logging.info(
-        f"Cache miss -- mean: {sum(cache_misses)/len(requests)}, "
+        f"Cache miss -- mean: {sum(cache_misses) / len(requests)}, "
         f"max: {max(cache_misses)}, min: {min(cache_misses)}"
     )
     logging.info(
-        f"input_indices -- mean: {sum(input_indices)/len(requests)}, "
+        f"input_indices -- mean: {sum(input_indices) / len(requests)}, "
         f"max: {max(input_indices)}, min: {min(input_indices)}"
     )
     logging.info(
-        f"unique_indices -- mean: {sum(unique_indices)/len(requests)}, "
+        f"unique_indices -- mean: {sum(unique_indices) / len(requests)}, "
         f"max: {max(unique_indices)}, min: {min(unique_indices)}"
     )
     unique_miss_rate = [a / b for (a, b) in zip(exchanged_cache_lines, unique_indices)]
     logging.info(
-        f"unique_miss_rate -- mean: {sum(unique_miss_rate)/len(requests)}, "
+        f"unique_miss_rate -- mean: {sum(unique_miss_rate) / len(requests)}, "
         f"max: {max(unique_miss_rate)}, min: {min(unique_miss_rate)}"
     )
     if record_cache_miss_counter or record_tablewise_cache_miss:
diff --git a/fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py b/fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py
index 25540c190c..a35fcdddf2 100644
--- a/fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py
+++ b/fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py
@@ -149,7 +149,7 @@ def benchmark_read_write(
     gibps_wr = byte_seconds_per_ns / (write_lat_ns * 2**30)
     gibps_tot = 2 * byte_seconds_per_ns / ((read_lat_ns + write_lat_ns) * 2**30)
     logging.info(
-        f"Total bytes: {total_bytes/1e9:0.2f} GB, "
+        f"Total bytes: {total_bytes / 1e9:0.2f} GB, "
         f"Read_us: {read_lat_ns / 1000:8.0f}, "
         f"Write_us: {write_lat_ns / 1000:8.0f}, "
         f"Total_us: {(read_lat_ns + write_lat_ns) / 1000:8.0f}, "
@@ -389,7 +389,7 @@ def gen_split_tbe_generator(
         + param_size_multiplier * B * sum(Ds) * L
     )
 
-    logging.info(f"Batch read write bytes: {read_write_bytes/1.0e9: .2f} GB")
+    logging.info(f"Batch read write bytes: {read_write_bytes / 1.0e9: .2f} GB")
 
     # Compute width of test name and bandwidth widths to improve report
     # readability
diff --git a/fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_inference.py b/fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_inference.py
index d988563aed..f1671d29d1 100644
--- a/fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_inference.py
+++ b/fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops_inference.py
@@ -531,10 +531,10 @@ def print_cache_miss_counter(self) -> None:
             f"Miss counter value [3] - # of total requested indices : {self.cache_miss_counter[3]}, "
         )
         logging.info(
-            f"unique_miss_rate using counter : {self.cache_miss_counter[1]/self.cache_miss_counter[2]}, \n"
+            f"unique_miss_rate using counter : {self.cache_miss_counter[1] / self.cache_miss_counter[2]}, \n"
         )
         logging.info(
-            f"total_miss_rate using counter : {self.cache_miss_counter[1]/self.cache_miss_counter[3]}, \n"
+            f"total_miss_rate using counter : {self.cache_miss_counter[1] / self.cache_miss_counter[3]}, \n"
         )
 
     def get_uvm_cache_stats(self) -> Tensor:
@@ -558,8 +558,8 @@ def print_uvm_cache_stats(self) -> None:
         )
         if uvm_cache_stats[1]:
             logging.info(
-                f"unique indices / requested indices: {uvm_cache_stats[2]/uvm_cache_stats[1]}\n"
-                f"unique misses / requested indices: {uvm_cache_stats[3]/uvm_cache_stats[1]}\n"
+                f"unique indices / requested indices: {uvm_cache_stats[2] / uvm_cache_stats[1]}\n"
+                f"unique misses / requested indices: {uvm_cache_stats[3] / uvm_cache_stats[1]}\n"
             )
 
     @torch.jit.export
diff --git a/fbgemm_gpu/test/quantize/fp8_rowwise_test.py b/fbgemm_gpu/test/quantize/fp8_rowwise_test.py
index a5989d9b19..b3cc06a139 100644
--- a/fbgemm_gpu/test/quantize/fp8_rowwise_test.py
+++ b/fbgemm_gpu/test/quantize/fp8_rowwise_test.py
@@ -225,9 +225,9 @@ def test_quantize_and_dequantize_op_padded_fp8_rowwise(
             logging.info(f"qref {torch.gather(qref, dim=1, index=idx)}")
             logging.info(f"dqcat {torch.gather(dqcat, dim=1, index=idx)}")
             logging.info(
-                f"relative error: max: {errors.abs().max()*100:.1f}%, "
-                f"median: {errors.abs().median()*100:.1f}%, "
-                f"mean: {errors.abs().mean()*100:.1f}%"
+                f"relative error: max: {errors.abs().max() * 100:.1f}%, "
+                f"median: {errors.abs().median() * 100:.1f}%, "
+                f"mean: {errors.abs().mean() * 100:.1f}%"
             )
             torch.testing.assert_allclose(dqcat, qref, rtol=0.1, atol=0.05)
 