[fbgemm_gpu] Gate OSS CI workflows
- Add conditionals to prevent pytorch-infra-based OSS CI workflows from running when the repository is hosted in a non-pytorch organization (see the sketch below the file summary)
q10 committed Sep 25, 2024
1 parent 85de260 commit 6b34091
Showing 19 changed files with 45 additions and 24 deletions.
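The commit applies three related gating patterns, sketched below as a minimal standalone workflow. This is an illustrative sketch only: the workflow and job names are not taken from the repository, and the steps are placeholders; the expressions and runner labels mirror the ones added in the diffs that follow.

# Minimal sketch of the gating patterns used in this commit (illustrative
# workflow/job names; expressions mirror the diffs below).
name: example-gated-ci

on:
  push:

jobs:
  pytorch-infra-job:
    # Job-level gate: skip the job entirely when the repo lives outside the pytorch org
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: linux.12xlarge
    steps:
      - run: echo "runs only in the pytorch organization"

  portable-job:
    # Runner fallback: use pytorch-infra runners inside the pytorch org,
    # a public GitHub-hosted runner everywhere else
    runs-on: ${{ github.repository_owner == 'pytorch' && 'linux.12xlarge' || 'ubuntu-latest' }}
    steps:
      - run: echo "runs everywhere, on different runner pools"

  fork-only-job:
    # Inverted gate, as in fbgemm_gpu_ci_genai_generic_infra.yml: run only on forks
    if: ${{ github.repository_owner != 'pytorch' }}
    runs-on: ubuntu-latest
    steps:
      - run: echo "runs only outside the pytorch organization"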
2 changes: 2 additions & 0 deletions .github/workflows/build_wheels_linux_aarch64.yml
@@ -24,6 +24,7 @@ permissions:

jobs:
generate-matrix:
if: ${{ github.repository_owner == 'pytorch' }}
uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
with:
package-type: wheel
@@ -32,6 +33,7 @@ jobs:
test-infra-ref: main
with-cuda: disable
build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: generate-matrix
strategy:
fail-fast: false
2 changes: 2 additions & 0 deletions .github/workflows/build_wheels_linux_x86.yml
@@ -24,6 +24,7 @@ permissions:

jobs:
generate-matrix:
if: ${{ github.repository_owner == 'pytorch' }}
uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
with:
package-type: wheel
@@ -34,6 +35,7 @@ jobs:
with-rocm: enable
with-cpu: enable
build:
if: ${{ github.repository_owner == 'pytorch' }}
needs: generate-matrix
name: pytorch/FBGEMM
uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_ci.yml
@@ -20,7 +20,7 @@ concurrency:

jobs:
build-linux:
runs-on: ${{ matrix.host-machine.instance }}
runs-on: ${{ github.repository_owner == 'pytorch' && matrix.host-machine.instance || 'ubuntu-latest' }}
container:
image: amazonlinux:2023
options: --user root
@@ -105,7 +105,7 @@ jobs:
build-bazel:
runs-on: linux.12xlarge
runs-on: ${{ github.repository_owner == 'pytorch' && matrix.host-machine.instance || 'ubuntu-latest' }}
container:
image: amazonlinux:2023
options: --user root
2 changes: 2 additions & 0 deletions .github/workflows/fbgemm_gpu_ci_cpu.yml
@@ -47,6 +47,7 @@ concurrency:
jobs:
# Build on CPU hosts, run tests, and upload to GHA
build_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
@@ -118,6 +119,7 @@ jobs:

# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
2 changes: 2 additions & 0 deletions .github/workflows/fbgemm_gpu_ci_cuda.yml
@@ -46,6 +46,7 @@ concurrency:
jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
@@ -127,6 +128,7 @@ jobs:

# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
# runs-on: linux.4xlarge.nvidia.gpu
# Use available instance types - https://github.com/pytorch/test-infra/blob/main/.github/scale-config.yml
runs-on: ${{ matrix.host-machine.instance }}
2 changes: 2 additions & 0 deletions .github/workflows/fbgemm_gpu_ci_genai.yml
@@ -46,6 +46,7 @@ concurrency:
jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
@@ -127,6 +128,7 @@ jobs:

# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
# Use available instance types - https://github.com/pytorch/test-infra/blob/main/.github/scale-config.yml
runs-on: ${{ matrix.host-machine.instance }}
defaults:
2 changes: 2 additions & 0 deletions .github/workflows/fbgemm_gpu_ci_genai_generic_infra.yml
@@ -32,6 +32,7 @@ concurrency:
jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
if: ${{ github.repository_owner != 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
@@ -116,6 +117,7 @@ jobs:

# Download the built artifact from GHA, test on GPU, and push to PyPI
test_artifact:
if: ${{ github.repository_owner != 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
2 changes: 2 additions & 0 deletions .github/workflows/fbgemm_gpu_ci_rocm.yml
@@ -46,6 +46,7 @@ concurrency:
jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: ${{ matrix.container-image }}
@@ -125,6 +126,7 @@ jobs:

# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: "rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}-complete"
1 change: 1 addition & 0 deletions .github/workflows/fbgemm_gpu_docs.yml
@@ -24,6 +24,7 @@ on:

jobs:
build-docs:
if: ${{ github.repository_owner == 'pytorch' }}
permissions:
# Grant write permission here so that the generated docs can be pushed to `gh-pages` branch
contents: write
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_lint.yml
@@ -35,7 +35,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.11" ]
python-version: [ "3.12" ]

steps:
- name: Checkout the Repository
6 changes: 3 additions & 3 deletions .github/workflows/fbgemm_gpu_pip.yml
@@ -45,7 +45,7 @@ on:

jobs:
test_pypi_install_cpu:
if: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cpu') }}
if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cpu')) }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
@@ -104,7 +104,7 @@ jobs:


test_pypi_install_cuda:
if: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cuda') }}
if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'cuda')) }}
runs-on: ${{ matrix.host-machine.instance }}
defaults:
run:
@@ -165,7 +165,7 @@ jobs:


test_pypi_install_rocm:
if: ${{ github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'rocm') }}
if: ${{ github.repository_owner == 'pytorch' && (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.fbgemm_gpu_variant_type == 'rocm')) }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: "rocm/dev-ubuntu-20.04:${{ matrix.rocm-version }}-complete"
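For reference, the combined condition in this workflow is the repository-owner gate AND'ed with the pre-existing event checks. Written out with a folded scalar for readability, it reads as below; this is a condensed restatement, not the exact job definition (the job name is shortened and the runner is simplified to ubuntu-latest):

test_pypi_install:
  # Run only in the pytorch org, and only for scheduled runs or a matching manual dispatch
  if: >-
    ${{ github.repository_owner == 'pytorch' &&
    (github.event_name == 'schedule' ||
    (github.event_name == 'workflow_dispatch' &&
    github.event.inputs.fbgemm_gpu_variant_type == 'cpu')) }}
  runs-on: ubuntu-latest
  steps:
    - run: echo "gated pip install test"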
2 changes: 2 additions & 0 deletions .github/workflows/fbgemm_gpu_release_cpu.yml
@@ -44,6 +44,7 @@ concurrency:
jobs:
# Build on CPU hosts, run tests, and upload to GHA
build_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
@@ -114,6 +115,7 @@ jobs:

# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
2 changes: 2 additions & 0 deletions .github/workflows/fbgemm_gpu_release_cuda.yml
@@ -50,6 +50,7 @@ concurrency:
jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
@@ -126,6 +127,7 @@ jobs:

# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
defaults:
run:
2 changes: 2 additions & 0 deletions .github/workflows/fbgemm_gpu_release_genai.yml
@@ -50,6 +50,7 @@ concurrency:
jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
@@ -126,6 +127,7 @@ jobs:

# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: ${{ matrix.host-machine.instance }}
defaults:
run:
2 changes: 1 addition & 1 deletion fbgemm_gpu/bench/quantize_ops_benchmark.py
@@ -469,7 +469,7 @@ def mixdim(
) # output is FP32

print(
f"Input tensor batch_size: {batch_size}, num_tables: {num_tables}, tensor_size: {input_data.numel() / (1 << 30)} GB, average table dimension: {sum(table_dims) * 1.0/num_tables}."
f"Input tensor batch_size: {batch_size}, num_tables: {num_tables}, tensor_size: {input_data.numel() / (1 << 30)} GB, average table dimension: {sum(table_dims) * 1.0 / num_tables}."
)
print(
f"Mixed dim dequantize average time per iter FP32: {average_time_mixed_dim_fp32} s, bandwidth : {input_data.numel() / (1 << 30) / average_time_mixed_dim_fp32} GB/s."
16 changes: 8 additions & 8 deletions fbgemm_gpu/bench/split_table_batched_embeddings_benchmark.py
@@ -835,7 +835,7 @@ def cache( # noqa C901
param_size_multiplier = weights_precision.bit_rate() / 8.0
logging.info(
f"Embedding tables: {E * T} rows, {nparams / 1.0e9: .2f} GParam, "
f"{nparams * param_size_multiplier / 1.0e9: .2f} GB"
f"{nparams * param_size_multiplier / 1.0e9: .2f} GB"
)
logging.info(
f"Accessed weights per batch: {B * T * L} rows, "
@@ -889,11 +889,11 @@ def cache( # noqa C901
cache_misses.append((emb.lxu_cache_locations_list[0] == NOT_FOUND).sum().item())
emb.forward(indices.long(), offsets.long())
logging.info(
f"Exchanged cache lines -- mean: {sum(exchanged_cache_lines)/len(requests): .2f}, "
f"Exchanged cache lines -- mean: {sum(exchanged_cache_lines) / len(requests): .2f}, "
f"max: {max(exchanged_cache_lines)}, min: {min(exchanged_cache_lines)}"
)
logging.info(
f"Cache miss -- mean: {sum(cache_misses)/len(requests)}, "
f"Cache miss -- mean: {sum(cache_misses) / len(requests)}, "
f"max: {max(cache_misses)}, min: {min(cache_misses)}"
)

@@ -2386,24 +2386,24 @@ def nbit_cache( # noqa C901
input_indices.append(len(indices))

logging.info(
f"Exchanged cache lines -- mean: {sum(exchanged_cache_lines)/len(requests): .2f}, "
f"Exchanged cache lines -- mean: {sum(exchanged_cache_lines) / len(requests): .2f}, "
f"max: {max(exchanged_cache_lines)}, min: {min(exchanged_cache_lines)}"
)
logging.info(
f"Cache miss -- mean: {sum(cache_misses)/len(requests)}, "
f"Cache miss -- mean: {sum(cache_misses) / len(requests)}, "
f"max: {max(cache_misses)}, min: {min(cache_misses)}"
)
logging.info(
f"input_indices -- mean: {sum(input_indices)/len(requests)}, "
f"input_indices -- mean: {sum(input_indices) / len(requests)}, "
f"max: {max(input_indices)}, min: {min(input_indices)}"
)
logging.info(
f"unique_indices -- mean: {sum(unique_indices)/len(requests)}, "
f"unique_indices -- mean: {sum(unique_indices) / len(requests)}, "
f"max: {max(unique_indices)}, min: {min(unique_indices)}"
)
unique_miss_rate = [a / b for (a, b) in zip(exchanged_cache_lines, unique_indices)]
logging.info(
f"unique_miss_rate -- mean: {sum(unique_miss_rate)/len(requests)}, "
f"unique_miss_rate -- mean: {sum(unique_miss_rate) / len(requests)}, "
f"max: {max(unique_miss_rate)}, min: {min(unique_miss_rate)}"
)
if record_cache_miss_counter or record_tablewise_cache_miss:
4 changes: 2 additions & 2 deletions fbgemm_gpu/bench/ssd_table_batched_embeddings_benchmark.py
@@ -149,7 +149,7 @@ def benchmark_read_write(
gibps_wr = byte_seconds_per_ns / (write_lat_ns * 2**30)
gibps_tot = 2 * byte_seconds_per_ns / ((read_lat_ns + write_lat_ns) * 2**30)
logging.info(
f"Total bytes: {total_bytes/1e9:0.2f} GB, "
f"Total bytes: {total_bytes / 1e9:0.2f} GB, "
f"Read_us: {read_lat_ns / 1000:8.0f}, "
f"Write_us: {write_lat_ns / 1000:8.0f}, "
f"Total_us: {(read_lat_ns + write_lat_ns) / 1000:8.0f}, "
@@ -389,7 +389,7 @@ def gen_split_tbe_generator(
+ param_size_multiplier * B * sum(Ds) * L
)

logging.info(f"Batch read write bytes: {read_write_bytes/1.0e9: .2f} GB")
logging.info(f"Batch read write bytes: {read_write_bytes / 1.0e9: .2f} GB")

# Compute width of test name and bandwidth widths to improve report
# readability
@@ -531,10 +531,10 @@ def print_cache_miss_counter(self) -> None:
f"Miss counter value [3] - # of total requested indices : {self.cache_miss_counter[3]}, "
)
logging.info(
f"unique_miss_rate using counter : {self.cache_miss_counter[1]/self.cache_miss_counter[2]}, \n"
f"unique_miss_rate using counter : {self.cache_miss_counter[1] / self.cache_miss_counter[2]}, \n"
)
logging.info(
f"total_miss_rate using counter : {self.cache_miss_counter[1]/self.cache_miss_counter[3]}, \n"
f"total_miss_rate using counter : {self.cache_miss_counter[1] / self.cache_miss_counter[3]}, \n"
)

def get_uvm_cache_stats(self) -> Tensor:
@@ -558,8 +558,8 @@ def print_uvm_cache_stats(self) -> None:
)
if uvm_cache_stats[1]:
logging.info(
f"unique indices / requested indices: {uvm_cache_stats[2]/uvm_cache_stats[1]}\n"
f"unique misses / requested indices: {uvm_cache_stats[3]/uvm_cache_stats[1]}\n"
f"unique indices / requested indices: {uvm_cache_stats[2] / uvm_cache_stats[1]}\n"
f"unique misses / requested indices: {uvm_cache_stats[3] / uvm_cache_stats[1]}\n"
)

@torch.jit.export
6 changes: 3 additions & 3 deletions fbgemm_gpu/test/quantize/fp8_rowwise_test.py
@@ -225,9 +225,9 @@ def test_quantize_and_dequantize_op_padded_fp8_rowwise(
logging.info(f"qref {torch.gather(qref, dim=1, index=idx)}")
logging.info(f"dqcat {torch.gather(dqcat, dim=1, index=idx)}")
logging.info(
f"relative error: max: {errors.abs().max()*100:.1f}%, "
f"median: {errors.abs().median()*100:.1f}%, "
f"mean: {errors.abs().mean()*100:.1f}%"
f"relative error: max: {errors.abs().max() * 100:.1f}%, "
f"median: {errors.abs().median() * 100:.1f}%, "
f"mean: {errors.abs().mean() * 100:.1f}%"
)

torch.testing.assert_allclose(dqcat, qref, rtol=0.1, atol=0.05)
