Skip to content

Commit

Permalink
Merge branch 'perplexity-test' of https://github.com/nod-ai/sharktank
Browse files Browse the repository at this point in the history
…into perplexity-test
  • Loading branch information
archana-ramalingam committed Oct 16, 2024
2 parents 96458a8 + e4ccb10 commit b4e3635
Show file tree
Hide file tree
Showing 51 changed files with 1,123 additions and 377 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci-tuner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ jobs:

steps:
- name: Checkout code
uses: actions/checkout@v4.1.7
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Set up Python
uses: actions/setup-python@v4
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
with:
python-version: '3.10.12'

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
# wheels saves multiple minutes and a lot of bandwidth on runner setup.
pip install --no-compile -r pytorch-cpu-requirements.txt
pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
-e "git+https://github.com/iree-org/iree-turbine.git#egg=shark-turbine"
-e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
pip install --no-compile -r requirements.txt -e sharktank/
- name: Run sharktank tests
Expand Down
11 changes: 4 additions & 7 deletions .github/workflows/ci_linux_x64-libshortfin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ jobs:
run: |
sudo apt update
sudo apt install clang lld cmake ninja-build
sudo apt install libspdlog-dev libxtensor-dev
- name: Checkout repository
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
Expand Down Expand Up @@ -89,9 +88,8 @@ jobs:
-DCMAKE_CXX_COMPILER=clang++-18 \
-DCMAKE_LINKER_TYPE=LLD \
-DSHORTFIN_BUNDLE_DEPS=ON \
-DSHORTFIN_IREE_SOURCE_DIR=${{ env.IREE_REPO_DIR }} \
-DSHORTFIN_BUILD_PYTHON_BINDINGS=ON \
..
-DSHORTFIN_IREE_SOURCE_DIR="${{ env.IREE_REPO_DIR }}" \
-DSHORTFIN_BUILD_PYTHON_BINDINGS=ON
cmake --build build --target all
pip install -v -e build/
Expand All @@ -113,10 +111,9 @@ jobs:
-DCMAKE_C_COMPILER=clang-18 \
-DCMAKE_CXX_COMPILER=clang++-18 \
-DCMAKE_LINKER_TYPE=LLD \
-DSHORTFIN_IREE_SOURCE_DIR=${{ env.IREE_REPO_DIR }} \
-DSHORTFIN_IREE_SOURCE_DIR="${{ env.IREE_REPO_DIR }}" \
-DSHORTFIN_BUILD_PYTHON_BINDINGS=ON \
-DSHORTFIN_HAVE_AMDGPU=OFF \
-DSHORTFIN_BUILD_STATIC=ON \
-DSHORTFIN_BUILD_DYNAMIC=ON \
..
-DSHORTFIN_BUILD_DYNAMIC=ON
cmake --build build-host-only --target all
5 changes: 2 additions & 3 deletions .github/workflows/ci_linux_x64_nogil-libshortfin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,8 @@ jobs:
-DCMAKE_CXX_COMPILER=clang++-18 \
-DCMAKE_LINKER_TYPE=LLD \
-DSHORTFIN_BUNDLE_DEPS=ON \
-DSHORTFIN_IREE_SOURCE_DIR=${{ env.IREE_REPO_DIR }} \
-DSHORTFIN_BUILD_PYTHON_BINDINGS=ON \
..
-DSHORTFIN_IREE_SOURCE_DIR="${{ env.IREE_REPO_DIR }}" \
-DSHORTFIN_BUILD_PYTHON_BINDINGS=ON
cmake --build build --target all
pip install -v -e build/
Expand Down
95 changes: 95 additions & 0 deletions .github/workflows/ci_windows_x64-libshortfin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

name: CI - shortfin - Windows

# Runs on manual dispatch, on every pull request, and on pushes to main that
# touch this workflow file or anything under shortfin/.
on:
  workflow_dispatch:
  pull_request:
  push:
    branches:
      - main
    paths:
      - '.github/workflows/ci_windows_x64-libshortfin.yml'
      - 'shortfin/**'

permissions:
  contents: read

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

env:
  # Checkout location of the IREE sources that shortfin builds against.
  IREE_REPO_DIR: ${{ github.workspace }}/iree
  # Working directory for the install, build and test steps.
  LIBSHORTFIN_DIR: ${{ github.workspace }}/shortfin/

jobs:
  build-and-test:
    name: Build and test
    runs-on: windows-2022

    steps:
      - name: Configure MSVC
        uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0

      - name: Checkout repository
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          submodules: false

      - name: Checkout IREE repo
        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
        with:
          repository: iree-org/iree
          path: ${{ env.IREE_REPO_DIR }}
          submodules: false
          ref: candidate-20240904.1006

      # Submodules are disabled in the checkout above; only the ones listed
      # here are fetched, each as a shallow (depth 1) clone.
      - name: Initialize IREE submodules
        working-directory: ${{ env.IREE_REPO_DIR }}
        run: |
          git submodule update --init --depth 1 -- third_party/benchmark
          git submodule update --init --depth 1 -- third_party/cpuinfo/
          git submodule update --init --depth 1 -- third_party/flatcc
          git submodule update --init --depth 1 -- third_party/googletest
          git submodule update --init --depth 1 -- third_party/hip-build-deps/

      - name: Setup Python
        uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
        with:
          python-version: "3.12"
          cache: "pip"

      - name: Install Python packages
        working-directory: ${{ env.LIBSHORTFIN_DIR }}
        run: |
          pip install -r requirements-tests.txt
          pip install -r requirements-iree-compiler.txt
          pip freeze

      - name: Build shortfin (full)
        working-directory: ${{ env.LIBSHORTFIN_DIR }}
        # bash is needed here: the cmake invocation relies on backslash line
        # continuations.
        shell: bash
        run: |
          mkdir build
          cmake -GNinja \
            -S. \
            -Bbuild \
            -DSHORTFIN_BUNDLE_DEPS=ON \
            -DSHORTFIN_IREE_SOURCE_DIR="${{ env.IREE_REPO_DIR }}" \
            -DSHORTFIN_BUILD_PYTHON_BINDINGS=ON
          cmake --build build --target all
          pip install -v -e build/

      - name: Test shortfin (full)
        working-directory: ${{ env.LIBSHORTFIN_DIR }}
        run: |
          ctest --timeout 30 --output-on-failure --test-dir build
          pytest -s
4 changes: 2 additions & 2 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
# wheels saves multiple minutes and a lot of bandwidth on runner setup.
pip install --no-compile -r pytorch-cpu-requirements.txt
pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
-e "git+https://github.com/iree-org/iree-turbine.git#egg=shark-turbine"
-e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
pip install --no-compile -r requirements.txt -e sharktank/ shortfin/
# Try with the latest nightly releases, not what iree-turbine pins.
Expand Down Expand Up @@ -85,7 +85,7 @@ jobs:
python -m pip install --no-compile --upgrade pip
pip install --no-compile -r pytorch-rocm-requirements.txt
pip install --no-compile -f https://iree.dev/pip-release-links.html --src deps \
-e "git+https://github.com/iree-org/iree-turbine.git#egg=shark-turbine"
-e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
pip install --no-compile -r requirements.txt -e sharktank/ shortfin/
- name: Run punet tests
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pip install -r pytorch-rocm-requirements.txt
```
# Clone and install editable iree-turbine dep in deps/
pip install -f https://iree.dev/pip-release-links.html --src deps \
-e "git+https://github.com/iree-org/iree-turbine.git#egg=shark-turbine"
-e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"
# Install editable local projects.
pip install -r requirements.txt -e sharktank/ shortfin/
Expand Down
2 changes: 1 addition & 1 deletion docs/model_cookbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ source .venv/bin/activate
# Install requirements.
pip install -r pytorch-cpu-requirements.txt
pip install -f https://iree.dev/pip-release-links.html --src deps \
-e "git+https://github.com/iree-org/iree-turbine.git#egg=shark-turbine"
-e "git+https://github.com/iree-org/iree-turbine.git#egg=iree-turbine"

# Install local projects.
pip install -r requirements.txt -e sharktank/ shortfin/
Expand Down
6 changes: 3 additions & 3 deletions docs/quantization.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,9 @@ is everything). We're just starting to exploit some of this at the PyTorch
level. Some examples:

* Something as simple as a humble runtime
[tensor trace/print](https://github.com/iree-org/iree-turbine/blob/main/shark_turbine/ops/iree.py#L52)
* [Simple linalg based template expansion](https://github.com/iree-org/iree-turbine/blob/main/shark_turbine/ops/_jinja_test_ops.py#L28)
(see backing example [jinja template](https://github.com/iree-org/iree-turbine/blob/main/shark_turbine/ops/templates/test_add_jinja.mlir)).
[tensor trace/print](https://github.com/iree-org/iree-turbine/blob/main/iree/turbine/ops/iree.py#L52)
* [Simple linalg based template expansion](https://github.com/iree-org/iree-turbine/blob/main/iree/turbine/ops/_jinja_test_ops.py#L28)
(see backing example [jinja template](https://github.com/iree-org/iree-turbine/blob/main/iree/turbine/ops/templates/test_add_jinja.mlir)).
* Optimal linalg-based [8-bit block scaled mmt for weight compression](https://github.com/nod-ai/sharktank/blob/main/sharktank/sharktank/kernels/mmt_block_scaled_q8.py)
(see backing [jinja template](https://github.com/nod-ai/sharktank/blob/main/sharktank/sharktank/kernels/templates/mmt_block_scaled_q8_3d.mlir)).
* DSL based [like this fused attention kernel](https://github.com/iree-org/iree-turbine/blob/main/tests/kernel/fused_attention_test.py#L20)
Expand Down
55 changes: 42 additions & 13 deletions sharktank/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

from pathlib import Path
import pytest
from typing import Optional
from pytest import FixtureRequest
from typing import Optional, Any


# Tests under each top-level directory will get a mark.
Expand Down Expand Up @@ -47,6 +48,15 @@ def pytest_addoption(parser):
default=None,
help="Exported model parameters. If not specified a temporary file will be used.",
)
parser.addoption(
"--prefix",
type=str,
default=None,
help=(
"Path prefix for test artifacts. "
"Other arguments may override this for specific values."
),
)
parser.addoption(
"--caching",
action="store_true",
Expand All @@ -55,21 +65,40 @@ def pytest_addoption(parser):
)


@pytest.fixture(scope="session")
def mlir_path(pytestconfig: pytest.Config) -> Optional[Path]:
return pytestconfig.getoption("mlir")
def set_fixture_from_cli_option(
    request: FixtureRequest,
    cli_option_name: str,
    class_attribute_name: Optional[str] = None,
) -> Optional[Any]:
    """Resolve a CLI option for a fixture and publish it on the requesting class.

    Args:
        request: Fixture request; ``request.config`` supplies the option value
            and ``request.cls`` (when not ``None``) receives the attribute.
        cli_option_name: Name passed to ``request.config.getoption``.
        class_attribute_name: Attribute name to set on ``request.cls``.
            Defaults to ``cli_option_name`` when omitted.

    Returns:
        The option value. It is returned whether or not a class is involved,
        so the fixture yields the same value when injected as an argument as
        the one stored on the class.
    """
    value = request.config.getoption(cli_option_name)
    if request.cls is not None:
        # Also expose the value as a class attribute so test methods can read
        # it from ``self`` without taking the fixture as a parameter.
        attribute = (
            cli_option_name if class_attribute_name is None else class_attribute_name
        )
        setattr(request.cls, attribute, value)
    return value


@pytest.fixture(scope="class")
def mlir_path(request: FixtureRequest) -> Optional[Path]:
    """Class-scoped fixture backed by the ``mlir`` CLI option.

    Delegates to ``set_fixture_from_cli_option`` with ``mlir_path`` as the
    class attribute name.
    """
    return set_fixture_from_cli_option(
        request,
        cli_option_name="mlir",
        class_attribute_name="mlir_path",
    )


@pytest.fixture(scope="class")
def module_path(request: FixtureRequest) -> Optional[Path]:
    """Class-scoped fixture backed by the ``module`` CLI option.

    Delegates to ``set_fixture_from_cli_option`` with ``module_path`` as the
    class attribute name.
    """
    return set_fixture_from_cli_option(
        request,
        cli_option_name="module",
        class_attribute_name="module_path",
    )


@pytest.fixture(scope="session")
def module_path(pytestconfig: pytest.Config) -> Optional[Path]:
return pytestconfig.getoption("module")
@pytest.fixture(scope="class")
def parameters_path(request: FixtureRequest) -> Optional[Path]:
    """Class-scoped fixture backed by the ``parameters`` CLI option.

    Delegates to ``set_fixture_from_cli_option`` with ``parameters_path`` as
    the class attribute name.
    """
    return set_fixture_from_cli_option(
        request,
        cli_option_name="parameters",
        class_attribute_name="parameters_path",
    )


@pytest.fixture(scope="session")
def parameters_path(pytestconfig: pytest.Config) -> Optional[Path]:
return pytestconfig.getoption("parameters")
@pytest.fixture(scope="class")
def path_prefix(request: FixtureRequest) -> Optional[str]:
    """Class-scoped fixture backed by the ``--prefix`` CLI option.

    Delegates to ``set_fixture_from_cli_option`` with ``path_prefix`` as the
    class attribute name.
    """
    return set_fixture_from_cli_option(
        request,
        cli_option_name="prefix",
        class_attribute_name="path_prefix",
    )


@pytest.fixture(scope="session")
def caching(pytestconfig: pytest.Config) -> Optional[Path]:
return pytestconfig.getoption("caching")
@pytest.fixture(scope="class")
def caching(request: FixtureRequest) -> Optional[bool]:
    """Class-scoped fixture backed by the ``--caching`` CLI option.

    Delegates to ``set_fixture_from_cli_option`` without an explicit class
    attribute name, so the option name ``caching`` is used for it.
    """
    return set_fixture_from_cli_option(request, cli_option_name="caching")
5 changes: 2 additions & 3 deletions sharktank/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,10 @@ def initialize_options(self):
name=f"sharktank",
version=f"{PACKAGE_VERSION}",
author="SHARK Authors",
author_email="[email protected]",
description="SHARK layers and inference models for genai",
long_description=README,
long_description_content_type="text/markdown",
url="https://github.com/nod-ai/sharktank",
url="https://github.com/nod-ai/SHARK-Platform",
license="Apache-2.0",
classifiers=[
"Development Status :: 3 - Alpha",
Expand All @@ -95,7 +94,7 @@ def initialize_options(self):
"sharktank": ["py.typed", "kernels/templates/*.mlir"],
},
install_requires=[
"shark-turbine",
"iree-turbine",
],
extras_require={
"testing": [
Expand Down
2 changes: 1 addition & 1 deletion sharktank/sharktank/examples/export_paged_llm_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import json
import torch

from shark_turbine.aot import *
from iree.turbine.aot import *

from sharktank.layers import *
from sharktank.types import *
Expand Down
2 changes: 1 addition & 1 deletion sharktank/sharktank/examples/sharding/export_ffn_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def main(raw_args=None):
ds = Dataset.load(args.output_irpa_file)

mdl = ShardedFFN(ds.root_theta)
from shark_turbine import aot
from iree.turbine import aot

example_arg = torch.empty(bs, sl, primary_dim, dtype=torch.float16)
ep = torch.export.export(mdl, (example_arg,))
Expand Down
2 changes: 1 addition & 1 deletion sharktank/sharktank/examples/sharding/export_gemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import torch
from torch import Tensor
from sharktank import ops
from shark_turbine import aot
from iree.turbine import aot


def export_gemm(
Expand Down
2 changes: 1 addition & 1 deletion sharktank/sharktank/export_layer/export_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import torch
from shark_turbine.aot import *
from iree.turbine.aot import *
from sharktank.models.llama.testing import make_moe_block_theta, make_rand_torch
from sharktank.layers.mixture_of_experts_block import PreGatherMoeBlock
from ..utils import cli
Expand Down
2 changes: 1 addition & 1 deletion sharktank/sharktank/export_layer/export_paged_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

import torch.nn.functional as F

from shark_turbine.aot import *
from iree.turbine.aot import *

from sharktank.layers import *
from sharktank.types import *
Expand Down
6 changes: 3 additions & 3 deletions sharktank/sharktank/kernels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from jinja2 import Environment, PackageLoader, select_autoescape

from shark_turbine.support.ir_imports import (
from iree.turbine.support.ir_imports import (
FlatSymbolRefAttr,
FunctionType,
IrType,
Expand All @@ -24,15 +24,15 @@
Value,
)

from shark_turbine.runtime.op_reg import (
from iree.turbine.runtime.op_reg import (
def_library,
CustomOp,
KernelBuilder,
KernelSelection,
TensorArg,
)

from shark_turbine.transforms.merger import Merger
from iree.turbine.transforms.merger import Merger

from ..utils.logging import get_logger

Expand Down
Loading

0 comments on commit b4e3635

Please sign in to comment.